Index: head/contrib/compiler-rt =================================================================== --- head/contrib/compiler-rt (revision 329982) +++ head/contrib/compiler-rt (revision 329983) Property changes on: head/contrib/compiler-rt ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /vendor/compiler-rt/dist-release_60:r329406-329940 Index: head/contrib/libc++ =================================================================== --- head/contrib/libc++ (revision 329982) +++ head/contrib/libc++ (revision 329983) Property changes on: head/contrib/libc++ ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /vendor/libc++/dist-release_60:r329406-329940 Index: head/contrib/llvm/include/llvm/Bitcode/LLVMBitCodes.h =================================================================== --- head/contrib/llvm/include/llvm/Bitcode/LLVMBitCodes.h (revision 329982) +++ head/contrib/llvm/include/llvm/Bitcode/LLVMBitCodes.h (revision 329983) @@ -1,585 +1,599 @@ //===- LLVMBitCodes.h - Enum values for the LLVM bitcode format -*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This header defines Bitcode enum values for LLVM IR bitcode files. // // The enum values defined in this file should be considered permanent. If // new features are added, they should have values added at the end of the // respective lists. // //===----------------------------------------------------------------------===// #ifndef LLVM_BITCODE_LLVMBITCODES_H #define LLVM_BITCODE_LLVMBITCODES_H #include "llvm/Bitcode/BitCodes.h" namespace llvm { namespace bitc { // The only top-level block types are MODULE, IDENTIFICATION, STRTAB and SYMTAB. enum BlockIDs { // Blocks MODULE_BLOCK_ID = FIRST_APPLICATION_BLOCKID, // Module sub-block id's. PARAMATTR_BLOCK_ID, PARAMATTR_GROUP_BLOCK_ID, CONSTANTS_BLOCK_ID, FUNCTION_BLOCK_ID, // Block intended to contains information on the bitcode versioning. // Can be used to provide better error messages when we fail to parse a // bitcode file. IDENTIFICATION_BLOCK_ID, VALUE_SYMTAB_BLOCK_ID, METADATA_BLOCK_ID, METADATA_ATTACHMENT_ID, TYPE_BLOCK_ID_NEW, USELIST_BLOCK_ID, MODULE_STRTAB_BLOCK_ID, GLOBALVAL_SUMMARY_BLOCK_ID, OPERAND_BUNDLE_TAGS_BLOCK_ID, METADATA_KIND_BLOCK_ID, STRTAB_BLOCK_ID, FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID, SYMTAB_BLOCK_ID, SYNC_SCOPE_NAMES_BLOCK_ID, }; /// Identification block contains a string that describes the producer details, /// and an epoch that defines the auto-upgrade capability. enum IdentificationCodes { IDENTIFICATION_CODE_STRING = 1, // IDENTIFICATION: [strchr x N] IDENTIFICATION_CODE_EPOCH = 2, // EPOCH: [epoch#] }; /// The epoch that defines the auto-upgrade compatibility for the bitcode. /// /// LLVM guarantees in a major release that a minor release can read bitcode /// generated by previous minor releases. We translate this by making the reader /// accepting only bitcode with the same epoch, except for the X.0 release which /// also accepts N-1. enum { BITCODE_CURRENT_EPOCH = 0 }; /// MODULE blocks have a number of optional fields and subblocks. 
enum ModuleCodes { MODULE_CODE_VERSION = 1, // VERSION: [version#] MODULE_CODE_TRIPLE = 2, // TRIPLE: [strchr x N] MODULE_CODE_DATALAYOUT = 3, // DATALAYOUT: [strchr x N] MODULE_CODE_ASM = 4, // ASM: [strchr x N] MODULE_CODE_SECTIONNAME = 5, // SECTIONNAME: [strchr x N] // FIXME: Remove DEPLIB in 4.0. MODULE_CODE_DEPLIB = 6, // DEPLIB: [strchr x N] // GLOBALVAR: [pointer type, isconst, initid, // linkage, alignment, section, visibility, threadlocal] MODULE_CODE_GLOBALVAR = 7, // FUNCTION: [type, callingconv, isproto, linkage, paramattrs, alignment, // section, visibility, gc, unnamed_addr] MODULE_CODE_FUNCTION = 8, // ALIAS: [alias type, aliasee val#, linkage, visibility] MODULE_CODE_ALIAS_OLD = 9, MODULE_CODE_GCNAME = 11, // GCNAME: [strchr x N] MODULE_CODE_COMDAT = 12, // COMDAT: [selection_kind, name] MODULE_CODE_VSTOFFSET = 13, // VSTOFFSET: [offset] // ALIAS: [alias value type, addrspace, aliasee val#, linkage, visibility] MODULE_CODE_ALIAS = 14, MODULE_CODE_METADATA_VALUES_UNUSED = 15, // SOURCE_FILENAME: [namechar x N] MODULE_CODE_SOURCE_FILENAME = 16, // HASH: [5*i32] MODULE_CODE_HASH = 17, // IFUNC: [ifunc value type, addrspace, resolver val#, linkage, visibility] MODULE_CODE_IFUNC = 18, }; /// PARAMATTR blocks have code for defining a parameter attribute set. enum AttributeCodes { // FIXME: Remove `PARAMATTR_CODE_ENTRY_OLD' in 4.0 PARAMATTR_CODE_ENTRY_OLD = 1, // ENTRY: [paramidx0, attr0, // paramidx1, attr1...] PARAMATTR_CODE_ENTRY = 2, // ENTRY: [attrgrp0, attrgrp1, ...] PARAMATTR_GRP_CODE_ENTRY = 3 // ENTRY: [grpid, idx, attr0, attr1, ...] }; /// TYPE blocks have codes for each type primitive they use. enum TypeCodes { TYPE_CODE_NUMENTRY = 1, // NUMENTRY: [numentries] // Type Codes TYPE_CODE_VOID = 2, // VOID TYPE_CODE_FLOAT = 3, // FLOAT TYPE_CODE_DOUBLE = 4, // DOUBLE TYPE_CODE_LABEL = 5, // LABEL TYPE_CODE_OPAQUE = 6, // OPAQUE TYPE_CODE_INTEGER = 7, // INTEGER: [width] TYPE_CODE_POINTER = 8, // POINTER: [pointee type] TYPE_CODE_FUNCTION_OLD = 9, // FUNCTION: [vararg, attrid, retty, // paramty x N] TYPE_CODE_HALF = 10, // HALF TYPE_CODE_ARRAY = 11, // ARRAY: [numelts, eltty] TYPE_CODE_VECTOR = 12, // VECTOR: [numelts, eltty] // These are not with the other floating point types because they're // a late addition, and putting them in the right place breaks // binary compatibility. TYPE_CODE_X86_FP80 = 13, // X86 LONG DOUBLE TYPE_CODE_FP128 = 14, // LONG DOUBLE (112 bit mantissa) TYPE_CODE_PPC_FP128 = 15, // PPC LONG DOUBLE (2 doubles) TYPE_CODE_METADATA = 16, // METADATA TYPE_CODE_X86_MMX = 17, // X86 MMX TYPE_CODE_STRUCT_ANON = 18, // STRUCT_ANON: [ispacked, eltty x N] TYPE_CODE_STRUCT_NAME = 19, // STRUCT_NAME: [strchr x N] TYPE_CODE_STRUCT_NAMED = 20, // STRUCT_NAMED: [ispacked, eltty x N] TYPE_CODE_FUNCTION = 21, // FUNCTION: [vararg, retty, paramty x N] TYPE_CODE_TOKEN = 22 // TOKEN }; enum OperandBundleTagCode { OPERAND_BUNDLE_TAG = 1, // TAG: [strchr x N] }; enum SyncScopeNameCode { SYNC_SCOPE_NAME = 1, }; // Value symbol table codes. enum ValueSymtabCodes { VST_CODE_ENTRY = 1, // VST_ENTRY: [valueid, namechar x N] VST_CODE_BBENTRY = 2, // VST_BBENTRY: [bbid, namechar x N] VST_CODE_FNENTRY = 3, // VST_FNENTRY: [valueid, offset, namechar x N] // VST_COMBINED_ENTRY: [valueid, refguid] VST_CODE_COMBINED_ENTRY = 5 }; // The module path symbol table only has one code (MST_CODE_ENTRY). 
enum ModulePathSymtabCodes { MST_CODE_ENTRY = 1, // MST_ENTRY: [modid, namechar x N] MST_CODE_HASH = 2, // MST_HASH: [5*i32] }; // The summary section uses different codes in the per-module // and combined index cases. enum GlobalValueSummarySymtabCodes { // PERMODULE: [valueid, flags, instcount, numrefs, numrefs x valueid, // n x (valueid)] FS_PERMODULE = 1, // PERMODULE_PROFILE: [valueid, flags, instcount, numrefs, // numrefs x valueid, // n x (valueid, hotness)] FS_PERMODULE_PROFILE = 2, // PERMODULE_GLOBALVAR_INIT_REFS: [valueid, flags, n x valueid] FS_PERMODULE_GLOBALVAR_INIT_REFS = 3, // COMBINED: [valueid, modid, flags, instcount, numrefs, numrefs x valueid, // n x (valueid)] FS_COMBINED = 4, // COMBINED_PROFILE: [valueid, modid, flags, instcount, numrefs, // numrefs x valueid, // n x (valueid, hotness)] FS_COMBINED_PROFILE = 5, // COMBINED_GLOBALVAR_INIT_REFS: [valueid, modid, flags, n x valueid] FS_COMBINED_GLOBALVAR_INIT_REFS = 6, // ALIAS: [valueid, flags, valueid] FS_ALIAS = 7, // COMBINED_ALIAS: [valueid, modid, flags, valueid] FS_COMBINED_ALIAS = 8, // COMBINED_ORIGINAL_NAME: [original_name_hash] FS_COMBINED_ORIGINAL_NAME = 9, // VERSION of the summary, bumped when adding flags for instance. FS_VERSION = 10, // The list of llvm.type.test type identifiers used by the following function // that are used other than by an llvm.assume. // [n x typeid] FS_TYPE_TESTS = 11, // The list of virtual calls made by this function using // llvm.assume(llvm.type.test) intrinsics that do not have all constant // integer arguments. // [n x (typeid, offset)] FS_TYPE_TEST_ASSUME_VCALLS = 12, // The list of virtual calls made by this function using // llvm.type.checked.load intrinsics that do not have all constant integer // arguments. // [n x (typeid, offset)] FS_TYPE_CHECKED_LOAD_VCALLS = 13, // Identifies a virtual call made by this function using an // llvm.assume(llvm.type.test) intrinsic with all constant integer arguments. // [typeid, offset, n x arg] FS_TYPE_TEST_ASSUME_CONST_VCALL = 14, // Identifies a virtual call made by this function using an // llvm.type.checked.load intrinsic with all constant integer arguments. // [typeid, offset, n x arg] FS_TYPE_CHECKED_LOAD_CONST_VCALL = 15, // Assigns a GUID to a value ID. This normally appears only in combined // summaries, but it can also appear in per-module summaries for PGO data. // [valueid, guid] FS_VALUE_GUID = 16, // The list of local functions with CFI jump tables. Function names are // strings in strtab. // [n * name] FS_CFI_FUNCTION_DEFS = 17, // The list of external functions with CFI jump tables. Function names are // strings in strtab. // [n * name] FS_CFI_FUNCTION_DECLS = 18, }; enum MetadataCodes { METADATA_STRING_OLD = 1, // MDSTRING: [values] METADATA_VALUE = 2, // VALUE: [type num, value num] METADATA_NODE = 3, // NODE: [n x md num] METADATA_NAME = 4, // STRING: [values] METADATA_DISTINCT_NODE = 5, // DISTINCT_NODE: [n x md num] METADATA_KIND = 6, // [n x [id, name]] METADATA_LOCATION = 7, // [distinct, line, col, scope, inlined-at?] 
METADATA_OLD_NODE = 8, // OLD_NODE: [n x (type num, value num)] METADATA_OLD_FN_NODE = 9, // OLD_FN_NODE: [n x (type num, value num)] METADATA_NAMED_NODE = 10, // NAMED_NODE: [n x mdnodes] METADATA_ATTACHMENT = 11, // [m x [value, [n x [id, mdnode]]] METADATA_GENERIC_DEBUG = 12, // [distinct, tag, vers, header, n x md num] METADATA_SUBRANGE = 13, // [distinct, count, lo] METADATA_ENUMERATOR = 14, // [distinct, value, name] METADATA_BASIC_TYPE = 15, // [distinct, tag, name, size, align, enc] METADATA_FILE = 16, // [distinct, filename, directory, checksumkind, checksum] METADATA_DERIVED_TYPE = 17, // [distinct, ...] METADATA_COMPOSITE_TYPE = 18, // [distinct, ...] METADATA_SUBROUTINE_TYPE = 19, // [distinct, flags, types, cc] METADATA_COMPILE_UNIT = 20, // [distinct, ...] METADATA_SUBPROGRAM = 21, // [distinct, ...] METADATA_LEXICAL_BLOCK = 22, // [distinct, scope, file, line, column] METADATA_LEXICAL_BLOCK_FILE = 23, //[distinct, scope, file, discriminator] METADATA_NAMESPACE = 24, // [distinct, scope, file, name, line, exportSymbols] METADATA_TEMPLATE_TYPE = 25, // [distinct, scope, name, type, ...] METADATA_TEMPLATE_VALUE = 26, // [distinct, scope, name, type, value, ...] METADATA_GLOBAL_VAR = 27, // [distinct, ...] METADATA_LOCAL_VAR = 28, // [distinct, ...] METADATA_EXPRESSION = 29, // [distinct, n x element] METADATA_OBJC_PROPERTY = 30, // [distinct, name, file, line, ...] METADATA_IMPORTED_ENTITY = 31, // [distinct, tag, scope, entity, line, name] METADATA_MODULE = 32, // [distinct, scope, name, ...] METADATA_MACRO = 33, // [distinct, macinfo, line, name, value] METADATA_MACRO_FILE = 34, // [distinct, macinfo, line, file, ...] METADATA_STRINGS = 35, // [count, offset] blob([lengths][chars]) METADATA_GLOBAL_DECL_ATTACHMENT = 36, // [valueid, n x [id, mdnode]] METADATA_GLOBAL_VAR_EXPR = 37, // [distinct, var, expr] METADATA_INDEX_OFFSET = 38, // [offset] METADATA_INDEX = 39, // [bitpos] }; // The constants block (CONSTANTS_BLOCK_ID) describes emission for each // constant and maintains an implicit current type value. 
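(Editorial aside, not part of the diff: the comment above describes how records in the CONSTANTS_BLOCK_ID block rely on an implicit "current type" that is set by CST_CODE_SETTYPE and applies to the records that follow it. The self-contained sketch below illustrates that convention with a hypothetical flattened record stream; the Record struct and decodeConstants are illustrative names, and only the code values 1 and 4 are taken from the ConstantsCodes enum that follows.)

    // Hypothetical, self-contained illustration; not LLVM's BitcodeReader.
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    namespace {
    constexpr unsigned CST_CODE_SETTYPE = 1; // value taken from ConstantsCodes below
    constexpr unsigned CST_CODE_INTEGER = 4; // value taken from ConstantsCodes below

    struct Record {          // hypothetical flattened constant record
      unsigned Code;
      uint64_t Operand;
    };

    void decodeConstants(const std::vector<Record> &Records) {
      unsigned CurTypeID = 0; // the "implicit current type"
      for (const Record &R : Records) {
        if (R.Code == CST_CODE_SETTYPE)
          CurTypeID = static_cast<unsigned>(R.Operand); // applies to later records
        else if (R.Code == CST_CODE_INTEGER)
          std::printf("integer constant %llu of type #%u\n",
                      (unsigned long long)R.Operand, CurTypeID);
      }
    }
    } // namespace

    int main() {
      decodeConstants({{CST_CODE_SETTYPE, 3}, {CST_CODE_INTEGER, 42},
                       {CST_CODE_INTEGER, 7}, {CST_CODE_SETTYPE, 5},
                       {CST_CODE_INTEGER, 1}});
    }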
enum ConstantsCodes { CST_CODE_SETTYPE = 1, // SETTYPE: [typeid] CST_CODE_NULL = 2, // NULL CST_CODE_UNDEF = 3, // UNDEF CST_CODE_INTEGER = 4, // INTEGER: [intval] CST_CODE_WIDE_INTEGER = 5, // WIDE_INTEGER: [n x intval] CST_CODE_FLOAT = 6, // FLOAT: [fpval] CST_CODE_AGGREGATE = 7, // AGGREGATE: [n x value number] CST_CODE_STRING = 8, // STRING: [values] CST_CODE_CSTRING = 9, // CSTRING: [values] CST_CODE_CE_BINOP = 10, // CE_BINOP: [opcode, opval, opval] CST_CODE_CE_CAST = 11, // CE_CAST: [opcode, opty, opval] CST_CODE_CE_GEP = 12, // CE_GEP: [n x operands] CST_CODE_CE_SELECT = 13, // CE_SELECT: [opval, opval, opval] CST_CODE_CE_EXTRACTELT = 14, // CE_EXTRACTELT: [opty, opval, opval] CST_CODE_CE_INSERTELT = 15, // CE_INSERTELT: [opval, opval, opval] CST_CODE_CE_SHUFFLEVEC = 16, // CE_SHUFFLEVEC: [opval, opval, opval] CST_CODE_CE_CMP = 17, // CE_CMP: [opty, opval, opval, pred] CST_CODE_INLINEASM_OLD = 18, // INLINEASM: [sideeffect|alignstack, // asmstr,conststr] CST_CODE_CE_SHUFVEC_EX = 19, // SHUFVEC_EX: [opty, opval, opval, opval] CST_CODE_CE_INBOUNDS_GEP = 20, // INBOUNDS_GEP: [n x operands] CST_CODE_BLOCKADDRESS = 21, // CST_CODE_BLOCKADDRESS [fnty, fnval, bb#] CST_CODE_DATA = 22, // DATA: [n x elements] CST_CODE_INLINEASM = 23, // INLINEASM: [sideeffect|alignstack| // asmdialect,asmstr,conststr] CST_CODE_CE_GEP_WITH_INRANGE_INDEX = 24, // [opty, flags, n x operands] }; /// CastOpcodes - These are values used in the bitcode files to encode which /// cast a CST_CODE_CE_CAST or a XXX refers to. The values of these enums /// have no fixed relation to the LLVM IR enum values. Changing these will /// break compatibility with old files. enum CastOpcodes { CAST_TRUNC = 0, CAST_ZEXT = 1, CAST_SEXT = 2, CAST_FPTOUI = 3, CAST_FPTOSI = 4, CAST_UITOFP = 5, CAST_SITOFP = 6, CAST_FPTRUNC = 7, CAST_FPEXT = 8, CAST_PTRTOINT = 9, CAST_INTTOPTR = 10, CAST_BITCAST = 11, CAST_ADDRSPACECAST = 12 }; /// BinaryOpcodes - These are values used in the bitcode files to encode which /// binop a CST_CODE_CE_BINOP or a XXX refers to. The values of these enums /// have no fixed relation to the LLVM IR enum values. Changing these will /// break compatibility with old files. enum BinaryOpcodes { BINOP_ADD = 0, BINOP_SUB = 1, BINOP_MUL = 2, BINOP_UDIV = 3, BINOP_SDIV = 4, // overloaded for FP BINOP_UREM = 5, BINOP_SREM = 6, // overloaded for FP BINOP_SHL = 7, BINOP_LSHR = 8, BINOP_ASHR = 9, BINOP_AND = 10, BINOP_OR = 11, BINOP_XOR = 12 }; /// These are values used in the bitcode files to encode AtomicRMW operations. /// The values of these enums have no fixed relation to the LLVM IR enum /// values. Changing these will break compatibility with old files. enum RMWOperations { RMW_XCHG = 0, RMW_ADD = 1, RMW_SUB = 2, RMW_AND = 3, RMW_NAND = 4, RMW_OR = 5, RMW_XOR = 6, RMW_MAX = 7, RMW_MIN = 8, RMW_UMAX = 9, RMW_UMIN = 10 }; /// OverflowingBinaryOperatorOptionalFlags - Flags for serializing /// OverflowingBinaryOperator's SubclassOptionalData contents. enum OverflowingBinaryOperatorOptionalFlags { OBO_NO_UNSIGNED_WRAP = 0, OBO_NO_SIGNED_WRAP = 1 }; +/// FastMath Flags +/// This is a fixed layout derived from the bitcode emitted by LLVM 5.0 +/// intended to decouple the in-memory representation from the serialization. 
+enum FastMathMap { + UnsafeAlgebra = (1 << 0), // Legacy + NoNaNs = (1 << 1), + NoInfs = (1 << 2), + NoSignedZeros = (1 << 3), + AllowReciprocal = (1 << 4), + AllowContract = (1 << 5), + ApproxFunc = (1 << 6), + AllowReassoc = (1 << 7) +}; + /// PossiblyExactOperatorOptionalFlags - Flags for serializing /// PossiblyExactOperator's SubclassOptionalData contents. enum PossiblyExactOperatorOptionalFlags { PEO_EXACT = 0 }; /// Encoded AtomicOrdering values. enum AtomicOrderingCodes { ORDERING_NOTATOMIC = 0, ORDERING_UNORDERED = 1, ORDERING_MONOTONIC = 2, ORDERING_ACQUIRE = 3, ORDERING_RELEASE = 4, ORDERING_ACQREL = 5, ORDERING_SEQCST = 6 }; /// Markers and flags for call instruction. enum CallMarkersFlags { CALL_TAIL = 0, CALL_CCONV = 1, CALL_MUSTTAIL = 14, CALL_EXPLICIT_TYPE = 15, CALL_NOTAIL = 16, CALL_FMF = 17 // Call has optional fast-math-flags. }; // The function body block (FUNCTION_BLOCK_ID) describes function bodies. It // can contain a constant block (CONSTANTS_BLOCK_ID). enum FunctionCodes { FUNC_CODE_DECLAREBLOCKS = 1, // DECLAREBLOCKS: [n] FUNC_CODE_INST_BINOP = 2, // BINOP: [opcode, ty, opval, opval] FUNC_CODE_INST_CAST = 3, // CAST: [opcode, ty, opty, opval] FUNC_CODE_INST_GEP_OLD = 4, // GEP: [n x operands] FUNC_CODE_INST_SELECT = 5, // SELECT: [ty, opval, opval, opval] FUNC_CODE_INST_EXTRACTELT = 6, // EXTRACTELT: [opty, opval, opval] FUNC_CODE_INST_INSERTELT = 7, // INSERTELT: [ty, opval, opval, opval] FUNC_CODE_INST_SHUFFLEVEC = 8, // SHUFFLEVEC: [ty, opval, opval, opval] FUNC_CODE_INST_CMP = 9, // CMP: [opty, opval, opval, pred] FUNC_CODE_INST_RET = 10, // RET: [opty,opval] FUNC_CODE_INST_BR = 11, // BR: [bb#, bb#, cond] or [bb#] FUNC_CODE_INST_SWITCH = 12, // SWITCH: [opty, op0, op1, ...] FUNC_CODE_INST_INVOKE = 13, // INVOKE: [attr, fnty, op0,op1, ...] // 14 is unused. FUNC_CODE_INST_UNREACHABLE = 15, // UNREACHABLE FUNC_CODE_INST_PHI = 16, // PHI: [ty, val0,bb0, ...] // 17 is unused. // 18 is unused. FUNC_CODE_INST_ALLOCA = 19, // ALLOCA: [instty, opty, op, align] FUNC_CODE_INST_LOAD = 20, // LOAD: [opty, op, align, vol] // 21 is unused. // 22 is unused. FUNC_CODE_INST_VAARG = 23, // VAARG: [valistty, valist, instty] // This store code encodes the pointer type, rather than the value type // this is so information only available in the pointer type (e.g. address // spaces) is retained. FUNC_CODE_INST_STORE_OLD = 24, // STORE: [ptrty,ptr,val, align, vol] // 25 is unused. FUNC_CODE_INST_EXTRACTVAL = 26, // EXTRACTVAL: [n x operands] FUNC_CODE_INST_INSERTVAL = 27, // INSERTVAL: [n x operands] // fcmp/icmp returning Int1TY or vector of Int1Ty. Same as CMP, exists to // support legacy vicmp/vfcmp instructions. FUNC_CODE_INST_CMP2 = 28, // CMP2: [opty, opval, opval, pred] // new select on i1 or [N x i1] FUNC_CODE_INST_VSELECT = 29, // VSELECT: [ty,opval,opval,predty,pred] FUNC_CODE_INST_INBOUNDS_GEP_OLD = 30, // INBOUNDS_GEP: [n x operands] FUNC_CODE_INST_INDIRECTBR = 31, // INDIRECTBR: [opty, op0, op1, ...] // 32 is unused. FUNC_CODE_DEBUG_LOC_AGAIN = 33, // DEBUG_LOC_AGAIN FUNC_CODE_INST_CALL = 34, // CALL: [attr, cc, fnty, fnid, args...] 
FUNC_CODE_DEBUG_LOC = 35, // DEBUG_LOC: [Line,Col,ScopeVal, IAVal] FUNC_CODE_INST_FENCE = 36, // FENCE: [ordering, synchscope] FUNC_CODE_INST_CMPXCHG_OLD = 37, // CMPXCHG: [ptrty,ptr,cmp,new, align, vol, // ordering, synchscope] FUNC_CODE_INST_ATOMICRMW = 38, // ATOMICRMW: [ptrty,ptr,val, operation, // align, vol, // ordering, synchscope] FUNC_CODE_INST_RESUME = 39, // RESUME: [opval] FUNC_CODE_INST_LANDINGPAD_OLD = 40, // LANDINGPAD: [ty,val,val,num,id0,val0...] FUNC_CODE_INST_LOADATOMIC = 41, // LOAD: [opty, op, align, vol, // ordering, synchscope] FUNC_CODE_INST_STOREATOMIC_OLD = 42, // STORE: [ptrty,ptr,val, align, vol // ordering, synchscope] FUNC_CODE_INST_GEP = 43, // GEP: [inbounds, n x operands] FUNC_CODE_INST_STORE = 44, // STORE: [ptrty,ptr,valty,val, align, vol] FUNC_CODE_INST_STOREATOMIC = 45, // STORE: [ptrty,ptr,val, align, vol FUNC_CODE_INST_CMPXCHG = 46, // CMPXCHG: [ptrty,ptr,valty,cmp,new, align, // vol,ordering,synchscope] FUNC_CODE_INST_LANDINGPAD = 47, // LANDINGPAD: [ty,val,num,id0,val0...] FUNC_CODE_INST_CLEANUPRET = 48, // CLEANUPRET: [val] or [val,bb#] FUNC_CODE_INST_CATCHRET = 49, // CATCHRET: [val,bb#] FUNC_CODE_INST_CATCHPAD = 50, // CATCHPAD: [bb#,bb#,num,args...] FUNC_CODE_INST_CLEANUPPAD = 51, // CLEANUPPAD: [num,args...] FUNC_CODE_INST_CATCHSWITCH = 52, // CATCHSWITCH: [num,args...] or [num,args...,bb] // 53 is unused. // 54 is unused. FUNC_CODE_OPERAND_BUNDLE = 55, // OPERAND_BUNDLE: [tag#, value...] }; enum UseListCodes { USELIST_CODE_DEFAULT = 1, // DEFAULT: [index..., value-id] USELIST_CODE_BB = 2 // BB: [index..., bb-id] }; enum AttributeKindCodes { // = 0 is unused ATTR_KIND_ALIGNMENT = 1, ATTR_KIND_ALWAYS_INLINE = 2, ATTR_KIND_BY_VAL = 3, ATTR_KIND_INLINE_HINT = 4, ATTR_KIND_IN_REG = 5, ATTR_KIND_MIN_SIZE = 6, ATTR_KIND_NAKED = 7, ATTR_KIND_NEST = 8, ATTR_KIND_NO_ALIAS = 9, ATTR_KIND_NO_BUILTIN = 10, ATTR_KIND_NO_CAPTURE = 11, ATTR_KIND_NO_DUPLICATE = 12, ATTR_KIND_NO_IMPLICIT_FLOAT = 13, ATTR_KIND_NO_INLINE = 14, ATTR_KIND_NON_LAZY_BIND = 15, ATTR_KIND_NO_RED_ZONE = 16, ATTR_KIND_NO_RETURN = 17, ATTR_KIND_NO_UNWIND = 18, ATTR_KIND_OPTIMIZE_FOR_SIZE = 19, ATTR_KIND_READ_NONE = 20, ATTR_KIND_READ_ONLY = 21, ATTR_KIND_RETURNED = 22, ATTR_KIND_RETURNS_TWICE = 23, ATTR_KIND_S_EXT = 24, ATTR_KIND_STACK_ALIGNMENT = 25, ATTR_KIND_STACK_PROTECT = 26, ATTR_KIND_STACK_PROTECT_REQ = 27, ATTR_KIND_STACK_PROTECT_STRONG = 28, ATTR_KIND_STRUCT_RET = 29, ATTR_KIND_SANITIZE_ADDRESS = 30, ATTR_KIND_SANITIZE_THREAD = 31, ATTR_KIND_SANITIZE_MEMORY = 32, ATTR_KIND_UW_TABLE = 33, ATTR_KIND_Z_EXT = 34, ATTR_KIND_BUILTIN = 35, ATTR_KIND_COLD = 36, ATTR_KIND_OPTIMIZE_NONE = 37, ATTR_KIND_IN_ALLOCA = 38, ATTR_KIND_NON_NULL = 39, ATTR_KIND_JUMP_TABLE = 40, ATTR_KIND_DEREFERENCEABLE = 41, ATTR_KIND_DEREFERENCEABLE_OR_NULL = 42, ATTR_KIND_CONVERGENT = 43, ATTR_KIND_SAFESTACK = 44, ATTR_KIND_ARGMEMONLY = 45, ATTR_KIND_SWIFT_SELF = 46, ATTR_KIND_SWIFT_ERROR = 47, ATTR_KIND_NO_RECURSE = 48, ATTR_KIND_INACCESSIBLEMEM_ONLY = 49, ATTR_KIND_INACCESSIBLEMEM_OR_ARGMEMONLY = 50, ATTR_KIND_ALLOC_SIZE = 51, ATTR_KIND_WRITEONLY = 52, ATTR_KIND_SPECULATABLE = 53, ATTR_KIND_STRICT_FP = 54, ATTR_KIND_SANITIZE_HWADDRESS = 55, }; enum ComdatSelectionKindCodes { COMDAT_SELECTION_KIND_ANY = 1, COMDAT_SELECTION_KIND_EXACT_MATCH = 2, COMDAT_SELECTION_KIND_LARGEST = 3, COMDAT_SELECTION_KIND_NO_DUPLICATES = 4, COMDAT_SELECTION_KIND_SAME_SIZE = 5, }; enum StrtabCodes { STRTAB_BLOB = 1, }; enum SymtabCodes { SYMTAB_BLOB = 1, }; } // End bitc namespace } // End llvm namespace #endif Index: 
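(Editorial aside, not part of the diff: the FastMathMap enum merged into LLVMBitCodes.h above pins down the on-disk bit layout for fast-math flags, decoupled from the in-memory FastMathFlags encoding. A minimal sketch of decoding such a bitfield follows; DecodedFMF and decodeFastMathBits are hypothetical stand-ins for llvm::FastMathFlags and the bitcode reader, and treating the legacy UnsafeAlgebra bit as implying all individual flags is an assumption about how old bitcode would be auto-upgraded.)

    // Hypothetical decoder for the serialized fast-math bits; the shift values
    // mirror the FastMathMap enum in the hunk above.
    #include <cstdint>

    struct DecodedFMF {            // stand-in for llvm::FastMathFlags
      bool NoNaNs = false, NoInfs = false, NoSignedZeros = false;
      bool AllowReciprocal = false, AllowContract = false;
      bool ApproxFunc = false, AllowReassoc = false;
    };

    DecodedFMF decodeFastMathBits(uint64_t Bits) {
      DecodedFMF F;
      const bool Legacy = Bits & (1 << 0);   // UnsafeAlgebra (legacy); assumed to imply all flags
      F.NoNaNs          = Legacy || (Bits & (1 << 1));
      F.NoInfs          = Legacy || (Bits & (1 << 2));
      F.NoSignedZeros   = Legacy || (Bits & (1 << 3));
      F.AllowReciprocal = Legacy || (Bits & (1 << 4));
      F.AllowContract   = Legacy || (Bits & (1 << 5));
      F.ApproxFunc      = Legacy || (Bits & (1 << 6));
      F.AllowReassoc    = Legacy || (Bits & (1 << 7));
      return F;
    }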
head/contrib/llvm/include/llvm/MC/MCAsmMacro.h =================================================================== --- head/contrib/llvm/include/llvm/MC/MCAsmMacro.h (revision 329982) +++ head/contrib/llvm/include/llvm/MC/MCAsmMacro.h (revision 329983) @@ -1,38 +1,38 @@ //===- MCAsmMacro.h - Assembly Macros ---------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #ifndef LLVM_MC_MCASMMACRO_H #define LLVM_MC_MCASMMACRO_H #include "llvm/MC/MCParser/MCAsmLexer.h" namespace llvm { struct MCAsmMacroParameter { StringRef Name; std::vector<AsmToken> Value; bool Required = false; bool Vararg = false; MCAsmMacroParameter() = default; }; typedef std::vector<MCAsmMacroParameter> MCAsmMacroParameters; struct MCAsmMacro { StringRef Name; StringRef Body; MCAsmMacroParameters Parameters; public: MCAsmMacro(StringRef N, StringRef B, MCAsmMacroParameters P) : Name(N), Body(B), Parameters(std::move(P)) {} }; -}; // namespace llvm +} // namespace llvm #endif
Index: head/contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h =================================================================== --- head/contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h (revision 329982) +++ head/contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h (revision 329983) @@ -1,575 +1,568 @@ //===- llvm/Transforms/Utils/LoopUtils.h - Loop utilities -------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file defines some loop transformation utilities. // //===----------------------------------------------------------------------===// #ifndef LLVM_TRANSFORMS_UTILS_LOOPUTILS_H #define LLVM_TRANSFORMS_UTILS_LOOPUTILS_H #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/DemandedBits.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Operator.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Casting.h" namespace llvm { class AliasSet; class AliasSetTracker; class BasicBlock; class DataLayout; class Loop; class LoopInfo; class OptimizationRemarkEmitter; class PredicatedScalarEvolution; class PredIteratorCache; class ScalarEvolution; class SCEV; class TargetLibraryInfo; class TargetTransformInfo; /// \brief Captures loop safety information. /// It keeps track of whether the loop and its header may throw an exception. struct LoopSafetyInfo { bool MayThrow = false; // The current loop contains an instruction which // may throw. bool HeaderMayThrow = false; // Same as previous, but specific to loop header // Used to update funclet bundle operands. DenseMap<BasicBlock *, ColorVector> BlockColors; LoopSafetyInfo() = default; }; /// The RecurrenceDescriptor is used to identify recurrence variables in a /// loop. Reduction is a special case of recurrence that has uses of the /// recurrence variable outside the loop. The method isReductionPHI identifies /// reductions that are basic recurrences.
/// /// Basic recurrences are defined as the summation, product, OR, AND, XOR, min, /// or max of a set of terms. For example: for(i=0; i &CI) : StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK), UnsafeAlgebraInst(UAI), RecurrenceType(RT), IsSigned(Signed) { CastInsts.insert(CI.begin(), CI.end()); } /// This POD struct holds information about a potential recurrence operation. class InstDesc { public: InstDesc(bool IsRecur, Instruction *I, Instruction *UAI = nullptr) : IsRecurrence(IsRecur), PatternLastInst(I), MinMaxKind(MRK_Invalid), UnsafeAlgebraInst(UAI) {} InstDesc(Instruction *I, MinMaxRecurrenceKind K, Instruction *UAI = nullptr) : IsRecurrence(true), PatternLastInst(I), MinMaxKind(K), UnsafeAlgebraInst(UAI) {} bool isRecurrence() { return IsRecurrence; } bool hasUnsafeAlgebra() { return UnsafeAlgebraInst != nullptr; } Instruction *getUnsafeAlgebraInst() { return UnsafeAlgebraInst; } MinMaxRecurrenceKind getMinMaxKind() { return MinMaxKind; } Instruction *getPatternInst() { return PatternLastInst; } private: // Is this instruction a recurrence candidate. bool IsRecurrence; // The last instruction in a min/max pattern (select of the select(icmp()) // pattern), or the current recurrence instruction otherwise. Instruction *PatternLastInst; // If this is a min/max pattern the comparison predicate. MinMaxRecurrenceKind MinMaxKind; // Recurrence has unsafe algebra. Instruction *UnsafeAlgebraInst; }; /// Returns a struct describing if the instruction 'I' can be a recurrence /// variable of type 'Kind'. If the recurrence is a min/max pattern of /// select(icmp()) this function advances the instruction pointer 'I' from the /// compare instruction to the select instruction and stores this pointer in /// 'PatternLastInst' member of the returned struct. static InstDesc isRecurrenceInstr(Instruction *I, RecurrenceKind Kind, InstDesc &Prev, bool HasFunNoNaNAttr); /// Returns true if instruction I has multiple uses in Insts static bool hasMultipleUsesOf(Instruction *I, SmallPtrSetImpl &Insts); /// Returns true if all uses of the instruction I is within the Set. static bool areAllUsesIn(Instruction *I, SmallPtrSetImpl &Set); /// Returns a struct describing if the instruction if the instruction is a /// Select(ICmp(X, Y), X, Y) instruction pattern corresponding to a min(X, Y) /// or max(X, Y). static InstDesc isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev); /// Returns identity corresponding to the RecurrenceKind. static Constant *getRecurrenceIdentity(RecurrenceKind K, Type *Tp); /// Returns the opcode of binary operation corresponding to the /// RecurrenceKind. static unsigned getRecurrenceBinOp(RecurrenceKind Kind); /// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind. static Value *createMinMaxOp(IRBuilder<> &Builder, MinMaxRecurrenceKind RK, Value *Left, Value *Right); /// Returns true if Phi is a reduction of type Kind and adds it to the - /// RecurrenceDescriptor. + /// RecurrenceDescriptor. If either \p DB is non-null or \p AC and \p DT are + /// non-null, the minimal bit width needed to compute the reduction will be + /// computed. static bool AddReductionVar(PHINode *Phi, RecurrenceKind Kind, Loop *TheLoop, bool HasFunNoNaNAttr, - RecurrenceDescriptor &RedDes); + RecurrenceDescriptor &RedDes, + DemandedBits *DB = nullptr, + AssumptionCache *AC = nullptr, + DominatorTree *DT = nullptr); - /// Returns true if Phi is a reduction in TheLoop. The RecurrenceDescriptor is - /// returned in RedDes. + /// Returns true if Phi is a reduction in TheLoop. 
The RecurrenceDescriptor + /// is returned in RedDes. If either \p DB is non-null or \p AC and \p DT are + /// non-null, the minimal bit width needed to compute the reduction will be + /// computed. static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, - RecurrenceDescriptor &RedDes); + RecurrenceDescriptor &RedDes, + DemandedBits *DB = nullptr, + AssumptionCache *AC = nullptr, + DominatorTree *DT = nullptr); /// Returns true if Phi is a first-order recurrence. A first-order recurrence /// is a non-reduction recurrence relation in which the value of the /// recurrence in the current loop iteration equals a value defined in the /// previous iteration. \p SinkAfter includes pairs of instructions where the /// first will be rescheduled to appear after the second if/when the loop is /// vectorized. It may be augmented with additional pairs if needed in order /// to handle Phi as a first-order recurrence. static bool isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop, DenseMap &SinkAfter, DominatorTree *DT); RecurrenceKind getRecurrenceKind() { return Kind; } MinMaxRecurrenceKind getMinMaxRecurrenceKind() { return MinMaxKind; } TrackingVH getRecurrenceStartValue() { return StartValue; } Instruction *getLoopExitInstr() { return LoopExitInstr; } /// Returns true if the recurrence has unsafe algebra which requires a relaxed /// floating-point model. bool hasUnsafeAlgebra() { return UnsafeAlgebraInst != nullptr; } /// Returns first unsafe algebra instruction in the PHI node's use-chain. Instruction *getUnsafeAlgebraInst() { return UnsafeAlgebraInst; } /// Returns true if the recurrence kind is an integer kind. static bool isIntegerRecurrenceKind(RecurrenceKind Kind); /// Returns true if the recurrence kind is a floating point kind. static bool isFloatingPointRecurrenceKind(RecurrenceKind Kind); /// Returns true if the recurrence kind is an arithmetic kind. static bool isArithmeticRecurrenceKind(RecurrenceKind Kind); - - /// Determines if Phi may have been type-promoted. If Phi has a single user - /// that ANDs the Phi with a type mask, return the user. RT is updated to - /// account for the narrower bit width represented by the mask, and the AND - /// instruction is added to CI. - static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT, - SmallPtrSetImpl &Visited, - SmallPtrSetImpl &CI); - - /// Returns true if all the source operands of a recurrence are either - /// SExtInsts or ZExtInsts. This function is intended to be used with - /// lookThroughAnd to determine if the recurrence has been type-promoted. The - /// source operands are added to CI, and IsSigned is updated to indicate if - /// all source operands are SExtInsts. - static bool getSourceExtensionKind(Instruction *Start, Instruction *Exit, - Type *RT, bool &IsSigned, - SmallPtrSetImpl &Visited, - SmallPtrSetImpl &CI); /// Returns the type of the recurrence. This type can be narrower than the /// actual type of the Phi if the recurrence has been type-promoted. Type *getRecurrenceType() { return RecurrenceType; } /// Returns a reference to the instructions used for type-promoting the /// recurrence. SmallPtrSet &getCastInsts() { return CastInsts; } /// Returns true if all source operands of the recurrence are SExtInsts. bool isSigned() { return IsSigned; } private: // The starting value of the recurrence. // It does not have to be zero! TrackingVH StartValue; // The instruction who's value is used outside the loop. Instruction *LoopExitInstr = nullptr; // The kind of the recurrence. 
RecurrenceKind Kind = RK_NoRecurrence; // If this a min/max recurrence the kind of recurrence. MinMaxRecurrenceKind MinMaxKind = MRK_Invalid; // First occurrence of unasfe algebra in the PHI's use-chain. Instruction *UnsafeAlgebraInst = nullptr; // The type of the recurrence. Type *RecurrenceType = nullptr; // True if all source operands of the recurrence are SExtInsts. bool IsSigned = false; // Instructions used for type-promoting the recurrence. SmallPtrSet CastInsts; }; /// A struct for saving information about induction variables. class InductionDescriptor { public: /// This enum represents the kinds of inductions that we support. enum InductionKind { IK_NoInduction, ///< Not an induction variable. IK_IntInduction, ///< Integer induction variable. Step = C. IK_PtrInduction, ///< Pointer induction var. Step = C / sizeof(elem). IK_FpInduction ///< Floating point induction variable. }; public: /// Default constructor - creates an invalid induction. InductionDescriptor() = default; /// Get the consecutive direction. Returns: /// 0 - unknown or non-consecutive. /// 1 - consecutive and increasing. /// -1 - consecutive and decreasing. int getConsecutiveDirection() const; /// Compute the transformed value of Index at offset StartValue using step /// StepValue. /// For integer induction, returns StartValue + Index * StepValue. /// For pointer induction, returns StartValue[Index * StepValue]. /// FIXME: The newly created binary instructions should contain nsw/nuw /// flags, which can be found from the original scalar operations. Value *transform(IRBuilder<> &B, Value *Index, ScalarEvolution *SE, const DataLayout& DL) const; Value *getStartValue() const { return StartValue; } InductionKind getKind() const { return IK; } const SCEV *getStep() const { return Step; } ConstantInt *getConstIntStepValue() const; /// Returns true if \p Phi is an induction in the loop \p L. If \p Phi is an /// induction, the induction descriptor \p D will contain the data describing /// this induction. If by some other means the caller has a better SCEV /// expression for \p Phi than the one returned by the ScalarEvolution /// analysis, it can be passed through \p Expr. If the def-use chain /// associated with the phi includes casts (that we know we can ignore /// under proper runtime checks), they are passed through \p CastsToIgnore. static bool isInductionPHI(PHINode *Phi, const Loop* L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr = nullptr, SmallVectorImpl *CastsToIgnore = nullptr); /// Returns true if \p Phi is a floating point induction in the loop \p L. /// If \p Phi is an induction, the induction descriptor \p D will contain /// the data describing this induction. static bool isFPInductionPHI(PHINode *Phi, const Loop* L, ScalarEvolution *SE, InductionDescriptor &D); /// Returns true if \p Phi is a loop \p L induction, in the context associated /// with the run-time predicate of PSE. If \p Assume is true, this can add /// further SCEV predicates to \p PSE in order to prove that \p Phi is an /// induction. /// If \p Phi is an induction, \p D will contain the data describing this /// induction. static bool isInductionPHI(PHINode *Phi, const Loop* L, PredicatedScalarEvolution &PSE, InductionDescriptor &D, bool Assume = false); /// Returns true if the induction type is FP and the binary operator does /// not have the "fast-math" property. Such operation requires a relaxed FP /// mode. 
bool hasUnsafeAlgebra() { return InductionBinOp && !cast(InductionBinOp)->isFast(); } /// Returns induction operator that does not have "fast-math" property /// and requires FP unsafe mode. Instruction *getUnsafeAlgebraInst() { if (!InductionBinOp || cast(InductionBinOp)->isFast()) return nullptr; return InductionBinOp; } /// Returns binary opcode of the induction operator. Instruction::BinaryOps getInductionOpcode() const { return InductionBinOp ? InductionBinOp->getOpcode() : Instruction::BinaryOpsEnd; } /// Returns a reference to the type cast instructions in the induction /// update chain, that are redundant when guarded with a runtime /// SCEV overflow check. const SmallVectorImpl &getCastInsts() const { return RedundantCasts; } private: /// Private constructor - used by \c isInductionPHI. InductionDescriptor(Value *Start, InductionKind K, const SCEV *Step, BinaryOperator *InductionBinOp = nullptr, SmallVectorImpl *Casts = nullptr); /// Start value. TrackingVH StartValue; /// Induction kind. InductionKind IK = IK_NoInduction; /// Step value. const SCEV *Step = nullptr; // Instruction that advances induction variable. BinaryOperator *InductionBinOp = nullptr; // Instructions used for type-casts of the induction variable, // that are redundant when guarded with a runtime SCEV overflow check. SmallVector RedundantCasts; }; BasicBlock *InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA); /// Ensure that all exit blocks of the loop are dedicated exits. /// /// For any loop exit block with non-loop predecessors, we split the loop /// predecessors to use a dedicated loop exit block. We update the dominator /// tree and loop info if provided, and will preserve LCSSA if requested. bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA); /// Ensures LCSSA form for every instruction from the Worklist in the scope of /// innermost containing loop. /// /// For the given instruction which have uses outside of the loop, an LCSSA PHI /// node is inserted and the uses outside the loop are rewritten to use this /// node. /// /// LoopInfo and DominatorTree are required and, since the routine makes no /// changes to CFG, preserved. /// /// Returns true if any modifications are made. bool formLCSSAForInstructions(SmallVectorImpl &Worklist, DominatorTree &DT, LoopInfo &LI); /// \brief Put loop into LCSSA form. /// /// Looks at all instructions in the loop which have uses outside of the /// current loop. For each, an LCSSA PHI node is inserted and the uses outside /// the loop are rewritten to use this node. /// /// LoopInfo and DominatorTree are required and preserved. /// /// If ScalarEvolution is passed in, it will be preserved. /// /// Returns true if any modifications are made to the loop. bool formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE); /// \brief Put a loop nest into LCSSA form. /// /// This recursively forms LCSSA for a loop nest. /// /// LoopInfo and DominatorTree are required and preserved. /// /// If ScalarEvolution is passed in, it will be preserved. /// /// Returns true if any modifications are made to the loop. bool formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE); /// \brief Walk the specified region of the CFG (defined by all blocks /// dominated by the specified block, and that are in the current loop) in /// reverse depth first order w.r.t the DominatorTree. 
This allows us to visit /// uses before definitions, allowing us to sink a loop body in one pass without /// iteration. Takes DomTreeNode, AliasAnalysis, LoopInfo, DominatorTree, /// DataLayout, TargetLibraryInfo, Loop, AliasSet information for all /// instructions of the loop and loop safety information as /// arguments. Diagnostics is emitted via \p ORE. It returns changed status. bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, TargetLibraryInfo *, TargetTransformInfo *, Loop *, AliasSetTracker *, LoopSafetyInfo *, OptimizationRemarkEmitter *ORE); /// \brief Walk the specified region of the CFG (defined by all blocks /// dominated by the specified block, and that are in the current loop) in depth /// first order w.r.t the DominatorTree. This allows us to visit definitions /// before uses, allowing us to hoist a loop body in one pass without iteration. /// Takes DomTreeNode, AliasAnalysis, LoopInfo, DominatorTree, DataLayout, /// TargetLibraryInfo, Loop, AliasSet information for all instructions of the /// loop and loop safety information as arguments. Diagnostics is emitted via \p /// ORE. It returns changed status. bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, TargetLibraryInfo *, Loop *, AliasSetTracker *, LoopSafetyInfo *, OptimizationRemarkEmitter *ORE); /// This function deletes dead loops. The caller of this function needs to /// guarantee that the loop is infact dead. /// The function requires a bunch or prerequisites to be present: /// - The loop needs to be in LCSSA form /// - The loop needs to have a Preheader /// - A unique dedicated exit block must exist /// /// This also updates the relevant analysis information in \p DT, \p SE, and \p /// LI if pointers to those are provided. /// It also updates the loop PM if an updater struct is provided. void deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, LoopInfo *LI); /// \brief Try to promote memory values to scalars by sinking stores out of /// the loop and moving loads to before the loop. We do this by looping over /// the stores in the loop, looking for stores to Must pointers which are /// loop invariant. It takes a set of must-alias values, Loop exit blocks /// vector, loop exit blocks insertion point vector, PredIteratorCache, /// LoopInfo, DominatorTree, Loop, AliasSet information for all instructions /// of the loop and loop safety information as arguments. /// Diagnostics is emitted via \p ORE. It returns changed status. bool promoteLoopAccessesToScalars(const SmallSetVector &, SmallVectorImpl &, SmallVectorImpl &, PredIteratorCache &, LoopInfo *, DominatorTree *, const TargetLibraryInfo *, Loop *, AliasSetTracker *, LoopSafetyInfo *, OptimizationRemarkEmitter *); /// Does a BFS from a given node to all of its children inside a given loop. /// The returned vector of nodes includes the starting point. SmallVector collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop); /// \brief Computes safety information for a loop /// checks loop body & header for the possibility of may throw /// exception, it takes LoopSafetyInfo and loop as argument. /// Updates safety information in LoopSafetyInfo argument. void computeLoopSafetyInfo(LoopSafetyInfo *, Loop *); /// Returns true if the instruction in a loop is guaranteed to execute at least /// once. bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo); /// \brief Returns the instructions that use values defined in the loop. 
SmallVector findDefsUsedOutsideOfLoop(Loop *L); /// \brief Find string metadata for loop /// /// If it has a value (e.g. {"llvm.distribute", 1} return the value as an /// operand or null otherwise. If the string metadata is not found return /// Optional's not-a-value. Optional findStringMetadataForLoop(Loop *TheLoop, StringRef Name); /// \brief Set input string into loop metadata by keeping other values intact. void addStringMetadataToLoop(Loop *TheLoop, const char *MDString, unsigned V = 0); /// \brief Get a loop's estimated trip count based on branch weight metadata. /// Returns 0 when the count is estimated to be 0, or None when a meaningful /// estimate can not be made. Optional getLoopEstimatedTripCount(Loop *L); /// Helper to consistently add the set of standard passes to a loop pass's \c /// AnalysisUsage. /// /// All loop passes should call this as part of implementing their \c /// getAnalysisUsage. void getLoopAnalysisUsage(AnalysisUsage &AU); /// Returns true if the hoister and sinker can handle this instruction. /// If SafetyInfo is null, we are checking for sinking instructions from /// preheader to loop body (no speculation). /// If SafetyInfo is not null, we are checking for hoisting/sinking /// instructions from loop body to preheader/exit. Check if the instruction /// can execute speculatively. /// If \p ORE is set use it to emit optimization remarks. bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, Loop *CurLoop, AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE = nullptr); /// Generates a vector reduction using shufflevectors to reduce the value. Value *getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op, RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind = RecurrenceDescriptor::MRK_Invalid, ArrayRef RedOps = ArrayRef()); /// Create a target reduction of the given vector. The reduction operation /// is described by the \p Opcode parameter. min/max reductions require /// additional information supplied in \p Flags. /// The target is queried to determine if intrinsics or shuffle sequences are /// required to implement the reduction. Value * createSimpleTargetReduction(IRBuilder<> &B, const TargetTransformInfo *TTI, unsigned Opcode, Value *Src, TargetTransformInfo::ReductionFlags Flags = TargetTransformInfo::ReductionFlags(), ArrayRef RedOps = ArrayRef()); /// Create a generic target reduction using a recurrence descriptor \p Desc /// The target is queried to determine if intrinsics or shuffle sequences are /// required to implement the reduction. Value *createTargetReduction(IRBuilder<> &B, const TargetTransformInfo *TTI, RecurrenceDescriptor &Desc, Value *Src, bool NoNaN = false); /// Get the intersection (logical and) of all of the potential IR flags /// of each scalar operation (VL) that will be converted into a vector (I). /// If OpValue is non-null, we only consider operations similar to OpValue /// when intersecting. /// Flag set: NSW, NUW, exact, and all of fast-math. 
void propagateIRFlags(Value *I, ArrayRef VL, Value *OpValue = nullptr); } // end namespace llvm #endif // LLVM_TRANSFORMS_UTILS_LOOPUTILS_H Index: head/contrib/llvm/lib/Analysis/ScalarEvolution.cpp =================================================================== --- head/contrib/llvm/lib/Analysis/ScalarEvolution.cpp (revision 329982) +++ head/contrib/llvm/lib/Analysis/ScalarEvolution.cpp (revision 329983) @@ -1,11781 +1,11788 @@ //===- ScalarEvolution.cpp - Scalar Evolution Analysis --------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file contains the implementation of the scalar evolution analysis // engine, which is used primarily to analyze expressions involving induction // variables in loops. // // There are several aspects to this library. First is the representation of // scalar expressions, which are represented as subclasses of the SCEV class. // These classes are used to represent certain types of subexpressions that we // can handle. We only create one SCEV of a particular shape, so // pointer-comparisons for equality are legal. // // One important aspect of the SCEV objects is that they are never cyclic, even // if there is a cycle in the dataflow for an expression (ie, a PHI node). If // the PHI node is one of the idioms that we can represent (e.g., a polynomial // recurrence) then we represent it directly as a recurrence node, otherwise we // represent it as a SCEVUnknown node. // // In addition to being able to represent expressions of various types, we also // have folders that are used to build the *canonical* representation for a // particular expression. These folders are capable of using a variety of // rewrite rules to simplify the expressions. // // Once the folders are defined, we can implement the more interesting // higher-level code, such as the code that recognizes PHI nodes of various // types, computes the execution count of a loop, etc. // // TODO: We should use these routines and value representations to implement // dependence analysis! // //===----------------------------------------------------------------------===// // // There are several good references for the techniques used in this analysis. // // Chains of recurrences -- a method to expedite the evaluation // of closed-form functions // Olaf Bachmann, Paul S. Wang, Eugene V. Zima // // On computational properties of chains of recurrences // Eugene V. Zima // // Symbolic Evaluation of Chains of Recurrences for Loop Optimization // Robert A. van Engelen // // Efficient Symbolic Analysis for Optimizing Compilers // Robert A. 
van Engelen // // Using the chains of recurrences algebra for data dependence testing and // induction variable substitution // MS Thesis, Johnie Birch // //===----------------------------------------------------------------------===// #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/EquivalenceClasses.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/Sequence.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Argument.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constant.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/SaveAndRestore.h" #include "llvm/Support/raw_ostream.h" #include #include #include #include #include #include #include #include #include #include #include using namespace llvm; #define DEBUG_TYPE "scalar-evolution" STATISTIC(NumArrayLenItCounts, "Number of trip counts computed with array length"); STATISTIC(NumTripCountsComputed, "Number of loops with predictable loop counts"); STATISTIC(NumTripCountsNotComputed, "Number of loops without predictable loop counts"); STATISTIC(NumBruteForceTripCountsComputed, "Number of loops with trip counts computed by force"); static cl::opt MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden, cl::desc("Maximum number of iterations SCEV will " "symbolically execute a constant " "derived loop"), cl::init(100)); // FIXME: Enable this with EXPENSIVE_CHECKS when the test suite is clean. 
static cl::opt VerifySCEV( "verify-scev", cl::Hidden, cl::desc("Verify ScalarEvolution's backedge taken counts (slow)")); static cl::opt VerifySCEVMap("verify-scev-maps", cl::Hidden, cl::desc("Verify no dangling value in ScalarEvolution's " "ExprValueMap (slow)")); static cl::opt MulOpsInlineThreshold( "scev-mulops-inline-threshold", cl::Hidden, cl::desc("Threshold for inlining multiplication operands into a SCEV"), cl::init(32)); static cl::opt AddOpsInlineThreshold( "scev-addops-inline-threshold", cl::Hidden, cl::desc("Threshold for inlining addition operands into a SCEV"), cl::init(500)); static cl::opt MaxSCEVCompareDepth( "scalar-evolution-max-scev-compare-depth", cl::Hidden, cl::desc("Maximum depth of recursive SCEV complexity comparisons"), cl::init(32)); static cl::opt MaxSCEVOperationsImplicationDepth( "scalar-evolution-max-scev-operations-implication-depth", cl::Hidden, cl::desc("Maximum depth of recursive SCEV operations implication analysis"), cl::init(2)); static cl::opt MaxValueCompareDepth( "scalar-evolution-max-value-compare-depth", cl::Hidden, cl::desc("Maximum depth of recursive value complexity comparisons"), cl::init(2)); static cl::opt MaxArithDepth("scalar-evolution-max-arith-depth", cl::Hidden, cl::desc("Maximum depth of recursive arithmetics"), cl::init(32)); static cl::opt MaxConstantEvolvingDepth( "scalar-evolution-max-constant-evolving-depth", cl::Hidden, cl::desc("Maximum depth of recursive constant evolving"), cl::init(32)); static cl::opt MaxExtDepth("scalar-evolution-max-ext-depth", cl::Hidden, cl::desc("Maximum depth of recursive SExt/ZExt"), cl::init(8)); static cl::opt MaxAddRecSize("scalar-evolution-max-add-rec-size", cl::Hidden, cl::desc("Max coefficients in AddRec during evolving"), cl::init(16)); +static cl::opt VersionUnknown( + "scev-version-unknown", cl::Hidden, + cl::desc("Use predicated scalar evolution to version SCEVUnknowns"), + cl::init(false)); + //===----------------------------------------------------------------------===// // SCEV class definitions //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // Implementation of the SCEV class. 
// #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void SCEV::dump() const { print(dbgs()); dbgs() << '\n'; } #endif void SCEV::print(raw_ostream &OS) const { switch (static_cast(getSCEVType())) { case scConstant: cast(this)->getValue()->printAsOperand(OS, false); return; case scTruncate: { const SCEVTruncateExpr *Trunc = cast(this); const SCEV *Op = Trunc->getOperand(); OS << "(trunc " << *Op->getType() << " " << *Op << " to " << *Trunc->getType() << ")"; return; } case scZeroExtend: { const SCEVZeroExtendExpr *ZExt = cast(this); const SCEV *Op = ZExt->getOperand(); OS << "(zext " << *Op->getType() << " " << *Op << " to " << *ZExt->getType() << ")"; return; } case scSignExtend: { const SCEVSignExtendExpr *SExt = cast(this); const SCEV *Op = SExt->getOperand(); OS << "(sext " << *Op->getType() << " " << *Op << " to " << *SExt->getType() << ")"; return; } case scAddRecExpr: { const SCEVAddRecExpr *AR = cast(this); OS << "{" << *AR->getOperand(0); for (unsigned i = 1, e = AR->getNumOperands(); i != e; ++i) OS << ",+," << *AR->getOperand(i); OS << "}<"; if (AR->hasNoUnsignedWrap()) OS << "nuw><"; if (AR->hasNoSignedWrap()) OS << "nsw><"; if (AR->hasNoSelfWrap() && !AR->getNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW))) OS << "nw><"; AR->getLoop()->getHeader()->printAsOperand(OS, /*PrintType=*/false); OS << ">"; return; } case scAddExpr: case scMulExpr: case scUMaxExpr: case scSMaxExpr: { const SCEVNAryExpr *NAry = cast(this); const char *OpStr = nullptr; switch (NAry->getSCEVType()) { case scAddExpr: OpStr = " + "; break; case scMulExpr: OpStr = " * "; break; case scUMaxExpr: OpStr = " umax "; break; case scSMaxExpr: OpStr = " smax "; break; } OS << "("; for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); I != E; ++I) { OS << **I; if (std::next(I) != E) OS << OpStr; } OS << ")"; switch (NAry->getSCEVType()) { case scAddExpr: case scMulExpr: if (NAry->hasNoUnsignedWrap()) OS << ""; if (NAry->hasNoSignedWrap()) OS << ""; } return; } case scUDivExpr: { const SCEVUDivExpr *UDiv = cast(this); OS << "(" << *UDiv->getLHS() << " /u " << *UDiv->getRHS() << ")"; return; } case scUnknown: { const SCEVUnknown *U = cast(this); Type *AllocTy; if (U->isSizeOf(AllocTy)) { OS << "sizeof(" << *AllocTy << ")"; return; } if (U->isAlignOf(AllocTy)) { OS << "alignof(" << *AllocTy << ")"; return; } Type *CTy; Constant *FieldNo; if (U->isOffsetOf(CTy, FieldNo)) { OS << "offsetof(" << *CTy << ", "; FieldNo->printAsOperand(OS, false); OS << ")"; return; } // Otherwise just print it normally. 
U->getValue()->printAsOperand(OS, false); return; } case scCouldNotCompute: OS << "***COULDNOTCOMPUTE***"; return; } llvm_unreachable("Unknown SCEV kind!"); } Type *SCEV::getType() const { switch (static_cast(getSCEVType())) { case scConstant: return cast(this)->getType(); case scTruncate: case scZeroExtend: case scSignExtend: return cast(this)->getType(); case scAddRecExpr: case scMulExpr: case scUMaxExpr: case scSMaxExpr: return cast(this)->getType(); case scAddExpr: return cast(this)->getType(); case scUDivExpr: return cast(this)->getType(); case scUnknown: return cast(this)->getType(); case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); } llvm_unreachable("Unknown SCEV kind!"); } bool SCEV::isZero() const { if (const SCEVConstant *SC = dyn_cast(this)) return SC->getValue()->isZero(); return false; } bool SCEV::isOne() const { if (const SCEVConstant *SC = dyn_cast(this)) return SC->getValue()->isOne(); return false; } bool SCEV::isAllOnesValue() const { if (const SCEVConstant *SC = dyn_cast(this)) return SC->getValue()->isMinusOne(); return false; } bool SCEV::isNonConstantNegative() const { const SCEVMulExpr *Mul = dyn_cast(this); if (!Mul) return false; // If there is a constant factor, it will be first. const SCEVConstant *SC = dyn_cast(Mul->getOperand(0)); if (!SC) return false; // Return true if the value is negative, this matches things like (-42 * V). return SC->getAPInt().isNegative(); } SCEVCouldNotCompute::SCEVCouldNotCompute() : SCEV(FoldingSetNodeIDRef(), scCouldNotCompute) {} bool SCEVCouldNotCompute::classof(const SCEV *S) { return S->getSCEVType() == scCouldNotCompute; } const SCEV *ScalarEvolution::getConstant(ConstantInt *V) { FoldingSetNodeID ID; ID.AddInteger(scConstant); ID.AddPointer(V); void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V); UniqueSCEVs.InsertNode(S, IP); return S; } const SCEV *ScalarEvolution::getConstant(const APInt &Val) { return getConstant(ConstantInt::get(getContext(), Val)); } const SCEV * ScalarEvolution::getConstant(Type *Ty, uint64_t V, bool isSigned) { IntegerType *ITy = cast(getEffectiveSCEVType(Ty)); return getConstant(ConstantInt::get(ITy, V, isSigned)); } SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID, unsigned SCEVTy, const SCEV *op, Type *ty) : SCEV(ID, SCEVTy), Op(op), Ty(ty) {} SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID, const SCEV *op, Type *ty) : SCEVCastExpr(ID, scTruncate, op, ty) { assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot truncate non-integer value!"); } SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID, const SCEV *op, Type *ty) : SCEVCastExpr(ID, scZeroExtend, op, ty) { assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot zero extend non-integer value!"); } SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID, const SCEV *op, Type *ty) : SCEVCastExpr(ID, scSignExtend, op, ty) { assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot sign extend non-integer value!"); } void SCEVUnknown::deleted() { // Clear this SCEVUnknown from various maps. SE->forgetMemoizedResults(this); // Remove this SCEVUnknown from the uniquing map. SE->UniqueSCEVs.RemoveNode(this); // Release the value. 
  setValPtr(nullptr);
}

void SCEVUnknown::allUsesReplacedWith(Value *New) {
  // Remove this SCEVUnknown from the uniquing map.
  SE->UniqueSCEVs.RemoveNode(this);

  // Update this SCEVUnknown to point to the new value. This is needed
  // because there may still be outstanding SCEVs which still point to
  // this SCEVUnknown.
  setValPtr(New);
}

bool SCEVUnknown::isSizeOf(Type *&AllocTy) const {
  if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
    if (VCE->getOpcode() == Instruction::PtrToInt)
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
        if (CE->getOpcode() == Instruction::GetElementPtr &&
            CE->getOperand(0)->isNullValue() &&
            CE->getNumOperands() == 2)
          if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(1)))
            if (CI->isOne()) {
              AllocTy = cast<PointerType>(CE->getOperand(0)->getType())
                                 ->getElementType();
              return true;
            }

  return false;
}

bool SCEVUnknown::isAlignOf(Type *&AllocTy) const {
  if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
    if (VCE->getOpcode() == Instruction::PtrToInt)
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
        if (CE->getOpcode() == Instruction::GetElementPtr &&
            CE->getOperand(0)->isNullValue()) {
          Type *Ty =
            cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
          if (StructType *STy = dyn_cast<StructType>(Ty))
            if (!STy->isPacked() &&
                CE->getNumOperands() == 3 &&
                CE->getOperand(1)->isNullValue()) {
              if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(2)))
                if (CI->isOne() &&
                    STy->getNumElements() == 2 &&
                    STy->getElementType(0)->isIntegerTy(1)) {
                  AllocTy = STy->getElementType(1);
                  return true;
                }
            }
        }

  return false;
}

bool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const {
  if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
    if (VCE->getOpcode() == Instruction::PtrToInt)
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
        if (CE->getOpcode() == Instruction::GetElementPtr &&
            CE->getNumOperands() == 3 &&
            CE->getOperand(0)->isNullValue() &&
            CE->getOperand(1)->isNullValue()) {
          Type *Ty =
            cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
          // Ignore vector types here so that ScalarEvolutionExpander doesn't
          // emit getelementptrs that index into vectors.
          if (Ty->isStructTy() || Ty->isArrayTy()) {
            CTy = Ty;
            FieldNo = CE->getOperand(2);
            return true;
          }
        }

  return false;
}

//===----------------------------------------------------------------------===//
//                               SCEV Utilities
//===----------------------------------------------------------------------===//

/// Compare the two values \p LV and \p RV in terms of their "complexity" where
/// "complexity" is a partial (and somewhat ad-hoc) relation used to order
/// operands in SCEV expressions. \p EqCache is a set of pairs of values that
/// have been previously deemed to be "equally complex" by this routine.  It is
/// intended to avoid exponential time complexity in cases like:
///
///   %a = f(%x, %y)
///   %b = f(%a,  %a)
///   %c = f(%b,  %b)
///
///   %d = f(%x, %y)
///   %e = f(%d,  %d)
///   %f = f(%e,  %e)
///
///   CompareValueComplexity(%f, %c)
///
/// Since we do not continue running this routine on expression trees once we
/// have seen unequal values, there is no need to track them in the cache.
static int
CompareValueComplexity(EquivalenceClasses<const Value *> &EqCacheValue,
                       const LoopInfo *const LI, Value *LV, Value *RV,
                       unsigned Depth) {
  if (Depth > MaxValueCompareDepth || EqCacheValue.isEquivalent(LV, RV))
    return 0;

  // Order pointer values after integer values. This helps SCEVExpander form
  // GEPs.
  bool LIsPointer = LV->getType()->isPointerTy(),
       RIsPointer = RV->getType()->isPointerTy();
  if (LIsPointer != RIsPointer)
    return (int)LIsPointer - (int)RIsPointer;

  // Compare getValueID values.
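//===----------------------------------------------------------------------===//
// A minimal standalone sketch (not LLVM code): the three recognizers above
// match the classic null-pointer GEP encodings of sizeof/alignof/offsetof,
// e.g. isSizeOf matches ptrtoint(gep (T*)null, 1) and isOffsetOf matches
// ptrtoint(gep (T*)null, 0, Field).  The struct name S below is illustrative;
// the standard macros compute the same quantities that those constant
// expressions encode.
#if 0
#include <cstddef>
#include <cstdio>

struct S { bool B; double D; };  // the alignof pattern above uses {i1, T}

int main() {
  // isSizeOf:   ptrtoint(gep (T*)null, 1)          encodes sizeof(T)
  // isAlignOf:  ptrtoint(gep ({i1,T}*)null, 0, 1)  encodes alignof(T)
  // isOffsetOf: ptrtoint(gep (T*)null, 0, F)       encodes offsetof(T, F)
  std::printf("sizeof=%zu alignof=%zu offsetof=%zu\n",
              sizeof(S), alignof(double), offsetof(S, D));
}
#endif
//===----------------------------------------------------------------------===//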
unsigned LID = LV->getValueID(), RID = RV->getValueID(); if (LID != RID) return (int)LID - (int)RID; // Sort arguments by their position. if (const auto *LA = dyn_cast(LV)) { const auto *RA = cast(RV); unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo(); return (int)LArgNo - (int)RArgNo; } if (const auto *LGV = dyn_cast(LV)) { const auto *RGV = cast(RV); const auto IsGVNameSemantic = [&](const GlobalValue *GV) { auto LT = GV->getLinkage(); return !(GlobalValue::isPrivateLinkage(LT) || GlobalValue::isInternalLinkage(LT)); }; // Use the names to distinguish the two values, but only if the // names are semantically important. if (IsGVNameSemantic(LGV) && IsGVNameSemantic(RGV)) return LGV->getName().compare(RGV->getName()); } // For instructions, compare their loop depth, and their operand count. This // is pretty loose. if (const auto *LInst = dyn_cast(LV)) { const auto *RInst = cast(RV); // Compare loop depths. const BasicBlock *LParent = LInst->getParent(), *RParent = RInst->getParent(); if (LParent != RParent) { unsigned LDepth = LI->getLoopDepth(LParent), RDepth = LI->getLoopDepth(RParent); if (LDepth != RDepth) return (int)LDepth - (int)RDepth; } // Compare the number of operands. unsigned LNumOps = LInst->getNumOperands(), RNumOps = RInst->getNumOperands(); if (LNumOps != RNumOps) return (int)LNumOps - (int)RNumOps; for (unsigned Idx : seq(0u, LNumOps)) { int Result = CompareValueComplexity(EqCacheValue, LI, LInst->getOperand(Idx), RInst->getOperand(Idx), Depth + 1); if (Result != 0) return Result; } } EqCacheValue.unionSets(LV, RV); return 0; } // Return negative, zero, or positive, if LHS is less than, equal to, or greater // than RHS, respectively. A three-way result allows recursive comparisons to be // more efficient. static int CompareSCEVComplexity( EquivalenceClasses &EqCacheSCEV, EquivalenceClasses &EqCacheValue, const LoopInfo *const LI, const SCEV *LHS, const SCEV *RHS, DominatorTree &DT, unsigned Depth = 0) { // Fast-path: SCEVs are uniqued so we can do a quick equality check. if (LHS == RHS) return 0; // Primarily, sort the SCEVs by their getSCEVType(). unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType(); if (LType != RType) return (int)LType - (int)RType; if (Depth > MaxSCEVCompareDepth || EqCacheSCEV.isEquivalent(LHS, RHS)) return 0; // Aside from the getSCEVType() ordering, the particular ordering // isn't very important except that it's beneficial to be consistent, // so that (a + b) and (b + a) don't end up as different expressions. switch (static_cast(LType)) { case scUnknown: { const SCEVUnknown *LU = cast(LHS); const SCEVUnknown *RU = cast(RHS); int X = CompareValueComplexity(EqCacheValue, LI, LU->getValue(), RU->getValue(), Depth + 1); if (X == 0) EqCacheSCEV.unionSets(LHS, RHS); return X; } case scConstant: { const SCEVConstant *LC = cast(LHS); const SCEVConstant *RC = cast(RHS); // Compare constant values. const APInt &LA = LC->getAPInt(); const APInt &RA = RC->getAPInt(); unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth(); if (LBitWidth != RBitWidth) return (int)LBitWidth - (int)RBitWidth; return LA.ult(RA) ? -1 : 1; } case scAddRecExpr: { const SCEVAddRecExpr *LA = cast(LHS); const SCEVAddRecExpr *RA = cast(RHS); // There is always a dominance between two recs that are used by one SCEV, // so we can safely sort recs by loop header dominance. We require such // order in getAddExpr. 
const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop(); if (LLoop != RLoop) { const BasicBlock *LHead = LLoop->getHeader(), *RHead = RLoop->getHeader(); assert(LHead != RHead && "Two loops share the same header?"); if (DT.dominates(LHead, RHead)) return 1; else assert(DT.dominates(RHead, LHead) && "No dominance between recurrences used by one SCEV?"); return -1; } // Addrec complexity grows with operand count. unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands(); if (LNumOps != RNumOps) return (int)LNumOps - (int)RNumOps; // Compare NoWrap flags. if (LA->getNoWrapFlags() != RA->getNoWrapFlags()) return (int)LA->getNoWrapFlags() - (int)RA->getNoWrapFlags(); // Lexicographically compare. for (unsigned i = 0; i != LNumOps; ++i) { int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LA->getOperand(i), RA->getOperand(i), DT, Depth + 1); if (X != 0) return X; } EqCacheSCEV.unionSets(LHS, RHS); return 0; } case scAddExpr: case scMulExpr: case scSMaxExpr: case scUMaxExpr: { const SCEVNAryExpr *LC = cast(LHS); const SCEVNAryExpr *RC = cast(RHS); // Lexicographically compare n-ary expressions. unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands(); if (LNumOps != RNumOps) return (int)LNumOps - (int)RNumOps; // Compare NoWrap flags. if (LC->getNoWrapFlags() != RC->getNoWrapFlags()) return (int)LC->getNoWrapFlags() - (int)RC->getNoWrapFlags(); for (unsigned i = 0; i != LNumOps; ++i) { int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LC->getOperand(i), RC->getOperand(i), DT, Depth + 1); if (X != 0) return X; } EqCacheSCEV.unionSets(LHS, RHS); return 0; } case scUDivExpr: { const SCEVUDivExpr *LC = cast(LHS); const SCEVUDivExpr *RC = cast(RHS); // Lexicographically compare udiv expressions. int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LC->getLHS(), RC->getLHS(), DT, Depth + 1); if (X != 0) return X; X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LC->getRHS(), RC->getRHS(), DT, Depth + 1); if (X == 0) EqCacheSCEV.unionSets(LHS, RHS); return X; } case scTruncate: case scZeroExtend: case scSignExtend: { const SCEVCastExpr *LC = cast(LHS); const SCEVCastExpr *RC = cast(RHS); // Compare cast expressions by operand. int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LC->getOperand(), RC->getOperand(), DT, Depth + 1); if (X == 0) EqCacheSCEV.unionSets(LHS, RHS); return X; } case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); } llvm_unreachable("Unknown SCEV kind!"); } /// Given a list of SCEV objects, order them by their complexity, and group /// objects of the same complexity together by value. When this routine is /// finished, we know that any duplicates in the vector are consecutive and that /// complexity is monotonically increasing. /// /// Note that we go take special precautions to ensure that we get deterministic /// results from this routine. In other words, we don't want the results of /// this to depend on where the addresses of various SCEV objects happened to /// land in memory. static void GroupByComplexity(SmallVectorImpl &Ops, LoopInfo *LI, DominatorTree &DT) { if (Ops.size() < 2) return; // Noop EquivalenceClasses EqCacheSCEV; EquivalenceClasses EqCacheValue; if (Ops.size() == 2) { // This is the common case, which also happens to be trivially simple. // Special case it. const SCEV *&LHS = Ops[0], *&RHS = Ops[1]; if (CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, RHS, LHS, DT) < 0) std::swap(LHS, RHS); return; } // Do the rough sort by complexity. 
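//===----------------------------------------------------------------------===//
// A minimal standalone sketch (not LLVM code) of the sort-then-group scheme
// implemented just below: the comparator may return 0 for distinct but
// "equally complex" values, so a stable sort alone does not make exact
// duplicates adjacent; a second pass swaps them next to their first
// occurrence.  Strings sorted by length stand in for SCEVs sorted by
// complexity.
#if 0
#include <algorithm>
#include <cstdio>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> Ops = {"bb", "cc", "bb", "a"};
  // Rough sort by a coarse "complexity" key (here: length).
  std::stable_sort(Ops.begin(), Ops.end(),
                   [](const std::string &L, const std::string &R) {
                     return L.size() < R.size();
                   });
  // Second pass: pull exact duplicates next to their first occurrence.
  for (size_t i = 0, e = Ops.size(); i + 2 <= e; ++i) {
    const std::string S = Ops[i];
    for (size_t j = i + 1; j != e && Ops[j].size() == S.size(); ++j)
      if (Ops[j] == S)
        std::swap(Ops[++i], Ops[j]); // duplicate: move it right after i
  }
  for (const std::string &V : Ops)
    std::printf("%s ", V.c_str()); // a bb bb cc
}
#endif
//===----------------------------------------------------------------------===//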
std::stable_sort(Ops.begin(), Ops.end(), [&](const SCEV *LHS, const SCEV *RHS) { return CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LHS, RHS, DT) < 0; }); // Now that we are sorted by complexity, group elements of the same // complexity. Note that this is, at worst, N^2, but the vector is likely to // be extremely short in practice. Note that we take this approach because we // do not want to depend on the addresses of the objects we are grouping. for (unsigned i = 0, e = Ops.size(); i != e-2; ++i) { const SCEV *S = Ops[i]; unsigned Complexity = S->getSCEVType(); // If there are any objects of the same complexity and same value as this // one, group them. for (unsigned j = i+1; j != e && Ops[j]->getSCEVType() == Complexity; ++j) { if (Ops[j] == S) { // Found a duplicate. // Move it to immediately after i'th element. std::swap(Ops[i+1], Ops[j]); ++i; // no need to rescan it. if (i == e-2) return; // Done! } } } } // Returns the size of the SCEV S. static inline int sizeOfSCEV(const SCEV *S) { struct FindSCEVSize { int Size = 0; FindSCEVSize() = default; bool follow(const SCEV *S) { ++Size; // Keep looking at all operands of S. return true; } bool isDone() const { return false; } }; FindSCEVSize F; SCEVTraversal ST(F); ST.visitAll(S); return F.Size; } namespace { struct SCEVDivision : public SCEVVisitor { public: // Computes the Quotient and Remainder of the division of Numerator by // Denominator. static void divide(ScalarEvolution &SE, const SCEV *Numerator, const SCEV *Denominator, const SCEV **Quotient, const SCEV **Remainder) { assert(Numerator && Denominator && "Uninitialized SCEV"); SCEVDivision D(SE, Numerator, Denominator); // Check for the trivial case here to avoid having to check for it in the // rest of the code. if (Numerator == Denominator) { *Quotient = D.One; *Remainder = D.Zero; return; } if (Numerator->isZero()) { *Quotient = D.Zero; *Remainder = D.Zero; return; } // A simple case when N/1. The quotient is N. if (Denominator->isOne()) { *Quotient = Numerator; *Remainder = D.Zero; return; } // Split the Denominator when it is a product. if (const SCEVMulExpr *T = dyn_cast(Denominator)) { const SCEV *Q, *R; *Quotient = Numerator; for (const SCEV *Op : T->operands()) { divide(SE, *Quotient, Op, &Q, &R); *Quotient = Q; // Bail out when the Numerator is not divisible by one of the terms of // the Denominator. if (!R->isZero()) { *Quotient = D.Zero; *Remainder = Numerator; return; } } *Remainder = D.Zero; return; } D.visit(Numerator); *Quotient = D.Quotient; *Remainder = D.Remainder; } // Except in the trivial case described above, we do not know how to divide // Expr by Denominator for the following functions with empty implementation. 
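//===----------------------------------------------------------------------===//
// A minimal standalone sketch (not LLVM code) of the "split the Denominator
// when it is a product" step in SCEVDivision::divide above: N / (a*b) is
// computed as (N/a)/b, giving up unless every factor divides evenly.  The
// helper name divideByProduct is illustrative; plain integers stand in for
// SCEVs.
#if 0
#include <cstdio>
#include <utility>

static std::pair<long, long> divideByProduct(long N, const long Factors[],
                                             int NumFactors) {
  long Q = N;
  for (int i = 0; i != NumFactors; ++i) {
    if (Q % Factors[i] != 0)
      return {0, N}; // "cannot divide": quotient 0, remainder = numerator
    Q /= Factors[i];
  }
  return {Q, 0};
}

int main() {
  const long Factors[] = {4, 3};
  auto [Q, R] = divideByProduct(24, Factors, 2);
  std::printf("24 / (4*3) -> q=%ld r=%ld\n", Q, R); // q=2 r=0
}
#endif
//===----------------------------------------------------------------------===//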
void visitTruncateExpr(const SCEVTruncateExpr *Numerator) {} void visitZeroExtendExpr(const SCEVZeroExtendExpr *Numerator) {} void visitSignExtendExpr(const SCEVSignExtendExpr *Numerator) {} void visitUDivExpr(const SCEVUDivExpr *Numerator) {} void visitSMaxExpr(const SCEVSMaxExpr *Numerator) {} void visitUMaxExpr(const SCEVUMaxExpr *Numerator) {} void visitUnknown(const SCEVUnknown *Numerator) {} void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {} void visitConstant(const SCEVConstant *Numerator) { if (const SCEVConstant *D = dyn_cast(Denominator)) { APInt NumeratorVal = Numerator->getAPInt(); APInt DenominatorVal = D->getAPInt(); uint32_t NumeratorBW = NumeratorVal.getBitWidth(); uint32_t DenominatorBW = DenominatorVal.getBitWidth(); if (NumeratorBW > DenominatorBW) DenominatorVal = DenominatorVal.sext(NumeratorBW); else if (NumeratorBW < DenominatorBW) NumeratorVal = NumeratorVal.sext(DenominatorBW); APInt QuotientVal(NumeratorVal.getBitWidth(), 0); APInt RemainderVal(NumeratorVal.getBitWidth(), 0); APInt::sdivrem(NumeratorVal, DenominatorVal, QuotientVal, RemainderVal); Quotient = SE.getConstant(QuotientVal); Remainder = SE.getConstant(RemainderVal); return; } } void visitAddRecExpr(const SCEVAddRecExpr *Numerator) { const SCEV *StartQ, *StartR, *StepQ, *StepR; if (!Numerator->isAffine()) return cannotDivide(Numerator); divide(SE, Numerator->getStart(), Denominator, &StartQ, &StartR); divide(SE, Numerator->getStepRecurrence(SE), Denominator, &StepQ, &StepR); // Bail out if the types do not match. Type *Ty = Denominator->getType(); if (Ty != StartQ->getType() || Ty != StartR->getType() || Ty != StepQ->getType() || Ty != StepR->getType()) return cannotDivide(Numerator); Quotient = SE.getAddRecExpr(StartQ, StepQ, Numerator->getLoop(), Numerator->getNoWrapFlags()); Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(), Numerator->getNoWrapFlags()); } void visitAddExpr(const SCEVAddExpr *Numerator) { SmallVector Qs, Rs; Type *Ty = Denominator->getType(); for (const SCEV *Op : Numerator->operands()) { const SCEV *Q, *R; divide(SE, Op, Denominator, &Q, &R); // Bail out if types do not match. if (Ty != Q->getType() || Ty != R->getType()) return cannotDivide(Numerator); Qs.push_back(Q); Rs.push_back(R); } if (Qs.size() == 1) { Quotient = Qs[0]; Remainder = Rs[0]; return; } Quotient = SE.getAddExpr(Qs); Remainder = SE.getAddExpr(Rs); } void visitMulExpr(const SCEVMulExpr *Numerator) { SmallVector Qs; Type *Ty = Denominator->getType(); bool FoundDenominatorTerm = false; for (const SCEV *Op : Numerator->operands()) { // Bail out if types do not match. if (Ty != Op->getType()) return cannotDivide(Numerator); if (FoundDenominatorTerm) { Qs.push_back(Op); continue; } // Check whether Denominator divides one of the product operands. const SCEV *Q, *R; divide(SE, Op, Denominator, &Q, &R); if (!R->isZero()) { Qs.push_back(Op); continue; } // Bail out if types do not match. if (Ty != Q->getType()) return cannotDivide(Numerator); FoundDenominatorTerm = true; Qs.push_back(Q); } if (FoundDenominatorTerm) { Remainder = Zero; if (Qs.size() == 1) Quotient = Qs[0]; else Quotient = SE.getMulExpr(Qs); return; } if (!isa(Denominator)) return cannotDivide(Numerator); // The Remainder is obtained by replacing Denominator by 0 in Numerator. 
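//===----------------------------------------------------------------------===//
// A minimal standalone sketch (not LLVM code) of what visitConstant above
// does: sign-extend both constants to a common width, then take the signed
// quotient and remainder (APInt::sdivrem truncates toward zero, as C++
// integer division does).  Fixed-width integers stand in for APInts.
#if 0
#include <cstdint>
#include <cstdio>

int main() {
  int32_t Num32 = -7; // numerator, nominally 32 bits wide
  int8_t  Den8  = 2;  // denominator, nominally 8 bits wide
  int64_t Num = Num32; // sext both operands to a common width
  int64_t Den = Den8;
  std::printf("q=%lld r=%lld\n", (long long)(Num / Den),
              (long long)(Num % Den)); // q=-3 r=-1
}
#endif
//===----------------------------------------------------------------------===//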
ValueToValueMap RewriteMap; RewriteMap[cast(Denominator)->getValue()] = cast(Zero)->getValue(); Remainder = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true); if (Remainder->isZero()) { // The Quotient is obtained by replacing Denominator by 1 in Numerator. RewriteMap[cast(Denominator)->getValue()] = cast(One)->getValue(); Quotient = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true); return; } // Quotient is (Numerator - Remainder) divided by Denominator. const SCEV *Q, *R; const SCEV *Diff = SE.getMinusSCEV(Numerator, Remainder); // This SCEV does not seem to simplify: fail the division here. if (sizeOfSCEV(Diff) > sizeOfSCEV(Numerator)) return cannotDivide(Numerator); divide(SE, Diff, Denominator, &Q, &R); if (R != Zero) return cannotDivide(Numerator); Quotient = Q; } private: SCEVDivision(ScalarEvolution &S, const SCEV *Numerator, const SCEV *Denominator) : SE(S), Denominator(Denominator) { Zero = SE.getZero(Denominator->getType()); One = SE.getOne(Denominator->getType()); // We generally do not know how to divide Expr by Denominator. We // initialize the division to a "cannot divide" state to simplify the rest // of the code. cannotDivide(Numerator); } // Convenience function for giving up on the division. We set the quotient to // be equal to zero and the remainder to be equal to the numerator. void cannotDivide(const SCEV *Numerator) { Quotient = Zero; Remainder = Numerator; } ScalarEvolution &SE; const SCEV *Denominator, *Quotient, *Remainder, *Zero, *One; }; } // end anonymous namespace //===----------------------------------------------------------------------===// // Simple SCEV method implementations //===----------------------------------------------------------------------===// /// Compute BC(It, K). The result has width W. Assume, K > 0. static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K, ScalarEvolution &SE, Type *ResultTy) { // Handle the simplest case efficiently. if (K == 1) return SE.getTruncateOrZeroExtend(It, ResultTy); // We are using the following formula for BC(It, K): // // BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / K! // // Suppose, W is the bitwidth of the return value. We must be prepared for // overflow. Hence, we must assure that the result of our computation is // equal to the accurate one modulo 2^W. Unfortunately, division isn't // safe in modular arithmetic. // // However, this code doesn't use exactly that formula; the formula it uses // is something like the following, where T is the number of factors of 2 in // K! (i.e. trailing zeros in the binary representation of K!), and ^ is // exponentiation: // // BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / 2^T / (K! / 2^T) // // This formula is trivially equivalent to the previous formula. However, // this formula can be implemented much more efficiently. The trick is that // K! / 2^T is odd, and exact division by an odd number *is* safe in modular // arithmetic. To do exact division in modular arithmetic, all we have // to do is multiply by the inverse. Therefore, this step can be done at // width W. // // The next issue is how to safely do the division by 2^T. The way this // is done is by doing the multiplication step at a width of at least W + T // bits. This way, the bottom W+T bits of the product are accurate. Then, // when we perform the division by 2^T (which is equivalent to a right shift // by T), the bottom W bits are accurate. Extra bits are okay; they'll get // truncated out after the division by 2^T. 
  //
  // In comparison to just directly using the first formula, this technique
  // is much more efficient; using the first formula requires W * K bits,
  // but this formula less than W + K bits. Also, the first formula requires
  // a division step, whereas this formula only requires multiplies and shifts.
  //
  // It doesn't matter whether the subtraction step is done in the calculation
  // width or the input iteration count's width; if the subtraction overflows,
  // the result must be zero anyway.  We prefer here to do it in the width of
  // the induction variable because it helps a lot for certain cases; CodeGen
  // isn't smart enough to ignore the overflow, which leads to much less
  // efficient code if the width of the subtraction is wider than the native
  // register width.
  //
  // (It's possible to not widen at all by pulling out factors of 2 before
  // the multiplication; for example, K=2 can be calculated as
  // It/2*(It+(It*INT_MIN/INT_MIN)+-1). However, it requires
  // extra arithmetic, so it's not an obvious win, and it gets
  // much more complicated for K > 3.)

  // Protection from insane SCEVs; this bound is conservative,
  // but it probably doesn't matter.
  if (K > 1000)
    return SE.getCouldNotCompute();

  unsigned W = SE.getTypeSizeInBits(ResultTy);

  // Calculate K! / 2^T and T; we divide out the factors of two before
  // multiplying for calculating K! / 2^T to avoid overflow.
  // Other overflow doesn't matter because we only care about the bottom
  // W bits of the result.
  APInt OddFactorial(W, 1);
  unsigned T = 1;
  for (unsigned i = 3; i <= K; ++i) {
    APInt Mult(W, i);
    unsigned TwoFactors = Mult.countTrailingZeros();
    T += TwoFactors;
    Mult.lshrInPlace(TwoFactors);
    OddFactorial *= Mult;
  }

  // We need at least W + T bits for the multiplication step
  unsigned CalculationBits = W + T;

  // Calculate 2^T, at width T+W.
  APInt DivFactor = APInt::getOneBitSet(CalculationBits, T);

  // Calculate the multiplicative inverse of K! / 2^T;
  // this multiplication factor will perform the exact division by
  // K! / 2^T.
  APInt Mod = APInt::getSignedMinValue(W+1);
  APInt MultiplyFactor = OddFactorial.zext(W+1);
  MultiplyFactor = MultiplyFactor.multiplicativeInverse(Mod);
  MultiplyFactor = MultiplyFactor.trunc(W);

  // Calculate the product, at width T+W
  IntegerType *CalculationTy = IntegerType::get(SE.getContext(),
                                                CalculationBits);
  const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy);
  for (unsigned i = 1; i != K; ++i) {
    const SCEV *S = SE.getMinusSCEV(It, SE.getConstant(It->getType(), i));
    Dividend = SE.getMulExpr(Dividend,
                             SE.getTruncateOrZeroExtend(S, CalculationTy));
  }

  // Divide by 2^T
  const SCEV *DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor));

  // Truncate the result, and divide by K! / 2^T.
  return SE.getMulExpr(SE.getConstant(MultiplyFactor),
                       SE.getTruncateOrZeroExtend(DivResult, ResultTy));
}

/// Return the value of this chain of recurrences at the specified iteration
/// number.  We can evaluate this recurrence by multiplying each element in the
/// chain by the binomial coefficient corresponding to it.  In other words, we
/// can evaluate {A,+,B,+,C,+,D} as:
///
///   A*BC(It, 0) + B*BC(It, 1) + C*BC(It, 2) + D*BC(It, 3)
///
/// where BC(It, k) stands for binomial coefficient.
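//===----------------------------------------------------------------------===//
// Before the definition of evaluateAtIteration that follows, a minimal
// standalone sketch (not LLVM code) of the exact-division trick used by
// BinomialCoefficient above: dividing by K! splits into a shift by T (the
// factors of two) and a multiply by the modular inverse of the remaining odd
// factor, which is exact in modular arithmetic.  The helper names inverseOdd
// and binomial3 are illustrative; unlike the real code, this sketch does not
// widen to W+T bits, so it assumes the product fits in 64 bits.
#if 0
#include <cstdint>
#include <cstdio>

// Inverse of an odd A modulo 2^64 via Newton's iteration; each step doubles
// the number of correct low bits (3 -> 6 -> 12 -> 24 -> 48 -> 96 >= 64).
static uint64_t inverseOdd(uint64_t A) {
  uint64_t X = A; // already correct modulo 8 for odd A
  for (int i = 0; i < 5; ++i)
    X *= 2 - A * X;
  return X;
}

// BC(It, 3) = It*(It-1)*(It-2) / 6, with 6 = 2^T * odd, T = 1, odd = 3:
// shift out the power of two, then multiply by the inverse of the odd part.
static uint64_t binomial3(uint64_t It) {
  uint64_t Product = It * (It - 1) * (It - 2);
  return (Product >> 1) * inverseOdd(3);
}

int main() {
  std::printf("%llu\n", (unsigned long long)binomial3(10)); // 120
}
#endif
//===----------------------------------------------------------------------===//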
const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const { const SCEV *Result = getStart(); for (unsigned i = 1, e = getNumOperands(); i != e; ++i) { // The computation is correct in the face of overflow provided that the // multiplication is performed _after_ the evaluation of the binomial // coefficient. const SCEV *Coeff = BinomialCoefficient(It, i, SE, getType()); if (isa(Coeff)) return Coeff; Result = SE.getAddExpr(Result, SE.getMulExpr(getOperand(i), Coeff)); } return Result; } //===----------------------------------------------------------------------===// // SCEV Expression folder implementations //===----------------------------------------------------------------------===// const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, Type *Ty) { assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) && "This is not a truncating conversion!"); assert(isSCEVable(Ty) && "This is not a conversion to a SCEVable type!"); Ty = getEffectiveSCEVType(Ty); FoldingSetNodeID ID; ID.AddInteger(scTruncate); ID.AddPointer(Op); ID.AddPointer(Ty); void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; // Fold if the operand is constant. if (const SCEVConstant *SC = dyn_cast(Op)) return getConstant( cast(ConstantExpr::getTrunc(SC->getValue(), Ty))); // trunc(trunc(x)) --> trunc(x) if (const SCEVTruncateExpr *ST = dyn_cast(Op)) return getTruncateExpr(ST->getOperand(), Ty); // trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing if (const SCEVSignExtendExpr *SS = dyn_cast(Op)) return getTruncateOrSignExtend(SS->getOperand(), Ty); // trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing if (const SCEVZeroExtendExpr *SZ = dyn_cast(Op)) return getTruncateOrZeroExtend(SZ->getOperand(), Ty); // trunc(x1+x2+...+xN) --> trunc(x1)+trunc(x2)+...+trunc(xN) if we can // eliminate all the truncates, or we replace other casts with truncates. if (const SCEVAddExpr *SA = dyn_cast(Op)) { SmallVector Operands; bool hasTrunc = false; for (unsigned i = 0, e = SA->getNumOperands(); i != e && !hasTrunc; ++i) { const SCEV *S = getTruncateExpr(SA->getOperand(i), Ty); if (!isa(SA->getOperand(i))) hasTrunc = isa(S); Operands.push_back(S); } if (!hasTrunc) return getAddExpr(Operands); // In spite we checked in the beginning that ID is not in the cache, // it is possible that during recursion and different modification // ID came to cache, so if we found it, just return it. if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; } // trunc(x1*x2*...*xN) --> trunc(x1)*trunc(x2)*...*trunc(xN) if we can // eliminate all the truncates, or we replace other casts with truncates. if (const SCEVMulExpr *SM = dyn_cast(Op)) { SmallVector Operands; bool hasTrunc = false; for (unsigned i = 0, e = SM->getNumOperands(); i != e && !hasTrunc; ++i) { const SCEV *S = getTruncateExpr(SM->getOperand(i), Ty); if (!isa(SM->getOperand(i))) hasTrunc = isa(S); Operands.push_back(S); } if (!hasTrunc) return getMulExpr(Operands); // In spite we checked in the beginning that ID is not in the cache, // it is possible that during recursion and different modification // ID came to cache, so if we found it, just return it. if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; } // If the input value is a chrec scev, truncate the chrec's operands. 
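//===----------------------------------------------------------------------===//
// A minimal standalone sketch (not LLVM code) checking the truncate folds
// above on machine integers: truncating a zext picks whichever single cast
// covers the overall width change, and truncation distributes over addition
// modulo 2^N.
#if 0
#include <cassert>
#include <cstdint>

int main() {
  uint8_t X = 0xAB;
  // trunc(zext(i8 x to i64) to i32) == zext(i8 x to i32): still widening.
  assert((uint32_t)(uint64_t)X == (uint32_t)X);
  // trunc(zext(i8 x to i64) to i8) == x: the casts cancel.
  assert((uint8_t)(uint64_t)X == X);
  // trunc(a + b) == trunc(a) + trunc(b), both taken modulo 2^32.
  uint64_t A = 0x100000005ULL, B = 0x200000007ULL;
  assert((uint32_t)(A + B) == (uint32_t)A + (uint32_t)B);
  return 0;
}
#endif
//===----------------------------------------------------------------------===//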
if (const SCEVAddRecExpr *AddRec = dyn_cast(Op)) { SmallVector Operands; for (const SCEV *Op : AddRec->operands()) Operands.push_back(getTruncateExpr(Op, Ty)); return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap); } // The cast wasn't folded; create an explicit cast node. We can reuse // the existing insert position since if we get here, we won't have // made any changes which would invalidate it. SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); addToLoopUseLists(S); return S; } // Get the limit of a recurrence such that incrementing by Step cannot cause // signed overflow as long as the value of the recurrence within the // loop does not exceed this limit before incrementing. static const SCEV *getSignedOverflowLimitForStep(const SCEV *Step, ICmpInst::Predicate *Pred, ScalarEvolution *SE) { unsigned BitWidth = SE->getTypeSizeInBits(Step->getType()); if (SE->isKnownPositive(Step)) { *Pred = ICmpInst::ICMP_SLT; return SE->getConstant(APInt::getSignedMinValue(BitWidth) - SE->getSignedRangeMax(Step)); } if (SE->isKnownNegative(Step)) { *Pred = ICmpInst::ICMP_SGT; return SE->getConstant(APInt::getSignedMaxValue(BitWidth) - SE->getSignedRangeMin(Step)); } return nullptr; } // Get the limit of a recurrence such that incrementing by Step cannot cause // unsigned overflow as long as the value of the recurrence within the loop does // not exceed this limit before incrementing. static const SCEV *getUnsignedOverflowLimitForStep(const SCEV *Step, ICmpInst::Predicate *Pred, ScalarEvolution *SE) { unsigned BitWidth = SE->getTypeSizeInBits(Step->getType()); *Pred = ICmpInst::ICMP_ULT; return SE->getConstant(APInt::getMinValue(BitWidth) - SE->getUnsignedRangeMax(Step)); } namespace { struct ExtendOpTraitsBase { typedef const SCEV *(ScalarEvolution::*GetExtendExprTy)(const SCEV *, Type *, unsigned); }; // Used to make code generic over signed and unsigned overflow. template struct ExtendOpTraits { // Members present: // // static const SCEV::NoWrapFlags WrapType; // // static const ExtendOpTraitsBase::GetExtendExprTy GetExtendExpr; // // static const SCEV *getOverflowLimitForStep(const SCEV *Step, // ICmpInst::Predicate *Pred, // ScalarEvolution *SE); }; template <> struct ExtendOpTraits : public ExtendOpTraitsBase { static const SCEV::NoWrapFlags WrapType = SCEV::FlagNSW; static const GetExtendExprTy GetExtendExpr; static const SCEV *getOverflowLimitForStep(const SCEV *Step, ICmpInst::Predicate *Pred, ScalarEvolution *SE) { return getSignedOverflowLimitForStep(Step, Pred, SE); } }; const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits< SCEVSignExtendExpr>::GetExtendExpr = &ScalarEvolution::getSignExtendExpr; template <> struct ExtendOpTraits : public ExtendOpTraitsBase { static const SCEV::NoWrapFlags WrapType = SCEV::FlagNUW; static const GetExtendExprTy GetExtendExpr; static const SCEV *getOverflowLimitForStep(const SCEV *Step, ICmpInst::Predicate *Pred, ScalarEvolution *SE) { return getUnsignedOverflowLimitForStep(Step, Pred, SE); } }; const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits< SCEVZeroExtendExpr>::GetExtendExpr = &ScalarEvolution::getZeroExtendExpr; } // end anonymous namespace // The recurrence AR has been shown to have no signed/unsigned wrap or something // close to it. Typically, if we can prove NSW/NUW for AR, then we can just as // easily prove NSW/NUW for its preincrement or postincrement sibling. 
// This
// allows normalizing a sign/zero extended AddRec as such:
// {sext/zext(Step + Start),+,Step} => {(Step + sext/zext(Start),+,Step}
// As a result, the expression "Step + sext/zext(PreIncAR)" is congruent with
// "sext/zext(PostIncAR)"
template <typename ExtendOpTy>
static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,
                                        ScalarEvolution *SE, unsigned Depth) {
  auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;
  auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;

  const Loop *L = AR->getLoop();
  const SCEV *Start = AR->getStart();
  const SCEV *Step = AR->getStepRecurrence(*SE);

  // Check for a simple looking step prior to loop entry.
  const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
  if (!SA)
    return nullptr;

  // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV
  // subtraction is expensive. For this purpose, perform a quick and dirty
  // difference, by checking for Step in the operand list.
  SmallVector<const SCEV *, 4> DiffOps;
  for (const SCEV *Op : SA->operands())
    if (Op != Step)
      DiffOps.push_back(Op);

  if (DiffOps.size() == SA->getNumOperands())
    return nullptr;

  // Try to prove `WrapType` (SCEV::FlagNSW or SCEV::FlagNUW) on `PreStart` +
  // `Step`:

  // 1. NSW/NUW flags on the step increment.
  auto PreStartFlags =
    ScalarEvolution::maskFlags(SA->getNoWrapFlags(), SCEV::FlagNUW);
  const SCEV *PreStart = SE->getAddExpr(DiffOps, PreStartFlags);
  const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>(
      SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap));

  // "{S,+,X} is <nsw>/<nuw>" and "the backedge is taken at least once"
  // implies "S+X does not sign/unsign-overflow".
  //

  const SCEV *BECount = SE->getBackedgeTakenCount(L);
  if (PreAR && PreAR->getNoWrapFlags(WrapType) &&
      !isa<SCEVCouldNotCompute>(BECount) && SE->isKnownPositive(BECount))
    return PreStart;

  // 2. Direct overflow check on the step operation's expression.
  unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
  Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
  const SCEV *OperandExtendedStart =
      SE->getAddExpr((SE->*GetExtendExpr)(PreStart, WideTy, Depth),
                     (SE->*GetExtendExpr)(Step, WideTy, Depth));
  if ((SE->*GetExtendExpr)(Start, WideTy, Depth) == OperandExtendedStart) {
    if (PreAR && AR->getNoWrapFlags(WrapType)) {
      // If we know `AR` == {`PreStart`+`Step`,+,`Step`} is `WrapType` (FlagNSW
      // or FlagNUW) and that `PreStart` + `Step` is `WrapType` too, then
      // `PreAR` == {`PreStart`,+,`Step`} is also `WrapType`.  Cache this fact.
      const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(WrapType);
    }
    return PreStart;
  }

  // 3. Loop precondition.
  ICmpInst::Predicate Pred;
  const SCEV *OverflowLimit =
      ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(Step, &Pred, SE);

  if (OverflowLimit &&
      SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit))
    return PreStart;

  return nullptr;
}

// Get the normalized zero or sign extended expression for this AddRec's Start.
template <typename ExtendOpTy>
static const SCEV *getExtendAddRecStart(const SCEVAddRecExpr *AR, Type *Ty,
                                        ScalarEvolution *SE,
                                        unsigned Depth) {
  auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;

  const SCEV *PreStart = getPreStartForExtend<ExtendOpTy>(AR, Ty, SE, Depth);
  if (!PreStart)
    return (SE->*GetExtendExpr)(AR->getStart(), Ty, Depth);

  return SE->getAddExpr((SE->*GetExtendExpr)(AR->getStepRecurrence(*SE), Ty,
                                             Depth),
                        (SE->*GetExtendExpr)(PreStart, Ty, Depth));
}

// Try to prove away overflow by looking at "nearby" add recurrences.  A
// motivating example for this rule: if we know `{0,+,4}` is `ult` `-1` and it
// does not itself wrap then we can conclude that `{1,+,4}` is `nuw`.
// // Formally: // // {S,+,X} == {S-T,+,X} + T // => Ext({S,+,X}) == Ext({S-T,+,X} + T) // // If ({S-T,+,X} + T) does not overflow ... (1) // // RHS == Ext({S-T,+,X} + T) == Ext({S-T,+,X}) + Ext(T) // // If {S-T,+,X} does not overflow ... (2) // // RHS == Ext({S-T,+,X}) + Ext(T) == {Ext(S-T),+,Ext(X)} + Ext(T) // == {Ext(S-T)+Ext(T),+,Ext(X)} // // If (S-T)+T does not overflow ... (3) // // RHS == {Ext(S-T)+Ext(T),+,Ext(X)} == {Ext(S-T+T),+,Ext(X)} // == {Ext(S),+,Ext(X)} == LHS // // Thus, if (1), (2) and (3) are true for some T, then // Ext({S,+,X}) == {Ext(S),+,Ext(X)} // // (3) is implied by (1) -- "(S-T)+T does not overflow" is simply "({S-T,+,X}+T) // does not overflow" restricted to the 0th iteration. Therefore we only need // to check for (1) and (2). // // In the current context, S is `Start`, X is `Step`, Ext is `ExtendOpTy` and T // is `Delta` (defined below). template bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start, const SCEV *Step, const Loop *L) { auto WrapType = ExtendOpTraits::WrapType; // We restrict `Start` to a constant to prevent SCEV from spending too much // time here. It is correct (but more expensive) to continue with a // non-constant `Start` and do a general SCEV subtraction to compute // `PreStart` below. const SCEVConstant *StartC = dyn_cast(Start); if (!StartC) return false; APInt StartAI = StartC->getAPInt(); for (unsigned Delta : {-2, -1, 1, 2}) { const SCEV *PreStart = getConstant(StartAI - Delta); FoldingSetNodeID ID; ID.AddInteger(scAddRecExpr); ID.AddPointer(PreStart); ID.AddPointer(Step); ID.AddPointer(L); void *IP = nullptr; const auto *PreAR = static_cast(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); // Give up if we don't already have the add recurrence we need because // actually constructing an add recurrence is relatively expensive. if (PreAR && PreAR->getNoWrapFlags(WrapType)) { // proves (2) const SCEV *DeltaS = getConstant(StartC->getType(), Delta); ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; const SCEV *Limit = ExtendOpTraits::getOverflowLimitForStep( DeltaS, &Pred, this); if (Limit && isKnownPredicate(Pred, PreAR, Limit)) // proves (1) return true; } } return false; } const SCEV * ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && "This is not an extending conversion!"); assert(isSCEVable(Ty) && "This is not a conversion to a SCEVable type!"); Ty = getEffectiveSCEVType(Ty); // Fold if the operand is constant. if (const SCEVConstant *SC = dyn_cast(Op)) return getConstant( cast(ConstantExpr::getZExt(SC->getValue(), Ty))); // zext(zext(x)) --> zext(x) if (const SCEVZeroExtendExpr *SZ = dyn_cast(Op)) return getZeroExtendExpr(SZ->getOperand(), Ty, Depth + 1); // Before doing any expensive analysis, check to see if we've already // computed a SCEV for this Op and Ty. FoldingSetNodeID ID; ID.AddInteger(scZeroExtend); ID.AddPointer(Op); ID.AddPointer(Ty); void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; if (Depth > MaxExtDepth) { SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); addToLoopUseLists(S); return S; } // zext(trunc(x)) --> zext(x) or x or trunc(x) if (const SCEVTruncateExpr *ST = dyn_cast(Op)) { // It's possible the bits taken off by the truncate were all zero bits. If // so, we should be able to simplify this further. 
const SCEV *X = ST->getOperand(); ConstantRange CR = getUnsignedRange(X); unsigned TruncBits = getTypeSizeInBits(ST->getType()); unsigned NewBits = getTypeSizeInBits(Ty); if (CR.truncate(TruncBits).zeroExtend(NewBits).contains( CR.zextOrTrunc(NewBits))) return getTruncateOrZeroExtend(X, Ty); } // If the input value is a chrec scev, and we can prove that the value // did not overflow the old, smaller, value, we can zero extend all of the // operands (often constants). This allows analysis of something like // this: for (unsigned char X = 0; X < 100; ++X) { int Y = X; } if (const SCEVAddRecExpr *AR = dyn_cast(Op)) if (AR->isAffine()) { const SCEV *Start = AR->getStart(); const SCEV *Step = AR->getStepRecurrence(*this); unsigned BitWidth = getTypeSizeInBits(AR->getType()); const Loop *L = AR->getLoop(); if (!AR->hasNoUnsignedWrap()) { auto NewFlags = proveNoWrapViaConstantRanges(AR); const_cast(AR)->setNoWrapFlags(NewFlags); } // If we have special knowledge that this addrec won't overflow, // we don't need to do any further analysis. if (AR->hasNoUnsignedWrap()) return getAddRecExpr( getExtendAddRecStart(AR, Ty, this, Depth + 1), getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); // Check whether the backedge-taken count is SCEVCouldNotCompute. // Note that this serves two purposes: It filters out loops that are // simply not analyzable, and it covers the case where this code is // being called from within backedge-taken count analysis, such that // attempting to ask for the backedge-taken count would likely result // in infinite recursion. In the later case, the analysis code will // cope with a conservative value, and it will take care to purge // that value once it has finished. const SCEV *MaxBECount = getMaxBackedgeTakenCount(L); if (!isa(MaxBECount)) { // Manually compute the final value for AR, checking for // overflow. // Check whether the backedge-taken count can be losslessly casted to // the addrec's type. The count is always unsigned. const SCEV *CastedMaxBECount = getTruncateOrZeroExtend(MaxBECount, Start->getType()); const SCEV *RecastedMaxBECount = getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType()); if (MaxBECount == RecastedMaxBECount) { Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); // Check whether Start+Step*MaxBECount has no unsigned overflow. const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step, SCEV::FlagAnyWrap, Depth + 1); const SCEV *ZAdd = getZeroExtendExpr(getAddExpr(Start, ZMul, SCEV::FlagAnyWrap, Depth + 1), WideTy, Depth + 1); const SCEV *WideStart = getZeroExtendExpr(Start, WideTy, Depth + 1); const SCEV *WideMaxBECount = getZeroExtendExpr(CastedMaxBECount, WideTy, Depth + 1); const SCEV *OperandExtendedAdd = getAddExpr(WideStart, getMulExpr(WideMaxBECount, getZeroExtendExpr(Step, WideTy, Depth + 1), SCEV::FlagAnyWrap, Depth + 1), SCEV::FlagAnyWrap, Depth + 1); if (ZAdd == OperandExtendedAdd) { // Cache knowledge of AR NUW, which is propagated to this AddRec. const_cast(AR)->setNoWrapFlags(SCEV::FlagNUW); // Return the expression with the addrec on the outside. return getAddRecExpr( getExtendAddRecStart(AR, Ty, this, Depth + 1), getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); } // Similar to above, only this time treat the step value as signed. // This covers loops that count down. 
OperandExtendedAdd = getAddExpr(WideStart, getMulExpr(WideMaxBECount, getSignExtendExpr(Step, WideTy, Depth + 1), SCEV::FlagAnyWrap, Depth + 1), SCEV::FlagAnyWrap, Depth + 1); if (ZAdd == OperandExtendedAdd) { // Cache knowledge of AR NW, which is propagated to this AddRec. // Negative step causes unsigned wrap, but it still can't self-wrap. const_cast(AR)->setNoWrapFlags(SCEV::FlagNW); // Return the expression with the addrec on the outside. return getAddRecExpr( getExtendAddRecStart(AR, Ty, this, Depth + 1), getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); } } } // Normally, in the cases we can prove no-overflow via a // backedge guarding condition, we can also compute a backedge // taken count for the loop. The exceptions are assumptions and // guards present in the loop -- SCEV is not great at exploiting // these to compute max backedge taken counts, but can still use // these to prove lack of overflow. Use this fact to avoid // doing extra work that may not pay off. if (!isa(MaxBECount) || HasGuards || !AC.assumptions().empty()) { // If the backedge is guarded by a comparison with the pre-inc // value the addrec is safe. Also, if the entry is guarded by // a comparison with the start value and the backedge is // guarded by a comparison with the post-inc value, the addrec // is safe. if (isKnownPositive(Step)) { const SCEV *N = getConstant(APInt::getMinValue(BitWidth) - getUnsignedRangeMax(Step)); if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) || (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) && isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR->getPostIncExpr(*this), N))) { // Cache knowledge of AR NUW, which is propagated to this // AddRec. const_cast(AR)->setNoWrapFlags(SCEV::FlagNUW); // Return the expression with the addrec on the outside. return getAddRecExpr( getExtendAddRecStart(AR, Ty, this, Depth + 1), getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); } } else if (isKnownNegative(Step)) { const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) - getSignedRangeMin(Step)); if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) || (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) && isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR->getPostIncExpr(*this), N))) { // Cache knowledge of AR NW, which is propagated to this // AddRec. Negative step causes unsigned wrap, but it // still can't self-wrap. const_cast(AR)->setNoWrapFlags(SCEV::FlagNW); // Return the expression with the addrec on the outside. return getAddRecExpr( getExtendAddRecStart(AR, Ty, this, Depth + 1), getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); } } } if (proveNoWrapByVaryingStart(Start, Step, L)) { const_cast(AR)->setNoWrapFlags(SCEV::FlagNUW); return getAddRecExpr( getExtendAddRecStart(AR, Ty, this, Depth + 1), getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); } } if (auto *SA = dyn_cast(Op)) { // zext((A + B + ...)) --> (zext(A) + zext(B) + ...) if (SA->hasNoUnsignedWrap()) { // If the addition does not unsign overflow then we can, by definition, // commute the zero extension with the addition operation. SmallVector Ops; for (const auto *Op : SA->operands()) Ops.push_back(getZeroExtendExpr(Op, Ty, Depth + 1)); return getAddExpr(Ops, SCEV::FlagNUW, Depth + 1); } } // The cast wasn't folded; create an explicit cast node. // Recompute the insert position, as it may have been invalidated. 
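//===----------------------------------------------------------------------===//
// A minimal standalone sketch (not LLVM code) of the no-overflow test above
// in concrete terms: for an i8 addrec {Start,+,Step} running at most MaxBEC
// iterations, compute the final value at the narrow width (then widen) and
// again entirely at double width.  If the two agree, Start + Step*i never
// wrapped, so the zero extension can be pushed inside the recurrence.
#if 0
#include <cassert>
#include <cstdint>

int main() {
  uint8_t Start = 0, Step = 4;
  uint8_t MaxBEC = 50; // backedge taken at most 50 times
  uint8_t  Narrow = (uint8_t)(Start + (uint8_t)(Step * MaxBEC));
  uint16_t Wide   = (uint16_t)((uint16_t)Start + (uint16_t)Step * MaxBEC);
  assert((uint16_t)Narrow == Wide); // 200 fits in i8: no unsigned wrap
  // With Step = 6 the narrow result would be 300 mod 256 = 44 != 300,
  // and the equality (hence the NUW conclusion) would fail.
  return 0;
}
#endif
//===----------------------------------------------------------------------===//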
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); addToLoopUseLists(S); return S; } const SCEV * ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && "This is not an extending conversion!"); assert(isSCEVable(Ty) && "This is not a conversion to a SCEVable type!"); Ty = getEffectiveSCEVType(Ty); // Fold if the operand is constant. if (const SCEVConstant *SC = dyn_cast(Op)) return getConstant( cast(ConstantExpr::getSExt(SC->getValue(), Ty))); // sext(sext(x)) --> sext(x) if (const SCEVSignExtendExpr *SS = dyn_cast(Op)) return getSignExtendExpr(SS->getOperand(), Ty, Depth + 1); // sext(zext(x)) --> zext(x) if (const SCEVZeroExtendExpr *SZ = dyn_cast(Op)) return getZeroExtendExpr(SZ->getOperand(), Ty, Depth + 1); // Before doing any expensive analysis, check to see if we've already // computed a SCEV for this Op and Ty. FoldingSetNodeID ID; ID.AddInteger(scSignExtend); ID.AddPointer(Op); ID.AddPointer(Ty); void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; // Limit recursion depth. if (Depth > MaxExtDepth) { SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); addToLoopUseLists(S); return S; } // sext(trunc(x)) --> sext(x) or x or trunc(x) if (const SCEVTruncateExpr *ST = dyn_cast(Op)) { // It's possible the bits taken off by the truncate were all sign bits. If // so, we should be able to simplify this further. const SCEV *X = ST->getOperand(); ConstantRange CR = getSignedRange(X); unsigned TruncBits = getTypeSizeInBits(ST->getType()); unsigned NewBits = getTypeSizeInBits(Ty); if (CR.truncate(TruncBits).signExtend(NewBits).contains( CR.sextOrTrunc(NewBits))) return getTruncateOrSignExtend(X, Ty); } // sext(C1 + (C2 * x)) --> C1 + sext(C2 * x) if C1 < C2 if (auto *SA = dyn_cast(Op)) { if (SA->getNumOperands() == 2) { auto *SC1 = dyn_cast(SA->getOperand(0)); auto *SMul = dyn_cast(SA->getOperand(1)); if (SMul && SC1) { if (auto *SC2 = dyn_cast(SMul->getOperand(0))) { const APInt &C1 = SC1->getAPInt(); const APInt &C2 = SC2->getAPInt(); if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) && C2.isPowerOf2()) return getAddExpr(getSignExtendExpr(SC1, Ty, Depth + 1), getSignExtendExpr(SMul, Ty, Depth + 1), SCEV::FlagAnyWrap, Depth + 1); } } } // sext((A + B + ...)) --> (sext(A) + sext(B) + ...) if (SA->hasNoSignedWrap()) { // If the addition does not sign overflow then we can, by definition, // commute the sign extension with the addition operation. SmallVector Ops; for (const auto *Op : SA->operands()) Ops.push_back(getSignExtendExpr(Op, Ty, Depth + 1)); return getAddExpr(Ops, SCEV::FlagNSW, Depth + 1); } } // If the input value is a chrec scev, and we can prove that the value // did not overflow the old, smaller, value, we can sign extend all of the // operands (often constants). 
This allows analysis of something like // this: for (signed char X = 0; X < 100; ++X) { int Y = X; } if (const SCEVAddRecExpr *AR = dyn_cast(Op)) if (AR->isAffine()) { const SCEV *Start = AR->getStart(); const SCEV *Step = AR->getStepRecurrence(*this); unsigned BitWidth = getTypeSizeInBits(AR->getType()); const Loop *L = AR->getLoop(); if (!AR->hasNoSignedWrap()) { auto NewFlags = proveNoWrapViaConstantRanges(AR); const_cast(AR)->setNoWrapFlags(NewFlags); } // If we have special knowledge that this addrec won't overflow, // we don't need to do any further analysis. if (AR->hasNoSignedWrap()) return getAddRecExpr( getExtendAddRecStart(AR, Ty, this, Depth + 1), getSignExtendExpr(Step, Ty, Depth + 1), L, SCEV::FlagNSW); // Check whether the backedge-taken count is SCEVCouldNotCompute. // Note that this serves two purposes: It filters out loops that are // simply not analyzable, and it covers the case where this code is // being called from within backedge-taken count analysis, such that // attempting to ask for the backedge-taken count would likely result // in infinite recursion. In the later case, the analysis code will // cope with a conservative value, and it will take care to purge // that value once it has finished. const SCEV *MaxBECount = getMaxBackedgeTakenCount(L); if (!isa(MaxBECount)) { // Manually compute the final value for AR, checking for // overflow. // Check whether the backedge-taken count can be losslessly casted to // the addrec's type. The count is always unsigned. const SCEV *CastedMaxBECount = getTruncateOrZeroExtend(MaxBECount, Start->getType()); const SCEV *RecastedMaxBECount = getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType()); if (MaxBECount == RecastedMaxBECount) { Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); // Check whether Start+Step*MaxBECount has no signed overflow. const SCEV *SMul = getMulExpr(CastedMaxBECount, Step, SCEV::FlagAnyWrap, Depth + 1); const SCEV *SAdd = getSignExtendExpr(getAddExpr(Start, SMul, SCEV::FlagAnyWrap, Depth + 1), WideTy, Depth + 1); const SCEV *WideStart = getSignExtendExpr(Start, WideTy, Depth + 1); const SCEV *WideMaxBECount = getZeroExtendExpr(CastedMaxBECount, WideTy, Depth + 1); const SCEV *OperandExtendedAdd = getAddExpr(WideStart, getMulExpr(WideMaxBECount, getSignExtendExpr(Step, WideTy, Depth + 1), SCEV::FlagAnyWrap, Depth + 1), SCEV::FlagAnyWrap, Depth + 1); if (SAdd == OperandExtendedAdd) { // Cache knowledge of AR NSW, which is propagated to this AddRec. const_cast(AR)->setNoWrapFlags(SCEV::FlagNSW); // Return the expression with the addrec on the outside. return getAddRecExpr( getExtendAddRecStart(AR, Ty, this, Depth + 1), getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); } // Similar to above, only this time treat the step value as unsigned. // This covers loops that count up with an unsigned step. OperandExtendedAdd = getAddExpr(WideStart, getMulExpr(WideMaxBECount, getZeroExtendExpr(Step, WideTy, Depth + 1), SCEV::FlagAnyWrap, Depth + 1), SCEV::FlagAnyWrap, Depth + 1); if (SAdd == OperandExtendedAdd) { // If AR wraps around then // // abs(Step) * MaxBECount > unsigned-max(AR->getType()) // => SAdd != OperandExtendedAdd // // Thus (AR is not NW => SAdd != OperandExtendedAdd) <=> // (SAdd == OperandExtendedAdd => AR is NW) const_cast(AR)->setNoWrapFlags(SCEV::FlagNW); // Return the expression with the addrec on the outside. 
return getAddRecExpr( getExtendAddRecStart(AR, Ty, this, Depth + 1), getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); } } } // Normally, in the cases we can prove no-overflow via a // backedge guarding condition, we can also compute a backedge // taken count for the loop. The exceptions are assumptions and // guards present in the loop -- SCEV is not great at exploiting // these to compute max backedge taken counts, but can still use // these to prove lack of overflow. Use this fact to avoid // doing extra work that may not pay off. if (!isa(MaxBECount) || HasGuards || !AC.assumptions().empty()) { // If the backedge is guarded by a comparison with the pre-inc // value the addrec is safe. Also, if the entry is guarded by // a comparison with the start value and the backedge is // guarded by a comparison with the post-inc value, the addrec // is safe. ICmpInst::Predicate Pred; const SCEV *OverflowLimit = getSignedOverflowLimitForStep(Step, &Pred, this); if (OverflowLimit && (isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) || (isLoopEntryGuardedByCond(L, Pred, Start, OverflowLimit) && isLoopBackedgeGuardedByCond(L, Pred, AR->getPostIncExpr(*this), OverflowLimit)))) { // Cache knowledge of AR NSW, then propagate NSW to the wide AddRec. const_cast(AR)->setNoWrapFlags(SCEV::FlagNSW); return getAddRecExpr( getExtendAddRecStart(AR, Ty, this, Depth + 1), getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); } } // If Start and Step are constants, check if we can apply this // transformation: // sext{C1,+,C2} --> C1 + sext{0,+,C2} if C1 < C2 auto *SC1 = dyn_cast(Start); auto *SC2 = dyn_cast(Step); if (SC1 && SC2) { const APInt &C1 = SC1->getAPInt(); const APInt &C2 = SC2->getAPInt(); if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) && C2.isPowerOf2()) { Start = getSignExtendExpr(Start, Ty, Depth + 1); const SCEV *NewAR = getAddRecExpr(getZero(AR->getType()), Step, L, AR->getNoWrapFlags()); return getAddExpr(Start, getSignExtendExpr(NewAR, Ty, Depth + 1), SCEV::FlagAnyWrap, Depth + 1); } } if (proveNoWrapByVaryingStart(Start, Step, L)) { const_cast(AR)->setNoWrapFlags(SCEV::FlagNSW); return getAddRecExpr( getExtendAddRecStart(AR, Ty, this, Depth + 1), getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); } } // If the input value is provably positive and we could not simplify // away the sext build a zext instead. if (isKnownNonNegative(Op)) return getZeroExtendExpr(Op, Ty, Depth + 1); // The cast wasn't folded; create an explicit cast node. // Recompute the insert position, as it may have been invalidated. if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); addToLoopUseLists(S); return S; } /// getAnyExtendExpr - Return a SCEV for the given operand extended with /// unspecified bits out to the given type. const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op, Type *Ty) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && "This is not an extending conversion!"); assert(isSCEVable(Ty) && "This is not a conversion to a SCEVable type!"); Ty = getEffectiveSCEVType(Ty); // Sign-extend negative constants. if (const SCEVConstant *SC = dyn_cast(Op)) if (SC->getAPInt().isNegative()) return getSignExtendExpr(Op, Ty); // Peel off a truncate cast. 
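//===----------------------------------------------------------------------===//
// A minimal standalone sketch (not LLVM code) of the rationale for the
// isKnownNonNegative fold above: when the sign bit is provably clear, sign-
// and zero-extension produce identical bits, and the zext is preferred as the
// canonical form.
#if 0
#include <cassert>
#include <cstdint>

int main() {
  int8_t X = 42;                             // known non-negative
  assert((int64_t)X == (int64_t)(uint8_t)X); // sext == zext here
  return 0;
}
#endif
//===----------------------------------------------------------------------===//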
if (const SCEVTruncateExpr *T = dyn_cast(Op)) { const SCEV *NewOp = T->getOperand(); if (getTypeSizeInBits(NewOp->getType()) < getTypeSizeInBits(Ty)) return getAnyExtendExpr(NewOp, Ty); return getTruncateOrNoop(NewOp, Ty); } // Next try a zext cast. If the cast is folded, use it. const SCEV *ZExt = getZeroExtendExpr(Op, Ty); if (!isa(ZExt)) return ZExt; // Next try a sext cast. If the cast is folded, use it. const SCEV *SExt = getSignExtendExpr(Op, Ty); if (!isa(SExt)) return SExt; // Force the cast to be folded into the operands of an addrec. if (const SCEVAddRecExpr *AR = dyn_cast(Op)) { SmallVector Ops; for (const SCEV *Op : AR->operands()) Ops.push_back(getAnyExtendExpr(Op, Ty)); return getAddRecExpr(Ops, AR->getLoop(), SCEV::FlagNW); } // If the expression is obviously signed, use the sext cast value. if (isa(Op)) return SExt; // Absent any other information, use the zext cast value. return ZExt; } /// Process the given Ops list, which is a list of operands to be added under /// the given scale, update the given map. This is a helper function for /// getAddRecExpr. As an example of what it does, given a sequence of operands /// that would form an add expression like this: /// /// m + n + 13 + (A * (o + p + (B * (q + m + 29)))) + r + (-1 * r) /// /// where A and B are constants, update the map with these values: /// /// (m, 1+A*B), (n, 1), (o, A), (p, A), (q, A*B), (r, 0) /// /// and add 13 + A*B*29 to AccumulatedConstant. /// This will allow getAddRecExpr to produce this: /// /// 13+A*B*29 + n + (m * (1+A*B)) + ((o + p) * A) + (q * A*B) /// /// This form often exposes folding opportunities that are hidden in /// the original operand list. /// /// Return true iff it appears that any interesting folding opportunities /// may be exposed. This helps getAddRecExpr short-circuit extra work in /// the common case where no interesting opportunities are present, and /// is also used as a check to avoid infinite recursion. static bool CollectAddOperandsWithScales(DenseMap &M, SmallVectorImpl &NewOps, APInt &AccumulatedConstant, const SCEV *const *Ops, size_t NumOperands, const APInt &Scale, ScalarEvolution &SE) { bool Interesting = false; // Iterate over the add operands. They are sorted, with constants first. unsigned i = 0; while (const SCEVConstant *C = dyn_cast(Ops[i])) { ++i; // Pull a buried constant out to the outside. if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero()) Interesting = true; AccumulatedConstant += Scale * C->getAPInt(); } // Next comes everything else. We're especially interested in multiplies // here, but they're in the middle, so just visit the rest with one loop. for (; i != NumOperands; ++i) { const SCEVMulExpr *Mul = dyn_cast(Ops[i]); if (Mul && isa(Mul->getOperand(0))) { APInt NewScale = Scale * cast(Mul->getOperand(0))->getAPInt(); if (Mul->getNumOperands() == 2 && isa(Mul->getOperand(1))) { // A multiplication of a constant with another add; recurse. const SCEVAddExpr *Add = cast(Mul->getOperand(1)); Interesting |= CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant, Add->op_begin(), Add->getNumOperands(), NewScale, SE); } else { // A multiplication of a constant with some other value. Update // the map. SmallVector MulOps(Mul->op_begin()+1, Mul->op_end()); const SCEV *Key = SE.getMulExpr(MulOps); auto Pair = M.insert({Key, NewScale}); if (Pair.second) { NewOps.push_back(Pair.first->first); } else { Pair.first->second += NewScale; // The map already had an entry for this value, which may indicate // a folding opportunity. 
Interesting = true; } } } else { // An ordinary operand. Update the map. std::pair::iterator, bool> Pair = M.insert({Ops[i], Scale}); if (Pair.second) { NewOps.push_back(Pair.first->first); } else { Pair.first->second += Scale; // The map already had an entry for this value, which may indicate // a folding opportunity. Interesting = true; } } } return Interesting; } // We're trying to construct a SCEV of type `Type' with `Ops' as operands and // `OldFlags' as can't-wrap behavior. Infer a more aggressive set of // can't-overflow flags for the operation if possible. static SCEV::NoWrapFlags StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type, const SmallVectorImpl &Ops, SCEV::NoWrapFlags Flags) { using namespace std::placeholders; using OBO = OverflowingBinaryOperator; bool CanAnalyze = Type == scAddExpr || Type == scAddRecExpr || Type == scMulExpr; (void)CanAnalyze; assert(CanAnalyze && "don't call from other places!"); int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW; SCEV::NoWrapFlags SignOrUnsignWrap = ScalarEvolution::maskFlags(Flags, SignOrUnsignMask); // If FlagNSW is true and all the operands are non-negative, infer FlagNUW. auto IsKnownNonNegative = [&](const SCEV *S) { return SE->isKnownNonNegative(S); }; if (SignOrUnsignWrap == SCEV::FlagNSW && all_of(Ops, IsKnownNonNegative)) Flags = ScalarEvolution::setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask); SignOrUnsignWrap = ScalarEvolution::maskFlags(Flags, SignOrUnsignMask); if (SignOrUnsignWrap != SignOrUnsignMask && Type == scAddExpr && Ops.size() == 2 && isa(Ops[0])) { // (A + C) --> (A + C) if the addition does not sign overflow // (A + C) --> (A + C) if the addition does not unsign overflow const APInt &C = cast(Ops[0])->getAPInt(); if (!(SignOrUnsignWrap & SCEV::FlagNSW)) { auto NSWRegion = ConstantRange::makeGuaranteedNoWrapRegion( Instruction::Add, C, OBO::NoSignedWrap); if (NSWRegion.contains(SE->getSignedRange(Ops[1]))) Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW); } if (!(SignOrUnsignWrap & SCEV::FlagNUW)) { auto NUWRegion = ConstantRange::makeGuaranteedNoWrapRegion( Instruction::Add, C, OBO::NoUnsignedWrap); if (NUWRegion.contains(SE->getUnsignedRange(Ops[1]))) Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW); } } return Flags; } bool ScalarEvolution::isAvailableAtLoopEntry(const SCEV *S, const Loop *L) { if (!isLoopInvariant(S, L)) return false; // If a value depends on a SCEVUnknown which is defined after the loop, we // conservatively assume that we cannot calculate it at the loop's entry. 
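// Illustrative aside (editor's sketch, not part of the merged sources): the
// constant-operand strengthening in StrengthenNoWrapFlags above asks whether
// the other operand's signed range sits inside the guaranteed no-wrap region
// for "x + C". For i8 that region is simply [INT8_MIN - C, INT8_MAX - C];
// the hypothetical helper below re-derives the same answer without
// ConstantRange, using the fact that the sum is monotone in x.
#include <cstdint>

static bool addNswForAllX(int C, int Lo, int Hi) {
  // "x + C" is NSW for every i8 value x in [Lo, Hi] iff both endpoint sums
  // stay inside [INT8_MIN, INT8_MAX].
  return Lo + C >= INT8_MIN && Hi + C <= INT8_MAX;
}
// e.g. addNswForAllX(10, -20, 100) is true (110 <= 127), while
// addNswForAllX(10, -20, 120) is false (130 overflows i8).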
struct FindDominatedSCEVUnknown { bool Found = false; const Loop *L; DominatorTree &DT; LoopInfo &LI; FindDominatedSCEVUnknown(const Loop *L, DominatorTree &DT, LoopInfo &LI) : L(L), DT(DT), LI(LI) {} bool checkSCEVUnknown(const SCEVUnknown *SU) { if (auto *I = dyn_cast(SU->getValue())) { if (DT.dominates(L->getHeader(), I->getParent())) Found = true; else assert(DT.dominates(I->getParent(), L->getHeader()) && "No dominance relationship between SCEV and loop?"); } return false; } bool follow(const SCEV *S) { switch (static_cast(S->getSCEVType())) { case scConstant: return false; case scAddRecExpr: case scTruncate: case scZeroExtend: case scSignExtend: case scAddExpr: case scMulExpr: case scUMaxExpr: case scSMaxExpr: case scUDivExpr: return true; case scUnknown: return checkSCEVUnknown(cast(S)); case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); } return false; } bool isDone() { return Found; } }; FindDominatedSCEVUnknown FSU(L, DT, LI); SCEVTraversal ST(FSU); ST.visitAll(S); return !FSU.Found; } /// Get a canonical add expression, or something simpler if possible. const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, SCEV::NoWrapFlags Flags, unsigned Depth) { assert(!(Flags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) && "only nuw or nsw allowed"); assert(!Ops.empty() && "Cannot get empty add!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); for (unsigned i = 1, e = Ops.size(); i != e; ++i) assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && "SCEVAddExpr operand types don't match!"); #endif // Sort by complexity, this groups all similar expression types together. GroupByComplexity(Ops, &LI, DT); Flags = StrengthenNoWrapFlags(this, scAddExpr, Ops, Flags); // If there are any constants, fold them together. unsigned Idx = 0; if (const SCEVConstant *LHSC = dyn_cast(Ops[0])) { ++Idx; assert(Idx < Ops.size()); while (const SCEVConstant *RHSC = dyn_cast(Ops[Idx])) { // We found two constants, fold them together! Ops[0] = getConstant(LHSC->getAPInt() + RHSC->getAPInt()); if (Ops.size() == 2) return Ops[0]; Ops.erase(Ops.begin()+1); // Erase the folded element LHSC = cast(Ops[0]); } // If we are left with a constant zero being added, strip it off. if (LHSC->getValue()->isZero()) { Ops.erase(Ops.begin()); --Idx; } if (Ops.size() == 1) return Ops[0]; } // Limit recursion calls depth. if (Depth > MaxArithDepth) return getOrCreateAddExpr(Ops, Flags); // Okay, check to see if the same value occurs in the operand list more than // once. If so, merge them together into an multiply expression. Since we // sorted the list, these values are required to be adjacent. Type *Ty = Ops[0]->getType(); bool FoundMatch = false; for (unsigned i = 0, e = Ops.size(); i != e-1; ++i) if (Ops[i] == Ops[i+1]) { // X + Y + Y --> X + Y*2 // Scan ahead to count how many equal operands there are. unsigned Count = 2; while (i+Count != e && Ops[i+Count] == Ops[i]) ++Count; // Merge the values into a multiply. const SCEV *Scale = getConstant(Ty, Count); const SCEV *Mul = getMulExpr(Scale, Ops[i], SCEV::FlagAnyWrap, Depth + 1); if (Ops.size() == Count) return Mul; Ops[i] = Mul; Ops.erase(Ops.begin()+i+1, Ops.begin()+i+Count); --i; e -= Count - 1; FoundMatch = true; } if (FoundMatch) return getAddExpr(Ops, Flags, Depth + 1); // Check for truncates. If all the operands are truncated from the same // type, see if factoring out the truncate would permit the result to be // folded. 
  // eg., n*trunc(x) + m*trunc(y) --> trunc(trunc(n)*x + trunc(m)*y)
  // if the contents of the resulting outer trunc fold to something simple.
  auto FindTruncSrcType = [&]() -> Type * {
    // We're ultimately looking to fold an addrec of truncs and muls of only
    // constants and truncs, so if we find any other types of SCEV
    // as operands of the addrec then we bail and return nullptr here.
    // Otherwise, we return the type of the operand of a trunc that we find.
    if (auto *T = dyn_cast<SCEVTruncateExpr>(Ops[Idx]))
      return T->getOperand()->getType();
    if (const auto *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) {
      const auto *LastOp = Mul->getOperand(Mul->getNumOperands() - 1);
      if (const auto *T = dyn_cast<SCEVTruncateExpr>(LastOp))
        return T->getOperand()->getType();
    }
    return nullptr;
  };
  if (auto *SrcType = FindTruncSrcType()) {
    SmallVector<const SCEV *, 8> LargeOps;
    bool Ok = true;
    // Check all the operands to see if they can be represented in the
    // source type of the truncate.
    for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
      if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Ops[i])) {
        if (T->getOperand()->getType() != SrcType) {
          Ok = false;
          break;
        }
        LargeOps.push_back(T->getOperand());
      } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
        LargeOps.push_back(getAnyExtendExpr(C, SrcType));
      } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) {
        SmallVector<const SCEV *, 8> LargeMulOps;
        for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) {
          if (const SCEVTruncateExpr *T =
                  dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) {
            if (T->getOperand()->getType() != SrcType) {
              Ok = false;
              break;
            }
            LargeMulOps.push_back(T->getOperand());
          } else if (const auto *C = dyn_cast<SCEVConstant>(M->getOperand(j))) {
            LargeMulOps.push_back(getAnyExtendExpr(C, SrcType));
          } else {
            Ok = false;
            break;
          }
        }
        if (Ok)
          LargeOps.push_back(getMulExpr(LargeMulOps, SCEV::FlagAnyWrap,
                                        Depth + 1));
      } else {
        Ok = false;
        break;
      }
    }
    if (Ok) {
      // Evaluate the expression in the larger type.
      const SCEV *Fold = getAddExpr(LargeOps, Flags, Depth + 1);
      // If it folds to something simple, use it. Otherwise, don't.
      if (isa<SCEVConstant>(Fold) || isa<SCEVAddRecExpr>(Fold))
        return getTruncateExpr(Fold, Ty);
    }
  }

  // Skip past any other cast SCEVs.
  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr)
    ++Idx;

  // If there are add operands they would be next.
  if (Idx < Ops.size()) {
    bool DeletedAdd = false;
    while (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[Idx])) {
      if (Ops.size() > AddOpsInlineThreshold ||
          Add->getNumOperands() > AddOpsInlineThreshold)
        break;
      // If we have an add, expand the add operands onto the end of the
      // operands list.
      Ops.erase(Ops.begin()+Idx);
      Ops.append(Add->op_begin(), Add->op_end());
      DeletedAdd = true;
    }
    // If we deleted at least one add, we added operands to the end of the
    // list, and they are not necessarily sorted. Recurse to resort and
    // resimplify any operands we just acquired.
    if (DeletedAdd)
      return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
  }

  // Skip over the add expression until we get to a multiply.
  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
    ++Idx;

  // Check to see if there are any folding opportunities present with
  // operands multiplied by constant values.
  if (Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx])) {
    uint64_t BitWidth = getTypeSizeInBits(Ty);
    DenseMap<const SCEV *, APInt> M;
    SmallVector<const SCEV *, 8> NewOps;
    APInt AccumulatedConstant(BitWidth, 0);
    if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
                                     Ops.data(), Ops.size(),
                                     APInt(BitWidth, 1), *this)) {
      struct APIntCompare {
        bool operator()(const APInt &LHS, const APInt &RHS) const {
          return LHS.ult(RHS);
        }
      };
      // Some interesting folding opportunity is present, so it's worthwhile to
      // re-generate the operands list.
Group the operands by constant scale, // to avoid multiplying by the same constant scale multiple times. std::map, APIntCompare> MulOpLists; for (const SCEV *NewOp : NewOps) MulOpLists[M.find(NewOp)->second].push_back(NewOp); // Re-generate the operands list. Ops.clear(); if (AccumulatedConstant != 0) Ops.push_back(getConstant(AccumulatedConstant)); for (auto &MulOp : MulOpLists) if (MulOp.first != 0) Ops.push_back(getMulExpr( getConstant(MulOp.first), getAddExpr(MulOp.second, SCEV::FlagAnyWrap, Depth + 1), SCEV::FlagAnyWrap, Depth + 1)); if (Ops.empty()) return getZero(Ty); if (Ops.size() == 1) return Ops[0]; return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); } } // If we are adding something to a multiply expression, make sure the // something is not already an operand of the multiply. If so, merge it into // the multiply. for (; Idx < Ops.size() && isa(Ops[Idx]); ++Idx) { const SCEVMulExpr *Mul = cast(Ops[Idx]); for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) { const SCEV *MulOpSCEV = Mul->getOperand(MulOp); if (isa(MulOpSCEV)) continue; for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp) if (MulOpSCEV == Ops[AddOp]) { // Fold W + X + (X * Y * Z) --> W + (X * ((Y*Z)+1)) const SCEV *InnerMul = Mul->getOperand(MulOp == 0); if (Mul->getNumOperands() != 2) { // If the multiply has more than two operands, we must get the // Y*Z term. SmallVector MulOps(Mul->op_begin(), Mul->op_begin()+MulOp); MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end()); InnerMul = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1); } SmallVector TwoOps = {getOne(Ty), InnerMul}; const SCEV *AddOne = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1); const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV, SCEV::FlagAnyWrap, Depth + 1); if (Ops.size() == 2) return OuterMul; if (AddOp < Idx) { Ops.erase(Ops.begin()+AddOp); Ops.erase(Ops.begin()+Idx-1); } else { Ops.erase(Ops.begin()+Idx); Ops.erase(Ops.begin()+AddOp-1); } Ops.push_back(OuterMul); return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); } // Check this multiply against other multiplies being added together. for (unsigned OtherMulIdx = Idx+1; OtherMulIdx < Ops.size() && isa(Ops[OtherMulIdx]); ++OtherMulIdx) { const SCEVMulExpr *OtherMul = cast(Ops[OtherMulIdx]); // If MulOp occurs in OtherMul, we can fold the two multiplies // together. 
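// Illustrative aside (editor's sketch, not part of the merged sources): a
// brute-force check of the algebra behind the two-multiply fold performed
// below, X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E)); SCEV applies the same
// rewrite symbolically on expression nodes.
#include <cassert>

static void checkTwoMulFold() {
  for (int A = -3; A <= 3; ++A)
    for (int B = -3; B <= 3; ++B)
      for (int C = -3; C <= 3; ++C)
        for (int D = -3; D <= 3; ++D)
          for (int E = -3; E <= 3; ++E)
            assert(7 + (A * B * C) + (A * D * E) ==
                   7 + (A * (B * C + D * E)));
}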
for (unsigned OMulOp = 0, e = OtherMul->getNumOperands(); OMulOp != e; ++OMulOp) if (OtherMul->getOperand(OMulOp) == MulOpSCEV) { // Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E)) const SCEV *InnerMul1 = Mul->getOperand(MulOp == 0); if (Mul->getNumOperands() != 2) { SmallVector MulOps(Mul->op_begin(), Mul->op_begin()+MulOp); MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end()); InnerMul1 = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1); } const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0); if (OtherMul->getNumOperands() != 2) { SmallVector MulOps(OtherMul->op_begin(), OtherMul->op_begin()+OMulOp); MulOps.append(OtherMul->op_begin()+OMulOp+1, OtherMul->op_end()); InnerMul2 = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1); } SmallVector TwoOps = {InnerMul1, InnerMul2}; const SCEV *InnerMulSum = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1); const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum, SCEV::FlagAnyWrap, Depth + 1); if (Ops.size() == 2) return OuterMul; Ops.erase(Ops.begin()+Idx); Ops.erase(Ops.begin()+OtherMulIdx-1); Ops.push_back(OuterMul); return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); } } } } // If there are any add recurrences in the operands list, see if any other // added values are loop invariant. If so, we can fold them into the // recurrence. while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr) ++Idx; // Scan over all recurrences, trying to fold loop invariants into them. for (; Idx < Ops.size() && isa(Ops[Idx]); ++Idx) { // Scan all of the other operands to this add and add them to the vector if // they are loop invariant w.r.t. the recurrence. SmallVector LIOps; const SCEVAddRecExpr *AddRec = cast(Ops[Idx]); const Loop *AddRecLoop = AddRec->getLoop(); for (unsigned i = 0, e = Ops.size(); i != e; ++i) if (isAvailableAtLoopEntry(Ops[i], AddRecLoop)) { LIOps.push_back(Ops[i]); Ops.erase(Ops.begin()+i); --i; --e; } // If we found some loop invariants, fold them into the recurrence. if (!LIOps.empty()) { // NLI + LI + {Start,+,Step} --> NLI + {LI+Start,+,Step} LIOps.push_back(AddRec->getStart()); SmallVector AddRecOps(AddRec->op_begin(), AddRec->op_end()); // This follows from the fact that the no-wrap flags on the outer add // expression are applicable on the 0th iteration, when the add recurrence // will be equal to its start value. AddRecOps[0] = getAddExpr(LIOps, Flags, Depth + 1); // Build the new addrec. Propagate the NUW and NSW flags if both the // outer add and the inner addrec are guaranteed to have no overflow. // Always propagate NW. Flags = AddRec->getNoWrapFlags(setFlags(Flags, SCEV::FlagNW)); const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop, Flags); // If all of the other operands were loop invariant, we are done. if (Ops.size() == 1) return NewRec; // Otherwise, add the folded AddRec by the non-invariant parts. for (unsigned i = 0;; ++i) if (Ops[i] == AddRec) { Ops[i] = NewRec; break; } return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); } // Okay, if there weren't any loop invariants to be folded, check to see if // there are multiple AddRec's with the same loop induction variable being // added together. If so, we can fold them. for (unsigned OtherIdx = Idx+1; OtherIdx < Ops.size() && isa(Ops[OtherIdx]); ++OtherIdx) { // We expect the AddRecExpr's to be sorted in reverse dominance order, // so that the 1st found AddRecExpr is dominated by all others. 
      assert(DT.dominates(
                 cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()->getHeader(),
                 AddRec->getLoop()->getHeader()) &&
             "AddRecExprs are not sorted in reverse dominance order?");
      if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
        // Other + {A,+,B} + {C,+,D}  -->  Other + {A+C,+,B+D}
        SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
                                               AddRec->op_end());
        for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
             ++OtherIdx) {
          const auto *OtherAddRec = cast<SCEVAddRecExpr>(Ops[OtherIdx]);
          if (OtherAddRec->getLoop() == AddRecLoop) {
            for (unsigned i = 0, e = OtherAddRec->getNumOperands();
                 i != e; ++i) {
              if (i >= AddRecOps.size()) {
                AddRecOps.append(OtherAddRec->op_begin()+i,
                                 OtherAddRec->op_end());
                break;
              }
              SmallVector<const SCEV *, 2> TwoOps = {
                  AddRecOps[i], OtherAddRec->getOperand(i)};
              AddRecOps[i] = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1);
            }
            Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
          }
        }
        // Step size has changed, so we cannot guarantee no self-wraparound.
        Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap);
        return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
      }
    }

    // Otherwise couldn't fold anything into this recurrence. Move onto the
    // next one.
  }

  // Okay, it looks like we really DO need an add expr. Check to see if we
  // already have one, otherwise create a new one.
  return getOrCreateAddExpr(Ops, Flags);
}

const SCEV *
ScalarEvolution::getOrCreateAddExpr(SmallVectorImpl<const SCEV *> &Ops,
                                    SCEV::NoWrapFlags Flags) {
  FoldingSetNodeID ID;
  ID.AddInteger(scAddExpr);
  for (const SCEV *Op : Ops)
    ID.AddPointer(Op);
  void *IP = nullptr;
  SCEVAddExpr *S =
      static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
  if (!S) {
    const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
    std::uninitialized_copy(Ops.begin(), Ops.end(), O);
    S = new (SCEVAllocator)
        SCEVAddExpr(ID.Intern(SCEVAllocator), O, Ops.size());
    UniqueSCEVs.InsertNode(S, IP);
    addToLoopUseLists(S);
  }
  S->setNoWrapFlags(Flags);
  return S;
}

const SCEV *
ScalarEvolution::getOrCreateMulExpr(SmallVectorImpl<const SCEV *> &Ops,
                                    SCEV::NoWrapFlags Flags) {
  FoldingSetNodeID ID;
  ID.AddInteger(scMulExpr);
  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    ID.AddPointer(Ops[i]);
  void *IP = nullptr;
  SCEVMulExpr *S =
      static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
  if (!S) {
    const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
    std::uninitialized_copy(Ops.begin(), Ops.end(), O);
    S = new (SCEVAllocator)
        SCEVMulExpr(ID.Intern(SCEVAllocator), O, Ops.size());
    UniqueSCEVs.InsertNode(S, IP);
    addToLoopUseLists(S);
  }
  S->setNoWrapFlags(Flags);
  return S;
}

static uint64_t umul_ov(uint64_t i, uint64_t j, bool &Overflow) {
  uint64_t k = i*j;
  if (j > 1 && k / j != i)
    Overflow = true;
  return k;
}

/// Compute the result of "n choose k", the binomial coefficient. If an
/// intermediate computation overflows, Overflow will be set and the return
/// will be garbage. Overflow is not cleared on absence of overflow.
static uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow) {
  // We use the multiplicative formula:
  //     n(n-1)(n-2)...(n-(k-1)) / k(k-1)(k-2)...1 .
  // At each iteration, we take the n-th term of the numerator and divide by
  // the (k-n)th term of the denominator. This division will always produce
  // an integral result, and helps reduce the chance of overflow in the
  // intermediate computations. However, we can still overflow even when the
  // final result would fit.
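// Illustrative aside (editor's sketch, not part of the merged sources): the
// intermediate product in the loop below is roughly Choose(n, i) * i, so
// Overflow can be reported even when the final coefficient fits in 64 bits;
// for instance C(66, 33) is about 7.2e18, which fits in a uint64_t, but the
// intermediate exceeds 2^64. The hypothetical stand-alone replica below
// reproduces just the overflow test:
#include <cstdint>

static bool chooseWouldOverflow(uint64_t n, uint64_t k) {
  if (k > n) return false;
  if (k > n / 2) k = n - k;
  uint64_t r = 1;
  bool Ov = false;
  for (uint64_t i = 1; i <= k; ++i) {
    uint64_t m = n - i + 1;
    uint64_t t = r * m;
    if (m > 1 && t / m != r)     // same test as umul_ov above
      Ov = true;
    r = t / i;
  }
  return Ov;                     // true for (66, 33) despite the result fitting
}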
if (n == 0 || n == k) return 1; if (k > n) return 0; if (k > n/2) k = n-k; uint64_t r = 1; for (uint64_t i = 1; i <= k; ++i) { r = umul_ov(r, n-(i-1), Overflow); r /= i; } return r; } /// Determine if any of the operands in this SCEV are a constant or if /// any of the add or multiply expressions in this SCEV contain a constant. static bool containsConstantInAddMulChain(const SCEV *StartExpr) { struct FindConstantInAddMulChain { bool FoundConstant = false; bool follow(const SCEV *S) { FoundConstant |= isa(S); return isa(S) || isa(S); } bool isDone() const { return FoundConstant; } }; FindConstantInAddMulChain F; SCEVTraversal ST(F); ST.visitAll(StartExpr); return F.FoundConstant; } /// Get a canonical multiply expression, or something simpler if possible. const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, SCEV::NoWrapFlags Flags, unsigned Depth) { assert(Flags == maskFlags(Flags, SCEV::FlagNUW | SCEV::FlagNSW) && "only nuw or nsw allowed"); assert(!Ops.empty() && "Cannot get empty mul!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); for (unsigned i = 1, e = Ops.size(); i != e; ++i) assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && "SCEVMulExpr operand types don't match!"); #endif // Sort by complexity, this groups all similar expression types together. GroupByComplexity(Ops, &LI, DT); Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags); // Limit recursion calls depth. if (Depth > MaxArithDepth) return getOrCreateMulExpr(Ops, Flags); // If there are any constants, fold them together. unsigned Idx = 0; if (const SCEVConstant *LHSC = dyn_cast(Ops[0])) { // C1*(C2+V) -> C1*C2 + C1*V if (Ops.size() == 2) if (const SCEVAddExpr *Add = dyn_cast(Ops[1])) // If any of Add's ops are Adds or Muls with a constant, // apply this transformation as well. if (Add->getNumOperands() == 2) // TODO: There are some cases where this transformation is not // profitable, for example: // Add = (C0 + X) * Y + Z. // Maybe the scope of this transformation should be narrowed down. if (containsConstantInAddMulChain(Add)) return getAddExpr(getMulExpr(LHSC, Add->getOperand(0), SCEV::FlagAnyWrap, Depth + 1), getMulExpr(LHSC, Add->getOperand(1), SCEV::FlagAnyWrap, Depth + 1), SCEV::FlagAnyWrap, Depth + 1); ++Idx; while (const SCEVConstant *RHSC = dyn_cast(Ops[Idx])) { // We found two constants, fold them together! ConstantInt *Fold = ConstantInt::get(getContext(), LHSC->getAPInt() * RHSC->getAPInt()); Ops[0] = getConstant(Fold); Ops.erase(Ops.begin()+1); // Erase the folded element if (Ops.size() == 1) return Ops[0]; LHSC = cast(Ops[0]); } // If we are left with a constant one being multiplied, strip it off. if (cast(Ops[0])->getValue()->isOne()) { Ops.erase(Ops.begin()); --Idx; } else if (cast(Ops[0])->getValue()->isZero()) { // If we have a multiply of zero, it will always be zero. return Ops[0]; } else if (Ops[0]->isAllOnesValue()) { // If we have a mul by -1 of an add, try distributing the -1 among the // add operands. if (Ops.size() == 2) { if (const SCEVAddExpr *Add = dyn_cast(Ops[1])) { SmallVector NewOps; bool AnyFolded = false; for (const SCEV *AddOp : Add->operands()) { const SCEV *Mul = getMulExpr(Ops[0], AddOp, SCEV::FlagAnyWrap, Depth + 1); if (!isa(Mul)) AnyFolded = true; NewOps.push_back(Mul); } if (AnyFolded) return getAddExpr(NewOps, SCEV::FlagAnyWrap, Depth + 1); } else if (const auto *AddRec = dyn_cast(Ops[1])) { // Negation preserves a recurrence's no self-wrap property. 
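// Illustrative aside (editor's sketch, not part of the merged sources):
// the rewrite below maps -1 * {A,+,B} to {-A,+,-B}; negating every operand
// negates the recurrence's value at every iteration, which is why the
// no-self-wrap property survives. A scalar check of the identity:
#include <cassert>

static void checkNegatedAddRec() {
  const long A = 5, B = 3;
  for (long i = 0; i < 8; ++i)
    assert(-1 * (A + B * i) == (-A) + (-B) * i);  // {A,+,B} at iteration i
}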
SmallVector Operands; for (const SCEV *AddRecOp : AddRec->operands()) Operands.push_back(getMulExpr(Ops[0], AddRecOp, SCEV::FlagAnyWrap, Depth + 1)); return getAddRecExpr(Operands, AddRec->getLoop(), AddRec->getNoWrapFlags(SCEV::FlagNW)); } } } if (Ops.size() == 1) return Ops[0]; } // Skip over the add expression until we get to a multiply. while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr) ++Idx; // If there are mul operands inline them all into this expression. if (Idx < Ops.size()) { bool DeletedMul = false; while (const SCEVMulExpr *Mul = dyn_cast(Ops[Idx])) { if (Ops.size() > MulOpsInlineThreshold) break; // If we have an mul, expand the mul operands onto the end of the // operands list. Ops.erase(Ops.begin()+Idx); Ops.append(Mul->op_begin(), Mul->op_end()); DeletedMul = true; } // If we deleted at least one mul, we added operands to the end of the // list, and they are not necessarily sorted. Recurse to resort and // resimplify any operands we just acquired. if (DeletedMul) return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); } // If there are any add recurrences in the operands list, see if any other // added values are loop invariant. If so, we can fold them into the // recurrence. while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr) ++Idx; // Scan over all recurrences, trying to fold loop invariants into them. for (; Idx < Ops.size() && isa(Ops[Idx]); ++Idx) { // Scan all of the other operands to this mul and add them to the vector // if they are loop invariant w.r.t. the recurrence. SmallVector LIOps; const SCEVAddRecExpr *AddRec = cast(Ops[Idx]); const Loop *AddRecLoop = AddRec->getLoop(); for (unsigned i = 0, e = Ops.size(); i != e; ++i) if (isAvailableAtLoopEntry(Ops[i], AddRecLoop)) { LIOps.push_back(Ops[i]); Ops.erase(Ops.begin()+i); --i; --e; } // If we found some loop invariants, fold them into the recurrence. if (!LIOps.empty()) { // NLI * LI * {Start,+,Step} --> NLI * {LI*Start,+,LI*Step} SmallVector NewOps; NewOps.reserve(AddRec->getNumOperands()); const SCEV *Scale = getMulExpr(LIOps, SCEV::FlagAnyWrap, Depth + 1); for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i), SCEV::FlagAnyWrap, Depth + 1)); // Build the new addrec. Propagate the NUW and NSW flags if both the // outer mul and the inner addrec are guaranteed to have no overflow. // // No self-wrap cannot be guaranteed after changing the step size, but // will be inferred if either NUW or NSW is true. Flags = AddRec->getNoWrapFlags(clearFlags(Flags, SCEV::FlagNW)); const SCEV *NewRec = getAddRecExpr(NewOps, AddRecLoop, Flags); // If all of the other operands were loop invariant, we are done. if (Ops.size() == 1) return NewRec; // Otherwise, multiply the folded AddRec by the non-invariant parts. for (unsigned i = 0;; ++i) if (Ops[i] == AddRec) { Ops[i] = NewRec; break; } return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); } // Okay, if there weren't any loop invariants to be folded, check to see // if there are multiple AddRec's with the same loop induction variable // being multiplied together. If so, we can fold them. // {A1,+,A2,+,...,+,An} * {B1,+,B2,+,...,+,Bn} // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [ // choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z // ]]],+,...up to x=2n}. // Note that the arguments to choose() are always integers with values // known at compile time, never SCEV objects. 
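// Illustrative aside (editor's sketch, not part of the merged sources): the
// smallest instance of the formula above is the product of two affine
// recurrences, {a,+,b} * {c,+,d} = {a*c, +, a*d + b*c + b*d, +, 2*b*d}. The
// check below evaluates both sides iteration by iteration, stepping the
// result chrec through its difference table.
#include <cassert>

static void checkChrecProduct() {
  long v0 = 2, v1 = 5;                        // {2,+,3} and {5,+,7}
  const long b = 3, d = 7;
  long r0 = v0 * v1;                          // a*c
  long r1 = 2 * 7 + 3 * 5 + 3 * 7;            // a*d + b*c + b*d
  long r2 = 2 * 3 * 7;                        // 2*b*d
  for (int i = 0; i < 10; ++i) {
    assert(v0 * v1 == r0);
    v0 += b; v1 += d;                         // step the inputs
    r0 += r1; r1 += r2;                       // step the result
  }
}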
// // The implementation avoids pointless extra computations when the two // addrec's are of different length (mathematically, it's equivalent to // an infinite stream of zeros on the right). bool OpsModified = false; for (unsigned OtherIdx = Idx+1; OtherIdx != Ops.size() && isa(Ops[OtherIdx]); ++OtherIdx) { const SCEVAddRecExpr *OtherAddRec = dyn_cast(Ops[OtherIdx]); if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop) continue; // Limit max number of arguments to avoid creation of unreasonably big // SCEVAddRecs with very complex operands. if (AddRec->getNumOperands() + OtherAddRec->getNumOperands() - 1 > MaxAddRecSize) continue; bool Overflow = false; Type *Ty = AddRec->getType(); bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64; SmallVector AddRecOps; for (int x = 0, xe = AddRec->getNumOperands() + OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) { const SCEV *Term = getZero(Ty); for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) { uint64_t Coeff1 = Choose(x, 2*x - y, Overflow); for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1), ze = std::min(x+1, (int)OtherAddRec->getNumOperands()); z < ze && !Overflow; ++z) { uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow); uint64_t Coeff; if (LargerThan64Bits) Coeff = umul_ov(Coeff1, Coeff2, Overflow); else Coeff = Coeff1*Coeff2; const SCEV *CoeffTerm = getConstant(Ty, Coeff); const SCEV *Term1 = AddRec->getOperand(y-z); const SCEV *Term2 = OtherAddRec->getOperand(z); Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1, Term2, SCEV::FlagAnyWrap, Depth + 1), SCEV::FlagAnyWrap, Depth + 1); } } AddRecOps.push_back(Term); } if (!Overflow) { const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRec->getLoop(), SCEV::FlagAnyWrap); if (Ops.size() == 2) return NewAddRec; Ops[Idx] = NewAddRec; Ops.erase(Ops.begin() + OtherIdx); --OtherIdx; OpsModified = true; AddRec = dyn_cast(NewAddRec); if (!AddRec) break; } } if (OpsModified) return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); // Otherwise couldn't fold anything into this recurrence. Move onto the // next one. } // Okay, it looks like we really DO need an mul expr. Check to see if we // already have one, otherwise create a new one. return getOrCreateMulExpr(Ops, Flags); } /// Represents an unsigned remainder expression based on unsigned division. const SCEV *ScalarEvolution::getURemExpr(const SCEV *LHS, const SCEV *RHS) { assert(getEffectiveSCEVType(LHS->getType()) == getEffectiveSCEVType(RHS->getType()) && "SCEVURemExpr operand types don't match!"); // Short-circuit easy cases if (const SCEVConstant *RHSC = dyn_cast(RHS)) { // If constant is one, the result is trivial if (RHSC->getValue()->isOne()) return getZero(LHS->getType()); // X urem 1 --> 0 // If constant is a power of two, fold into a zext(trunc(LHS)). if (RHSC->getAPInt().isPowerOf2()) { Type *FullTy = LHS->getType(); Type *TruncTy = IntegerType::get(getContext(), RHSC->getAPInt().logBase2()); return getZeroExtendExpr(getTruncateExpr(LHS, TruncTy), FullTy); } } // Fallback to %a == %x urem %y == %x - ((%x udiv %y) * %y) const SCEV *UDiv = getUDivExpr(LHS, RHS); const SCEV *Mult = getMulExpr(UDiv, RHS, SCEV::FlagNUW); return getMinusSCEV(LHS, Mult, SCEV::FlagNUW); } /// Get a canonical unsigned division expression, or something simpler if /// possible. 
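// Illustrative aside (editor's sketch, not part of the merged sources): the
// power-of-two case in getURemExpr above uses the identity
// x urem 2^k == zext(trunc(x)), i.e. keeping only the low k bits. A
// standalone check for k = 3:
#include <cstdint>
#include <cassert>

static void checkUremPow2() {
  for (uint32_t x = 0; x < 1024; ++x) {
    uint8_t Low3 = uint8_t(x) & 0x7;          // trunc to the low 3 bits
    assert(x % 8 == uint32_t(Low3));          // zext back and compare
  }
}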
const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, const SCEV *RHS) { assert(getEffectiveSCEVType(LHS->getType()) == getEffectiveSCEVType(RHS->getType()) && "SCEVUDivExpr operand types don't match!"); if (const SCEVConstant *RHSC = dyn_cast(RHS)) { if (RHSC->getValue()->isOne()) return LHS; // X udiv 1 --> x // If the denominator is zero, the result of the udiv is undefined. Don't // try to analyze it, because the resolution chosen here may differ from // the resolution chosen in other parts of the compiler. if (!RHSC->getValue()->isZero()) { // Determine if the division can be folded into the operands of // its operands. // TODO: Generalize this to non-constants by using known-bits information. Type *Ty = LHS->getType(); unsigned LZ = RHSC->getAPInt().countLeadingZeros(); unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1; // For non-power-of-two values, effectively round the value up to the // nearest power of two. if (!RHSC->getAPInt().isPowerOf2()) ++MaxShiftAmt; IntegerType *ExtTy = IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt); if (const SCEVAddRecExpr *AR = dyn_cast(LHS)) if (const SCEVConstant *Step = dyn_cast(AR->getStepRecurrence(*this))) { // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded. const APInt &StepInt = Step->getAPInt(); const APInt &DivInt = RHSC->getAPInt(); if (!StepInt.urem(DivInt) && getZeroExtendExpr(AR, ExtTy) == getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy), getZeroExtendExpr(Step, ExtTy), AR->getLoop(), SCEV::FlagAnyWrap)) { SmallVector Operands; for (const SCEV *Op : AR->operands()) Operands.push_back(getUDivExpr(Op, RHS)); return getAddRecExpr(Operands, AR->getLoop(), SCEV::FlagNW); } /// Get a canonical UDivExpr for a recurrence. /// {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0. // We can currently only fold X%N if X is constant. const SCEVConstant *StartC = dyn_cast(AR->getStart()); if (StartC && !DivInt.urem(StepInt) && getZeroExtendExpr(AR, ExtTy) == getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy), getZeroExtendExpr(Step, ExtTy), AR->getLoop(), SCEV::FlagAnyWrap)) { const APInt &StartInt = StartC->getAPInt(); const APInt &StartRem = StartInt.urem(StepInt); if (StartRem != 0) LHS = getAddRecExpr(getConstant(StartInt - StartRem), Step, AR->getLoop(), SCEV::FlagNW); } } // (A*B)/C --> A*(B/C) if safe and B/C can be folded. if (const SCEVMulExpr *M = dyn_cast(LHS)) { SmallVector Operands; for (const SCEV *Op : M->operands()) Operands.push_back(getZeroExtendExpr(Op, ExtTy)); if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands)) // Find an operand that's safely divisible. for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { const SCEV *Op = M->getOperand(i); const SCEV *Div = getUDivExpr(Op, RHSC); if (!isa(Div) && getMulExpr(Div, RHSC) == Op) { Operands = SmallVector(M->op_begin(), M->op_end()); Operands[i] = Div; return getMulExpr(Operands); } } } // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded. if (const SCEVAddExpr *A = dyn_cast(LHS)) { SmallVector Operands; for (const SCEV *Op : A->operands()) Operands.push_back(getZeroExtendExpr(Op, ExtTy)); if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) { Operands.clear(); for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) { const SCEV *Op = getUDivExpr(A->getOperand(i), RHS); if (isa(Op) || getMulExpr(Op, RHS) != A->getOperand(i)) break; Operands.push_back(Op); } if (Operands.size() == A->getNumOperands()) return getAddExpr(Operands); } } // Fold if both operands are constant. 
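// Illustrative aside (editor's sketch, not part of the merged sources): the
// recurrence fold above, {X,+,N}/C --> {X/C,+,N/C}, is attempted only when
// the zero-extended forms agree, which rules out wrap-around. With C
// dividing both X and N the scalar identity is immediate:
#include <cassert>

static void checkAddRecUDiv() {
  const unsigned X = 8, N = 4, C = 2;
  for (unsigned i = 0; i < 16; ++i)
    assert((X + N * i) / C == X / C + (N / C) * i);
}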
if (const SCEVConstant *LHSC = dyn_cast(LHS)) { Constant *LHSCV = LHSC->getValue(); Constant *RHSCV = RHSC->getValue(); return getConstant(cast(ConstantExpr::getUDiv(LHSCV, RHSCV))); } } } FoldingSetNodeID ID; ID.AddInteger(scUDivExpr); ID.AddPointer(LHS); ID.AddPointer(RHS); void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator), LHS, RHS); UniqueSCEVs.InsertNode(S, IP); addToLoopUseLists(S); return S; } static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) { APInt A = C1->getAPInt().abs(); APInt B = C2->getAPInt().abs(); uint32_t ABW = A.getBitWidth(); uint32_t BBW = B.getBitWidth(); if (ABW > BBW) B = B.zext(ABW); else if (ABW < BBW) A = A.zext(BBW); return APIntOps::GreatestCommonDivisor(std::move(A), std::move(B)); } /// Get a canonical unsigned division expression, or something simpler if /// possible. There is no representation for an exact udiv in SCEV IR, but we /// can attempt to remove factors from the LHS and RHS. We can't do this when /// it's not exact because the udiv may be clearing bits. const SCEV *ScalarEvolution::getUDivExactExpr(const SCEV *LHS, const SCEV *RHS) { // TODO: we could try to find factors in all sorts of things, but for now we // just deal with u/exact (multiply, constant). See SCEVDivision towards the // end of this file for inspiration. const SCEVMulExpr *Mul = dyn_cast(LHS); if (!Mul || !Mul->hasNoUnsignedWrap()) return getUDivExpr(LHS, RHS); if (const SCEVConstant *RHSCst = dyn_cast(RHS)) { // If the mulexpr multiplies by a constant, then that constant must be the // first element of the mulexpr. if (const auto *LHSCst = dyn_cast(Mul->getOperand(0))) { if (LHSCst == RHSCst) { SmallVector Operands; Operands.append(Mul->op_begin() + 1, Mul->op_end()); return getMulExpr(Operands); } // We can't just assume that LHSCst divides RHSCst cleanly, it could be // that there's a factor provided by one of the other terms. We need to // check. APInt Factor = gcd(LHSCst, RHSCst); if (!Factor.isIntN(1)) { LHSCst = cast(getConstant(LHSCst->getAPInt().udiv(Factor))); RHSCst = cast(getConstant(RHSCst->getAPInt().udiv(Factor))); SmallVector Operands; Operands.push_back(LHSCst); Operands.append(Mul->op_begin() + 1, Mul->op_end()); LHS = getMulExpr(Operands); RHS = RHSCst; Mul = dyn_cast(LHS); if (!Mul) return getUDivExactExpr(LHS, RHS); } } } for (int i = 0, e = Mul->getNumOperands(); i != e; ++i) { if (Mul->getOperand(i) == RHS) { SmallVector Operands; Operands.append(Mul->op_begin(), Mul->op_begin() + i); Operands.append(Mul->op_begin() + i + 1, Mul->op_end()); return getMulExpr(Operands); } } return getUDivExpr(LHS, RHS); } /// Get an add recurrence expression for the specified loop. Simplify the /// expression as much as possible. const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags) { SmallVector Operands; Operands.push_back(Start); if (const SCEVAddRecExpr *StepChrec = dyn_cast(Step)) if (StepChrec->getLoop() == L) { Operands.append(StepChrec->op_begin(), StepChrec->op_end()); return getAddRecExpr(Operands, L, maskFlags(Flags, SCEV::FlagNW)); } Operands.push_back(Step); return getAddRecExpr(Operands, L, Flags); } /// Get an add recurrence expression for the specified loop. Simplify the /// expression as much as possible. 
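// Illustrative aside (editor's sketch, not part of the merged sources):
// getUDivExactExpr above may cancel the divisor against a factor of the
// multiply, (x * y) /u y --> x, but only because the multiply carries <nuw>.
// In i8 arithmetic x = 16, y = 32 gives x*y == 0 (512 mod 256), and
// 0 /u 32 is 0, not 16:
#include <cstdint>
#include <cassert>

static void checkExactUDivCancel() {
  uint8_t x = 16, y = 32;
  uint8_t Prod = uint8_t(x * y);              // wraps: 512 mod 256 == 0
  assert(Prod / y != x);                      // cancelling would be wrong
  uint8_t x2 = 3, y2 = 5;                     // 15 fits, so the mul is nuw
  assert(uint8_t(x2 * y2) / y2 == x2);        // cancellation is valid
}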
const SCEV *
ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
                               const Loop *L, SCEV::NoWrapFlags Flags) {
  if (Operands.size() == 1) return Operands[0];
#ifndef NDEBUG
  Type *ETy = getEffectiveSCEVType(Operands[0]->getType());
  for (unsigned i = 1, e = Operands.size(); i != e; ++i)
    assert(getEffectiveSCEVType(Operands[i]->getType()) == ETy &&
           "SCEVAddRecExpr operand types don't match!");
  for (unsigned i = 0, e = Operands.size(); i != e; ++i)
    assert(isLoopInvariant(Operands[i], L) &&
           "SCEVAddRecExpr operand is not loop-invariant!");
#endif

  if (Operands.back()->isZero()) {
    Operands.pop_back();
    return getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); // {X,+,0}  -->  X
  }

  // It's tempting to want to call getMaxBackedgeTakenCount here and
  // use that information to infer NUW and NSW flags. However, computing a
  // BE count requires calling getAddRecExpr, so we may not yet have a
  // meaningful BE count at this point (and if we don't, we'd be stuck
  // with a SCEVCouldNotCompute as the cached BE count).

  Flags = StrengthenNoWrapFlags(this, scAddRecExpr, Operands, Flags);

  // Canonicalize nested AddRecs by nesting them in order of loop depth.
  if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {
    const Loop *NestedLoop = NestedAR->getLoop();
    if (L->contains(NestedLoop)
            ? (L->getLoopDepth() < NestedLoop->getLoopDepth())
            : (!NestedLoop->contains(L) &&
               DT.dominates(L->getHeader(), NestedLoop->getHeader()))) {
      SmallVector<const SCEV *, 4> NestedOperands(NestedAR->op_begin(),
                                                  NestedAR->op_end());
      Operands[0] = NestedAR->getStart();
      // AddRecs require their operands be loop-invariant with respect to their
      // loops. Don't perform this transformation if it would break this
      // requirement.
      bool AllInvariant = all_of(
          Operands, [&](const SCEV *Op) { return isLoopInvariant(Op, L); });

      if (AllInvariant) {
        // Create a recurrence for the outer loop with the same step size.
        //
        // The outer recurrence keeps its NW flag but only keeps NUW/NSW if
        // the inner recurrence has the same property.
        SCEV::NoWrapFlags OuterFlags =
            maskFlags(Flags, SCEV::FlagNW | NestedAR->getNoWrapFlags());

        NestedOperands[0] = getAddRecExpr(Operands, L, OuterFlags);
        AllInvariant = all_of(NestedOperands, [&](const SCEV *Op) {
          return isLoopInvariant(Op, NestedLoop);
        });

        if (AllInvariant) {
          // Ok, both add recurrences are valid after the transformation.
          //
          // The inner recurrence keeps its NW flag but only keeps NUW/NSW if
          // the outer recurrence has the same property.
          SCEV::NoWrapFlags InnerFlags =
              maskFlags(NestedAR->getNoWrapFlags(), SCEV::FlagNW | Flags);
          return getAddRecExpr(NestedOperands, NestedLoop, InnerFlags);
        }
      }
      // Reset Operands to its original state.
      Operands[0] = NestedAR;
    }
  }

  // Okay, it looks like we really DO need an addrec expr. Check to see if we
  // already have one, otherwise create a new one.
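// Illustrative aside (editor's sketch, not part of the merged sources): the
// lookup below, like the getOrCreate* helpers earlier, is hash-consing:
// profile the node (kind, operands, loop), probe a folding set, and allocate
// only on a miss, so structurally identical SCEVs are pointer-identical. A
// minimal sketch of the pattern with a std::map standing in for
// llvm::FoldingSet:
#include <map>
#include <vector>

struct ExprNode { int Kind; std::vector<const ExprNode *> Ops; };

static const ExprNode *getOrCreate(int Kind,
                                   std::vector<const ExprNode *> Ops) {
  static std::map<std::pair<int, std::vector<const ExprNode *>>, ExprNode>
      Unique;
  ExprNode &Slot = Unique[{Kind, Ops}];       // key is the node's profile
  Slot.Kind = Kind;
  Slot.Ops = std::move(Ops);
  return &Slot;                               // equal inputs, equal pointer
}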
FoldingSetNodeID ID; ID.AddInteger(scAddRecExpr); for (unsigned i = 0, e = Operands.size(); i != e; ++i) ID.AddPointer(Operands[i]); ID.AddPointer(L); void *IP = nullptr; SCEVAddRecExpr *S = static_cast(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); if (!S) { const SCEV **O = SCEVAllocator.Allocate(Operands.size()); std::uninitialized_copy(Operands.begin(), Operands.end(), O); S = new (SCEVAllocator) SCEVAddRecExpr(ID.Intern(SCEVAllocator), O, Operands.size(), L); UniqueSCEVs.InsertNode(S, IP); addToLoopUseLists(S); } S->setNoWrapFlags(Flags); return S; } const SCEV * ScalarEvolution::getGEPExpr(GEPOperator *GEP, const SmallVectorImpl &IndexExprs) { const SCEV *BaseExpr = getSCEV(GEP->getPointerOperand()); // getSCEV(Base)->getType() has the same address space as Base->getType() // because SCEV::getType() preserves the address space. Type *IntPtrTy = getEffectiveSCEVType(BaseExpr->getType()); // FIXME(PR23527): Don't blindly transfer the inbounds flag from the GEP // instruction to its SCEV, because the Instruction may be guarded by control // flow and the no-overflow bits may not be valid for the expression in any // context. This can be fixed similarly to how these flags are handled for // adds. SCEV::NoWrapFlags Wrap = GEP->isInBounds() ? SCEV::FlagNSW : SCEV::FlagAnyWrap; const SCEV *TotalOffset = getZero(IntPtrTy); // The array size is unimportant. The first thing we do on CurTy is getting // its element type. Type *CurTy = ArrayType::get(GEP->getSourceElementType(), 0); for (const SCEV *IndexExpr : IndexExprs) { // Compute the (potentially symbolic) offset in bytes for this index. if (StructType *STy = dyn_cast(CurTy)) { // For a struct, add the member offset. ConstantInt *Index = cast(IndexExpr)->getValue(); unsigned FieldNo = Index->getZExtValue(); const SCEV *FieldOffset = getOffsetOfExpr(IntPtrTy, STy, FieldNo); // Add the field offset to the running total offset. TotalOffset = getAddExpr(TotalOffset, FieldOffset); // Update CurTy to the type of the field at Index. CurTy = STy->getTypeAtIndex(Index); } else { // Update CurTy to its element type. CurTy = cast(CurTy)->getElementType(); // For an array, add the element offset, explicitly scaled. const SCEV *ElementSize = getSizeOfExpr(IntPtrTy, CurTy); // Getelementptr indices are signed. IndexExpr = getTruncateOrSignExtend(IndexExpr, IntPtrTy); // Multiply the index by the element size to compute the element offset. const SCEV *LocalOffset = getMulExpr(IndexExpr, ElementSize, Wrap); // Add the element offset to the running total offset. TotalOffset = getAddExpr(TotalOffset, LocalOffset); } } // Add the total offset from all the GEP indices to the base. return getAddExpr(BaseExpr, TotalOffset, Wrap); } const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS, const SCEV *RHS) { SmallVector Ops = {LHS, RHS}; return getSMaxExpr(Ops); } const SCEV * ScalarEvolution::getSMaxExpr(SmallVectorImpl &Ops) { assert(!Ops.empty() && "Cannot get empty smax!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); for (unsigned i = 1, e = Ops.size(); i != e; ++i) assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && "SCEVSMaxExpr operand types don't match!"); #endif // Sort by complexity, this groups all similar expression types together. GroupByComplexity(Ops, &LI, DT); // If there are any constants, fold them together. 
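// Illustrative aside (editor's sketch, not part of the merged sources): the
// constant handling below folds all constant operands into one and drops a
// constant that can never win, e.g. smax(2, 5, x) --> smax(5, x) and
// smax(INT8_MIN, x) --> x. A scalar check of both facts:
#include <cstdint>
#include <algorithm>
#include <cassert>

static void checkSMaxFolds() {
  for (int8_t x = -100; x <= 100; ++x) {
    assert(std::max({int8_t(2), int8_t(5), x}) == std::max(int8_t(5), x));
    assert(std::max(int8_t(INT8_MIN), x) == x); // minimum-int never wins
  }
}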
unsigned Idx = 0; if (const SCEVConstant *LHSC = dyn_cast(Ops[0])) { ++Idx; assert(Idx < Ops.size()); while (const SCEVConstant *RHSC = dyn_cast(Ops[Idx])) { // We found two constants, fold them together! ConstantInt *Fold = ConstantInt::get( getContext(), APIntOps::smax(LHSC->getAPInt(), RHSC->getAPInt())); Ops[0] = getConstant(Fold); Ops.erase(Ops.begin()+1); // Erase the folded element if (Ops.size() == 1) return Ops[0]; LHSC = cast(Ops[0]); } // If we are left with a constant minimum-int, strip it off. if (cast(Ops[0])->getValue()->isMinValue(true)) { Ops.erase(Ops.begin()); --Idx; } else if (cast(Ops[0])->getValue()->isMaxValue(true)) { // If we have an smax with a constant maximum-int, it will always be // maximum-int. return Ops[0]; } if (Ops.size() == 1) return Ops[0]; } // Find the first SMax while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scSMaxExpr) ++Idx; // Check to see if one of the operands is an SMax. If so, expand its operands // onto our operand list, and recurse to simplify. if (Idx < Ops.size()) { bool DeletedSMax = false; while (const SCEVSMaxExpr *SMax = dyn_cast(Ops[Idx])) { Ops.erase(Ops.begin()+Idx); Ops.append(SMax->op_begin(), SMax->op_end()); DeletedSMax = true; } if (DeletedSMax) return getSMaxExpr(Ops); } // Okay, check to see if the same value occurs in the operand list twice. If // so, delete one. Since we sorted the list, these values are required to // be adjacent. for (unsigned i = 0, e = Ops.size()-1; i != e; ++i) // X smax Y smax Y --> X smax Y // X smax Y --> X, if X is always greater than Y if (Ops[i] == Ops[i+1] || isKnownPredicate(ICmpInst::ICMP_SGE, Ops[i], Ops[i+1])) { Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2); --i; --e; } else if (isKnownPredicate(ICmpInst::ICMP_SLE, Ops[i], Ops[i+1])) { Ops.erase(Ops.begin()+i, Ops.begin()+i+1); --i; --e; } if (Ops.size() == 1) return Ops[0]; assert(!Ops.empty() && "Reduced smax down to nothing!"); // Okay, it looks like we really DO need an smax expr. Check to see if we // already have one, otherwise create a new one. FoldingSetNodeID ID; ID.AddInteger(scSMaxExpr); for (unsigned i = 0, e = Ops.size(); i != e; ++i) ID.AddPointer(Ops[i]); void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; const SCEV **O = SCEVAllocator.Allocate(Ops.size()); std::uninitialized_copy(Ops.begin(), Ops.end(), O); SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator), O, Ops.size()); UniqueSCEVs.InsertNode(S, IP); addToLoopUseLists(S); return S; } const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS, const SCEV *RHS) { SmallVector Ops = {LHS, RHS}; return getUMaxExpr(Ops); } const SCEV * ScalarEvolution::getUMaxExpr(SmallVectorImpl &Ops) { assert(!Ops.empty() && "Cannot get empty umax!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); for (unsigned i = 1, e = Ops.size(); i != e; ++i) assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && "SCEVUMaxExpr operand types don't match!"); #endif // Sort by complexity, this groups all similar expression types together. GroupByComplexity(Ops, &LI, DT); // If there are any constants, fold them together. unsigned Idx = 0; if (const SCEVConstant *LHSC = dyn_cast(Ops[0])) { ++Idx; assert(Idx < Ops.size()); while (const SCEVConstant *RHSC = dyn_cast(Ops[Idx])) { // We found two constants, fold them together! 
ConstantInt *Fold = ConstantInt::get( getContext(), APIntOps::umax(LHSC->getAPInt(), RHSC->getAPInt())); Ops[0] = getConstant(Fold); Ops.erase(Ops.begin()+1); // Erase the folded element if (Ops.size() == 1) return Ops[0]; LHSC = cast(Ops[0]); } // If we are left with a constant minimum-int, strip it off. if (cast(Ops[0])->getValue()->isMinValue(false)) { Ops.erase(Ops.begin()); --Idx; } else if (cast(Ops[0])->getValue()->isMaxValue(false)) { // If we have an umax with a constant maximum-int, it will always be // maximum-int. return Ops[0]; } if (Ops.size() == 1) return Ops[0]; } // Find the first UMax while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scUMaxExpr) ++Idx; // Check to see if one of the operands is a UMax. If so, expand its operands // onto our operand list, and recurse to simplify. if (Idx < Ops.size()) { bool DeletedUMax = false; while (const SCEVUMaxExpr *UMax = dyn_cast(Ops[Idx])) { Ops.erase(Ops.begin()+Idx); Ops.append(UMax->op_begin(), UMax->op_end()); DeletedUMax = true; } if (DeletedUMax) return getUMaxExpr(Ops); } // Okay, check to see if the same value occurs in the operand list twice. If // so, delete one. Since we sorted the list, these values are required to // be adjacent. for (unsigned i = 0, e = Ops.size()-1; i != e; ++i) // X umax Y umax Y --> X umax Y // X umax Y --> X, if X is always greater than Y if (Ops[i] == Ops[i+1] || isKnownPredicate(ICmpInst::ICMP_UGE, Ops[i], Ops[i+1])) { Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2); --i; --e; } else if (isKnownPredicate(ICmpInst::ICMP_ULE, Ops[i], Ops[i+1])) { Ops.erase(Ops.begin()+i, Ops.begin()+i+1); --i; --e; } if (Ops.size() == 1) return Ops[0]; assert(!Ops.empty() && "Reduced umax down to nothing!"); // Okay, it looks like we really DO need a umax expr. Check to see if we // already have one, otherwise create a new one. FoldingSetNodeID ID; ID.AddInteger(scUMaxExpr); for (unsigned i = 0, e = Ops.size(); i != e; ++i) ID.AddPointer(Ops[i]); void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; const SCEV **O = SCEVAllocator.Allocate(Ops.size()); std::uninitialized_copy(Ops.begin(), Ops.end(), O); SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator), O, Ops.size()); UniqueSCEVs.InsertNode(S, IP); addToLoopUseLists(S); return S; } const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS, const SCEV *RHS) { // ~smax(~x, ~y) == smin(x, y). return getNotSCEV(getSMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS))); } const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS, const SCEV *RHS) { // ~umax(~x, ~y) == umin(x, y) return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS))); } const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) { // We can bypass creating a target-independent // constant expression and then folding it back into a ConstantInt. // This is just a compile-time optimization. return getConstant(IntTy, getDataLayout().getTypeAllocSize(AllocTy)); } const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy, StructType *STy, unsigned FieldNo) { // We can bypass creating a target-independent // constant expression and then folding it back into a ConstantInt. // This is just a compile-time optimization. return getConstant( IntTy, getDataLayout().getStructLayout(STy)->getElementOffset(FieldNo)); } const SCEV *ScalarEvolution::getUnknown(Value *V) { // Don't attempt to do anything other than create a SCEVUnknown object // here. 
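// Illustrative aside (editor's sketch, not part of the merged sources): the
// getSMinExpr/getUMinExpr definitions above work because bitwise-not is
// order-reversing, so ~umax(~x, ~y) == umin(x, y). Brute-force check over
// all 8-bit pairs:
#include <cstdint>
#include <algorithm>
#include <cassert>

static void checkUMinViaUMax() {
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned y = 0; y < 256; ++y) {
      uint8_t Nx = uint8_t(~x), Ny = uint8_t(~y);
      assert(uint8_t(~std::max(Nx, Ny)) ==
             std::min(uint8_t(x), uint8_t(y)));
    }
}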
  // createSCEV only calls getUnknown after checking for all other
  // interesting possibilities, and any other code that calls getUnknown
  // is doing so in order to hide a value from SCEV canonicalization.

  FoldingSetNodeID ID;
  ID.AddInteger(scUnknown);
  ID.AddPointer(V);
  void *IP = nullptr;
  if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) {
    assert(cast<SCEVUnknown>(S)->getValue() == V &&
           "Stale SCEVUnknown in uniquing map!");
    return S;
  }
  SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V, this,
                                            FirstUnknown);
  FirstUnknown = cast<SCEVUnknown>(S);
  UniqueSCEVs.InsertNode(S, IP);
  return S;
}

//===----------------------------------------------------------------------===//
//            Basic SCEV Analysis and PHI Idiom Recognition Code
//

/// Test if values of the given type are analyzable within the SCEV
/// framework. This primarily includes integer types, and it can optionally
/// include pointer types if the ScalarEvolution class has access to
/// target-specific information.
bool ScalarEvolution::isSCEVable(Type *Ty) const {
  // Integers and pointers are always SCEVable.
  return Ty->isIntegerTy() || Ty->isPointerTy();
}

/// Return the size in bits of the specified type, for which isSCEVable must
/// return true.
uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
  assert(isSCEVable(Ty) && "Type is not SCEVable!");
  return getDataLayout().getTypeSizeInBits(Ty);
}

/// Return a type with the same bitwidth as the given type and which
/// represents how SCEV will treat the given type, for which isSCEVable must
/// return true. For pointer types, this is the pointer-sized integer type.
Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
  assert(isSCEVable(Ty) && "Type is not SCEVable!");

  if (Ty->isIntegerTy())
    return Ty;

  // The only other supported type is pointer.
  assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
  return getDataLayout().getIntPtrType(Ty);
}

Type *ScalarEvolution::getWiderType(Type *T1, Type *T2) const {
  return getTypeSizeInBits(T1) >= getTypeSizeInBits(T2) ? T1 : T2;
}

const SCEV *ScalarEvolution::getCouldNotCompute() {
  return CouldNotCompute.get();
}

bool ScalarEvolution::checkValidity(const SCEV *S) const {
  bool ContainsNulls = SCEVExprContains(S, [](const SCEV *S) {
    auto *SU = dyn_cast<SCEVUnknown>(S);
    return SU && SU->getValue() == nullptr;
  });

  return !ContainsNulls;
}

bool ScalarEvolution::containsAddRecurrence(const SCEV *S) {
  HasRecMapType::iterator I = HasRecMap.find(S);
  if (I != HasRecMap.end())
    return I->second;

  bool FoundAddRec = SCEVExprContains(S, isa<SCEVAddRecExpr, const SCEV *>);
  HasRecMap.insert({S, FoundAddRec});
  return FoundAddRec;
}

/// Try to split a SCEVAddExpr into a pair of {SCEV, ConstantInt}.
/// If \p S is a SCEVAddExpr and is composed of a sub SCEV S' and an
/// offset I, then return {S', I}, else return {\p S, nullptr}.
static std::pair<const SCEV *, ConstantInt *> splitAddExpr(const SCEV *S) {
  const auto *Add = dyn_cast<SCEVAddExpr>(S);
  if (!Add)
    return {S, nullptr};

  if (Add->getNumOperands() != 2)
    return {S, nullptr};

  auto *ConstOp = dyn_cast<SCEVConstant>(Add->getOperand(0));
  if (!ConstOp)
    return {S, nullptr};

  return {Add->getOperand(1), ConstOp->getValue()};
}

/// Return the ValueOffsetPair set for \p S. \p S can be represented
/// by the value and offset from any ValueOffsetPair in the set.
SetVector<ScalarEvolution::ValueOffsetPair> *
ScalarEvolution::getSCEVValues(const SCEV *S) {
  ExprValueMapType::iterator SI = ExprValueMap.find_as(S);
  if (SI == ExprValueMap.end())
    return nullptr;
#ifndef NDEBUG
  if (VerifySCEVMap) {
    // Check there is no dangling Value in the set returned.
for (const auto &VE : SI->second) assert(ValueExprMap.count(VE.first)); } #endif return &SI->second; } /// Erase Value from ValueExprMap and ExprValueMap. ValueExprMap.erase(V) /// cannot be used separately. eraseValueFromMap should be used to remove /// V from ValueExprMap and ExprValueMap at the same time. void ScalarEvolution::eraseValueFromMap(Value *V) { ValueExprMapType::iterator I = ValueExprMap.find_as(V); if (I != ValueExprMap.end()) { const SCEV *S = I->second; // Remove {V, 0} from the set of ExprValueMap[S] if (SetVector *SV = getSCEVValues(S)) SV->remove({V, nullptr}); // Remove {V, Offset} from the set of ExprValueMap[Stripped] const SCEV *Stripped; ConstantInt *Offset; std::tie(Stripped, Offset) = splitAddExpr(S); if (Offset != nullptr) { if (SetVector *SV = getSCEVValues(Stripped)) SV->remove({V, Offset}); } ValueExprMap.erase(V); } } /// Return an existing SCEV if it exists, otherwise analyze the expression and /// create a new one. const SCEV *ScalarEvolution::getSCEV(Value *V) { assert(isSCEVable(V->getType()) && "Value is not SCEVable!"); const SCEV *S = getExistingSCEV(V); if (S == nullptr) { S = createSCEV(V); // During PHI resolution, it is possible to create two SCEVs for the same // V, so it is needed to double check whether V->S is inserted into // ValueExprMap before insert S->{V, 0} into ExprValueMap. std::pair Pair = ValueExprMap.insert({SCEVCallbackVH(V, this), S}); if (Pair.second) { ExprValueMap[S].insert({V, nullptr}); // If S == Stripped + Offset, add Stripped -> {V, Offset} into // ExprValueMap. const SCEV *Stripped = S; ConstantInt *Offset = nullptr; std::tie(Stripped, Offset) = splitAddExpr(S); // If stripped is SCEVUnknown, don't bother to save // Stripped -> {V, offset}. It doesn't simplify and sometimes even // increase the complexity of the expansion code. // If V is GetElementPtrInst, don't save Stripped -> {V, offset} // because it may generate add/sub instead of GEP in SCEV expansion. if (Offset != nullptr && !isa(Stripped) && !isa(V)) ExprValueMap[Stripped].insert({V, Offset}); } } return S; } const SCEV *ScalarEvolution::getExistingSCEV(Value *V) { assert(isSCEVable(V->getType()) && "Value is not SCEVable!"); ValueExprMapType::iterator I = ValueExprMap.find_as(V); if (I != ValueExprMap.end()) { const SCEV *S = I->second; if (checkValidity(S)) return S; eraseValueFromMap(V); forgetMemoizedResults(S); } return nullptr; } /// Return a SCEV corresponding to -V = -1*V const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags) { if (const SCEVConstant *VC = dyn_cast(V)) return getConstant( cast(ConstantExpr::getNeg(VC->getValue()))); Type *Ty = V->getType(); Ty = getEffectiveSCEVType(Ty); return getMulExpr( V, getConstant(cast(Constant::getAllOnesValue(Ty))), Flags); } /// Return a SCEV corresponding to ~V = -1-V const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) { if (const SCEVConstant *VC = dyn_cast(V)) return getConstant( cast(ConstantExpr::getNot(VC->getValue()))); Type *Ty = V->getType(); Ty = getEffectiveSCEVType(Ty); const SCEV *AllOnes = getConstant(cast(Constant::getAllOnesValue(Ty))); return getMinusSCEV(AllOnes, V); } const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags, unsigned Depth) { // Fast path: X - X --> 0. if (LHS == RHS) return getZero(LHS->getType()); // We represent LHS - RHS as LHS + (-1)*RHS. This transformation // makes it so that we cannot make much use of NUW. 
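// Illustrative aside (editor's sketch, not part of the merged sources): the
// reason the NSW transfer below must prove RHS != signed-min: in two's
// complement, negating the minimum value wraps back to itself, so (-1)*RHS
// overflows exactly when RHS is that minimum. An 8-bit demonstration:
#include <cstdint>
#include <cassert>

static void checkNegateMinWraps() {
  int8_t M = INT8_MIN;                        // -128
  uint8_t NegBits = uint8_t(-int(M));         // bits of (-1)*M, mod 256
  assert(NegBits == uint8_t(M));              // -128 negates to itself
}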
  auto AddFlags = SCEV::FlagAnyWrap;
  const bool RHSIsNotMinSigned =
      !getSignedRangeMin(RHS).isMinSignedValue();
  if (maskFlags(Flags, SCEV::FlagNSW) == SCEV::FlagNSW) {
    // Let M be the minimum representable signed value. Then (-1)*RHS
    // signed-wraps if and only if RHS is M. That can happen even for
    // an NSW subtraction because e.g. (-1)*M signed-wraps even though
    // -1 - M does not. So to transfer NSW from LHS - RHS to LHS +
    // (-1)*RHS, we need to prove that RHS != M.
    //
    // If LHS is non-negative and we know that LHS - RHS does not
    // signed-wrap, then RHS cannot be M. So we can rule out signed-wrap
    // either by proving that RHS > M or that LHS >= 0.
    if (RHSIsNotMinSigned || isKnownNonNegative(LHS)) {
      AddFlags = SCEV::FlagNSW;
    }
  }

  // FIXME: Find a correct way to transfer NSW to (-1)*M when LHS -
  // RHS is NSW and LHS >= 0.
  //
  // The difficulty here is that the NSW flag may have been proven
  // relative to a loop that is to be found in a recurrence in LHS and
  // not in RHS. Applying NSW to (-1)*M may then let the NSW have a
  // larger scope than intended.
  auto NegFlags = RHSIsNotMinSigned ? SCEV::FlagNSW : SCEV::FlagAnyWrap;

  return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags, Depth);
}

const SCEV *
ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty) {
  Type *SrcTy = V->getType();
  assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
         (Ty->isIntegerTy() || Ty->isPointerTy()) &&
         "Cannot truncate or zero extend with non-integer arguments!");
  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
    return V;  // No conversion
  if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
    return getTruncateExpr(V, Ty);
  return getZeroExtendExpr(V, Ty);
}

const SCEV *
ScalarEvolution::getTruncateOrSignExtend(const SCEV *V, Type *Ty) {
  Type *SrcTy = V->getType();
  assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
         (Ty->isIntegerTy() || Ty->isPointerTy()) &&
         "Cannot truncate or sign extend with non-integer arguments!");
  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
    return V;  // No conversion
  if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
    return getTruncateExpr(V, Ty);
  return getSignExtendExpr(V, Ty);
}

const SCEV *
ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, Type *Ty) {
  Type *SrcTy = V->getType();
  assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
         (Ty->isIntegerTy() || Ty->isPointerTy()) &&
         "Cannot noop or zero extend with non-integer arguments!");
  assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
         "getNoopOrZeroExtend cannot truncate!");
  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
    return V;  // No conversion
  return getZeroExtendExpr(V, Ty);
}

const SCEV *
ScalarEvolution::getNoopOrSignExtend(const SCEV *V, Type *Ty) {
  Type *SrcTy = V->getType();
  assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
         (Ty->isIntegerTy() || Ty->isPointerTy()) &&
         "Cannot noop or sign extend with non-integer arguments!");
  assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
         "getNoopOrSignExtend cannot truncate!");
  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
    return V;  // No conversion
  return getSignExtendExpr(V, Ty);
}

const SCEV *
ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, Type *Ty) {
  Type *SrcTy = V->getType();
  assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
         (Ty->isIntegerTy() || Ty->isPointerTy()) &&
         "Cannot noop or any extend with non-integer arguments!");
  assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
         "getNoopOrAnyExtend cannot truncate!");
  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
    return V;  // No conversion
  return getAnyExtendExpr(V, Ty);
}

const SCEV *
ScalarEvolution::getTruncateOrNoop(const SCEV *V, Type *Ty) {
  Type *SrcTy = V->getType();
  assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
         (Ty->isIntegerTy() || Ty->isPointerTy()) &&
         "Cannot truncate or noop with non-integer arguments!");
  assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) &&
         "getTruncateOrNoop cannot extend!");
  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
    return V;  // No conversion
  return getTruncateExpr(V, Ty);
}

const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS,
                                                        const SCEV *RHS) {
  const SCEV *PromotedLHS = LHS;
  const SCEV *PromotedRHS = RHS;

  if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
    PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
  else
    PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());

  return getUMaxExpr(PromotedLHS, PromotedRHS);
}

const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS,
                                                        const SCEV *RHS) {
  const SCEV *PromotedLHS = LHS;
  const SCEV *PromotedRHS = RHS;

  if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
    PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
  else
    PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());

  return getUMinExpr(PromotedLHS, PromotedRHS);
}

const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) {
  // A pointer operand may evaluate to a nonpointer expression, such as null.
  if (!V->getType()->isPointerTy())
    return V;

  if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(V)) {
    return getPointerBase(Cast->getOperand());
  } else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) {
    const SCEV *PtrOp = nullptr;
    for (const SCEV *NAryOp : NAry->operands()) {
      if (NAryOp->getType()->isPointerTy()) {
        // Cannot find the base of an expression with multiple pointer
        // operands.
        if (PtrOp)
          return V;
        PtrOp = NAryOp;
      }
    }
    if (!PtrOp)
      return V;
    return getPointerBase(PtrOp);
  }
  return V;
}

/// Push users of the given Instruction onto the given Worklist.
static void
PushDefUseChildren(Instruction *I,
                   SmallVectorImpl<Instruction *> &Worklist) {
  // Push the def-use children onto the Worklist stack.
  for (User *U : I->users())
    Worklist.push_back(cast<Instruction>(U));
}

void ScalarEvolution::forgetSymbolicName(Instruction *PN, const SCEV *SymName) {
  SmallVector<Instruction *, 16> Worklist;
  PushDefUseChildren(PN, Worklist);

  SmallPtrSet<Instruction *, 8> Visited;
  Visited.insert(PN);
  while (!Worklist.empty()) {
    Instruction *I = Worklist.pop_back_val();
    if (!Visited.insert(I).second)
      continue;

    auto It = ValueExprMap.find_as(static_cast<Value *>(I));
    if (It != ValueExprMap.end()) {
      const SCEV *Old = It->second;

      // Short-circuit the def-use traversal if the symbolic name
      // ceases to appear in expressions.
      if (Old != SymName && !hasOperand(Old, SymName))
        continue;

      // SCEVUnknown for a PHI either means that it has an unrecognized
      // structure, it's a PHI that's in the process of being computed
      // by createNodeForPHI, or it's a single-value PHI. In the first case,
      // additional loop trip count information isn't going to change anything.
      // In the second case, createNodeForPHI will perform the necessary
      // updates on its own when it gets to that point. In the third, we do
      // want to forget the SCEVUnknown.
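      // For example (hypothetical IR), a single-value PHI such as
      //   %p = phi i64 [ %v, %preheader ]
      // is typically simplified away later; keeping its stale SCEVUnknown
      // here could shadow the simpler SCEV that %v would produce.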
      if (!isa<PHINode>(I) || !isa<SCEVUnknown>(Old) ||
          (I != PN && Old == SymName)) {
        eraseValueFromMap(It->first);
        forgetMemoizedResults(Old);
      }
    }

    PushDefUseChildren(I, Worklist);
  }
}

namespace {

class SCEVInitRewriter : public SCEVRewriteVisitor<SCEVInitRewriter> {
public:
  static const SCEV *rewrite(const SCEV *S, const Loop *L,
                             ScalarEvolution &SE) {
    SCEVInitRewriter Rewriter(L, SE);
    const SCEV *Result = Rewriter.visit(S);
    return Rewriter.isValid() ? Result : SE.getCouldNotCompute();
  }

  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
    if (!SE.isLoopInvariant(Expr, L))
      Valid = false;
    return Expr;
  }

  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
    // Only allow AddRecExprs for this loop.
    if (Expr->getLoop() == L)
      return Expr->getStart();
    Valid = false;
    return Expr;
  }

  bool isValid() { return Valid; }

private:
  explicit SCEVInitRewriter(const Loop *L, ScalarEvolution &SE)
      : SCEVRewriteVisitor(SE), L(L) {}

  const Loop *L;
  bool Valid = true;
};

/// This class evaluates the compare condition by matching it against the
/// condition of the loop latch. If there is a match we assume a true value
/// for the condition while building SCEV nodes.
class SCEVBackedgeConditionFolder
    : public SCEVRewriteVisitor<SCEVBackedgeConditionFolder> {
public:
  static const SCEV *rewrite(const SCEV *S, const Loop *L,
                             ScalarEvolution &SE) {
    bool IsPosBECond = false;
    Value *BECond = nullptr;
    if (BasicBlock *Latch = L->getLoopLatch()) {
      BranchInst *BI = dyn_cast<BranchInst>(Latch->getTerminator());
      if (BI && BI->isConditional()) {
        assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
               "Both outgoing branches should not target same header!");
        BECond = BI->getCondition();
        IsPosBECond = BI->getSuccessor(0) == L->getHeader();
      } else {
        return S;
      }
    }
    SCEVBackedgeConditionFolder Rewriter(L, BECond, IsPosBECond, SE);
    return Rewriter.visit(S);
  }

  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
    const SCEV *Result = Expr;
    bool InvariantF = SE.isLoopInvariant(Expr, L);

    if (!InvariantF) {
      Instruction *I = cast<Instruction>(Expr->getValue());
      switch (I->getOpcode()) {
      case Instruction::Select: {
        SelectInst *SI = cast<SelectInst>(I);
        Optional<const SCEV *> Res =
            compareWithBackedgeCondition(SI->getCondition());
        if (Res.hasValue()) {
          bool IsOne = cast<SCEVConstant>(Res.getValue())->getValue()->isOne();
          Result = SE.getSCEV(IsOne ? SI->getTrueValue() : SI->getFalseValue());
        }
        break;
      }
      default: {
        Optional<const SCEV *> Res = compareWithBackedgeCondition(I);
        if (Res.hasValue())
          Result = Res.getValue();
        break;
      }
      }
    }
    return Result;
  }

private:
  explicit SCEVBackedgeConditionFolder(const Loop *L, Value *BECond,
                                       bool IsPosBECond, ScalarEvolution &SE)
      : SCEVRewriteVisitor(SE), L(L), BackedgeCond(BECond),
        IsPositiveBECond(IsPosBECond) {}

  Optional<const SCEV *> compareWithBackedgeCondition(Value *IC);

  const Loop *L;
  /// Loop back condition.
  Value *BackedgeCond = nullptr;
  /// Set to true if loop back is on positive branch condition.
  bool IsPositiveBECond;
};

Optional<const SCEV *>
SCEVBackedgeConditionFolder::compareWithBackedgeCondition(Value *IC) {
  // If value matches the backedge condition for loop latch,
  // then return a constant evolution node based on loopback
  // branch taken.
  if (BackedgeCond == IC)
    return IsPositiveBECond ? SE.getOne(Type::getInt1Ty(SE.getContext()))
                            : SE.getZero(Type::getInt1Ty(SE.getContext()));
  return None;
}

class SCEVShiftRewriter : public SCEVRewriteVisitor<SCEVShiftRewriter> {
public:
  static const SCEV *rewrite(const SCEV *S, const Loop *L,
                             ScalarEvolution &SE) {
    SCEVShiftRewriter Rewriter(L, SE);
    const SCEV *Result = Rewriter.visit(S);
    return Rewriter.isValid() ? Result : SE.getCouldNotCompute();
  }

  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
    // Only allow AddRecExprs for this loop.
    if (!SE.isLoopInvariant(Expr, L))
      Valid = false;
    return Expr;
  }

  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
    if (Expr->getLoop() == L && Expr->isAffine())
      return SE.getMinusSCEV(Expr, Expr->getStepRecurrence(SE));
    Valid = false;
    return Expr;
  }

  bool isValid() { return Valid; }

private:
  explicit SCEVShiftRewriter(const Loop *L, ScalarEvolution &SE)
      : SCEVRewriteVisitor(SE), L(L) {}

  const Loop *L;
  bool Valid = true;
};

} // end anonymous namespace

SCEV::NoWrapFlags
ScalarEvolution::proveNoWrapViaConstantRanges(const SCEVAddRecExpr *AR) {
  if (!AR->isAffine())
    return SCEV::FlagAnyWrap;

  using OBO = OverflowingBinaryOperator;

  SCEV::NoWrapFlags Result = SCEV::FlagAnyWrap;

  if (!AR->hasNoSignedWrap()) {
    ConstantRange AddRecRange = getSignedRange(AR);
    ConstantRange IncRange = getSignedRange(AR->getStepRecurrence(*this));

    auto NSWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
        Instruction::Add, IncRange, OBO::NoSignedWrap);
    if (NSWRegion.contains(AddRecRange))
      Result = ScalarEvolution::setFlags(Result, SCEV::FlagNSW);
  }

  if (!AR->hasNoUnsignedWrap()) {
    ConstantRange AddRecRange = getUnsignedRange(AR);
    ConstantRange IncRange = getUnsignedRange(AR->getStepRecurrence(*this));

    auto NUWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
        Instruction::Add, IncRange, OBO::NoUnsignedWrap);
    if (NUWRegion.contains(AddRecRange))
      Result = ScalarEvolution::setFlags(Result, SCEV::FlagNUW);
  }

  return Result;
}

namespace {

/// Represents an abstract binary operation. This may exist as a
/// normal instruction or constant expression, or may have been
/// derived from an expression tree.
struct BinaryOp {
  unsigned Opcode;
  Value *LHS;
  Value *RHS;
  bool IsNSW = false;
  bool IsNUW = false;

  /// Op is set if this BinaryOp corresponds to a concrete LLVM instruction or
  /// constant expression.
  Operator *Op = nullptr;

  explicit BinaryOp(Operator *Op)
      : Opcode(Op->getOpcode()), LHS(Op->getOperand(0)),
        RHS(Op->getOperand(1)), Op(Op) {
    if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Op)) {
      IsNSW = OBO->hasNoSignedWrap();
      IsNUW = OBO->hasNoUnsignedWrap();
    }
  }

  explicit BinaryOp(unsigned Opcode, Value *LHS, Value *RHS,
                    bool IsNSW = false, bool IsNUW = false)
      : Opcode(Opcode), LHS(LHS), RHS(RHS), IsNSW(IsNSW), IsNUW(IsNUW) {}
};

} // end anonymous namespace

/// Try to map \p V into a BinaryOp, and return \c None on failure.
static Optional<BinaryOp> MatchBinaryOp(Value *V, DominatorTree &DT) {
  auto *Op = dyn_cast<Operator>(V);
  if (!Op)
    return None;

  // Implementation detail: all the cleverness here should happen without
  // creating new SCEV expressions -- our caller knows tricks to avoid creating
  // SCEV expressions when possible, and we should not break that.
  switch (Op->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::URem:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::AShr:
  case Instruction::Shl:
    return BinaryOp(Op);

  case Instruction::Xor:
    if (auto *RHSC = dyn_cast<ConstantInt>(Op->getOperand(1)))
      // If the RHS of the xor is a signmask, then this is just an add.
      // Instcombine turns add of signmask into xor as a strength reduction
      // step.
      if (RHSC->getValue().isSignMask())
        return BinaryOp(Instruction::Add, Op->getOperand(0),
                        Op->getOperand(1));
    return BinaryOp(Op);

  case Instruction::LShr:
    // Turn logical shift right of a constant into an unsigned divide.
    if (ConstantInt *SA = dyn_cast<ConstantInt>(Op->getOperand(1))) {
      uint32_t BitWidth = cast<IntegerType>(Op->getType())->getBitWidth();

      // If the shift count is not less than the bitwidth, the result of
      // the shift is undefined. Don't try to analyze it, because the
      // resolution chosen here may differ from the resolution chosen in
      // other parts of the compiler.
      if (SA->getValue().ult(BitWidth)) {
        Constant *X =
            ConstantInt::get(SA->getContext(),
                             APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
        return BinaryOp(Instruction::UDiv, Op->getOperand(0), X);
      }
    }
    return BinaryOp(Op);

  case Instruction::ExtractValue: {
    auto *EVI = cast<ExtractValueInst>(Op);
    if (EVI->getNumIndices() != 1 || EVI->getIndices()[0] != 0)
      break;

    auto *CI = dyn_cast<CallInst>(EVI->getAggregateOperand());
    if (!CI)
      break;

    if (auto *F = CI->getCalledFunction())
      switch (F->getIntrinsicID()) {
      case Intrinsic::sadd_with_overflow:
      case Intrinsic::uadd_with_overflow:
        if (!isOverflowIntrinsicNoWrap(cast<IntrinsicInst>(CI), DT))
          return BinaryOp(Instruction::Add, CI->getArgOperand(0),
                          CI->getArgOperand(1));

        // Now that we know that all uses of the arithmetic-result component of
        // CI are guarded by the overflow check, we can go ahead and pretend
        // that the arithmetic is non-overflowing.
        if (F->getIntrinsicID() == Intrinsic::sadd_with_overflow)
          return BinaryOp(Instruction::Add, CI->getArgOperand(0),
                          CI->getArgOperand(1), /* IsNSW = */ true,
                          /* IsNUW = */ false);
        else
          return BinaryOp(Instruction::Add, CI->getArgOperand(0),
                          CI->getArgOperand(1), /* IsNSW = */ false,
                          /* IsNUW = */ true);

      case Intrinsic::ssub_with_overflow:
      case Intrinsic::usub_with_overflow:
        if (!isOverflowIntrinsicNoWrap(cast<IntrinsicInst>(CI), DT))
          return BinaryOp(Instruction::Sub, CI->getArgOperand(0),
                          CI->getArgOperand(1));

        // The same reasoning as sadd/uadd above.
        if (F->getIntrinsicID() == Intrinsic::ssub_with_overflow)
          return BinaryOp(Instruction::Sub, CI->getArgOperand(0),
                          CI->getArgOperand(1), /* IsNSW = */ true,
                          /* IsNUW = */ false);
        else
          return BinaryOp(Instruction::Sub, CI->getArgOperand(0),
                          CI->getArgOperand(1), /* IsNSW = */ false,
                          /* IsNUW = */ true);

      case Intrinsic::smul_with_overflow:
      case Intrinsic::umul_with_overflow:
        return BinaryOp(Instruction::Mul, CI->getArgOperand(0),
                        CI->getArgOperand(1));
      default:
        break;
      }
    break;
  }

  default:
    break;
  }

  return None;
}

/// Helper function to createAddRecFromPHIWithCasts. We have a phi
/// node whose symbolic (unknown) SCEV is \p SymbolicPHI, which is updated via
/// the loop backedge by a SCEVAddExpr, possibly also with a few casts on the
/// way. This function checks if \p Op, an operand of this SCEVAddExpr,
/// follows one of the following patterns:
/// Op == (SExt ix (Trunc iy (%SymbolicPHI) to ix) to iy)
/// Op == (ZExt ix (Trunc iy (%SymbolicPHI) to ix) to iy)
/// If the SCEV expression of \p Op conforms with one of the expected patterns
/// we return the type of the truncation operation, and indicate whether the
/// truncated type should be treated as signed/unsigned by setting
/// \p Signed to true/false, respectively.
static Type *isSimpleCastedPHI(const SCEV *Op, const SCEVUnknown *SymbolicPHI,
                               bool &Signed, ScalarEvolution &SE) {
  // The case where Op == SymbolicPHI (that is, with no type conversions on
  // the way) is handled by the regular add recurrence creating logic and
  // would have already been triggered in createAddRecForPHI. Reaching it here
  // means that createAddRecFromPHI had failed for this PHI before (e.g.,
  // because one of the other operands of the SCEVAddExpr updating this PHI is
  // not invariant).
  //
  // Here we look for the case where Op = (ext(trunc(SymbolicPHI))), and in
  // this case predicates that allow us to prove that Op == SymbolicPHI will
  // be added.
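  // For instance (hypothetical operands), with %SymbolicPHI of type i64 and
  //   Op == (sext i32 (trunc i64 %SymbolicPHI to i32) to i64)
  // we would return the i32 truncation type and set Signed to true.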
  if (Op == SymbolicPHI)
    return nullptr;

  unsigned SourceBits = SE.getTypeSizeInBits(SymbolicPHI->getType());
  unsigned NewBits = SE.getTypeSizeInBits(Op->getType());
  if (SourceBits != NewBits)
    return nullptr;

  const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(Op);
  const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(Op);
  if (!SExt && !ZExt)
    return nullptr;
  const SCEVTruncateExpr *Trunc =
      SExt ? dyn_cast<SCEVTruncateExpr>(SExt->getOperand())
           : dyn_cast<SCEVTruncateExpr>(ZExt->getOperand());
  if (!Trunc)
    return nullptr;
  const SCEV *X = Trunc->getOperand();
  if (X != SymbolicPHI)
    return nullptr;
  Signed = SExt != nullptr;
  return Trunc->getType();
}

static const Loop *isIntegerLoopHeaderPHI(const PHINode *PN, LoopInfo &LI) {
  if (!PN->getType()->isIntegerTy())
    return nullptr;
  const Loop *L = LI.getLoopFor(PN->getParent());
  if (!L || L->getHeader() != PN->getParent())
    return nullptr;
  return L;
}

// Analyze \p SymbolicPHI, a SCEV expression of a phi node, and check if the
// computation that updates the phi follows the following pattern:
//   (SExt/ZExt ix (Trunc iy (%SymbolicPHI) to ix) to iy) + InvariantAccum
// which corresponds to a phi->trunc->sext/zext->add->phi update chain.
// If so, try to see if it can be rewritten as an AddRecExpr under some
// Predicates. If successful, return them as a pair. Also cache the results
// of the analysis.
//
// Example usage scenario:
//    Say the Rewriter is called for the following SCEV:
//         8 * ((sext i32 (trunc i64 %X to i32) to i64) + %Step)
//    where:
//         %X = phi i64 (%Start, %BEValue)
//    It will visitMul->visitAdd->visitSExt->visitTrunc->visitUnknown(%X),
//    and call this function with %SymbolicPHI = %X.
//
//    The analysis will find that the value coming around the backedge has
//    the following SCEV:
//         BEValue = ((sext i32 (trunc i64 %X to i32) to i64) + %Step)
//    Upon concluding that this matches the desired pattern, the function
//    will return the pair {NewAddRec, SmallPredsVec} where:
//         NewAddRec = {%Start,+,%Step}
//         SmallPredsVec = {P1, P2, P3} as follows:
//           P1(WrapPred): AR: {trunc(%Start),+,(trunc %Step)}<nssw> Flags: <nssw>
//           P2(EqualPred): %Start == (sext i32 (trunc i64 %Start to i32) to i64)
//           P3(EqualPred): %Step == (sext i32 (trunc i64 %Step to i32) to i64)
//    The returned pair means that SymbolicPHI can be rewritten into NewAddRec
//    under the predicates {P1,P2,P3}.
//    This predicated rewrite will be cached in PredicatedSCEVRewrites:
//         PredicatedSCEVRewrites[{%X,L}] = {NewAddRec, {P1,P2,P3}}
//
// TODO's:
//
// 1) Extend the Induction descriptor to also support inductions that involve
//    casts: When needed (namely, when we are called in the context of the
//    vectorizer induction analysis), a Set of cast instructions will be
//    populated by this method, and provided back to isInductionPHI. This is
//    needed to allow the vectorizer to properly record them to be ignored by
//    the cost model and to avoid vectorizing them (otherwise these casts,
//    which are redundant under the runtime overflow checks, will be
//    vectorized, which can be costly).
//
// 2) Support additional induction/PHISCEV patterns: We also want to support
//    inductions where the sext-trunc / zext-trunc operations (partly) occur
//    after the induction update operation (the induction increment):
//
//      (Trunc iy (SExt/ZExt ix (%SymbolicPHI + InvariantAccum) to iy) to ix)
//    which corresponds to a phi->add->trunc->sext/zext->phi update chain.
//
//      (Trunc iy ((SExt/ZExt ix (%SymbolicPhi) to iy) + InvariantAccum) to ix)
//    which corresponds to a phi->trunc->add->sext/zext->phi update chain.
//
// 3) Outline common code with createAddRecFromPHI to avoid duplication.
Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
ScalarEvolution::createAddRecFromPHIWithCastsImpl(
    const SCEVUnknown *SymbolicPHI) {
  SmallVector<const SCEVPredicate *, 3> Predicates;

  // *** Part1: Analyze if we have a phi-with-cast pattern for which we can
  // return an AddRec expression under some predicate.

  auto *PN = cast<PHINode>(SymbolicPHI->getValue());
  const Loop *L = isIntegerLoopHeaderPHI(PN, LI);
  assert(L && "Expecting an integer loop header phi");

  // The loop may have multiple entrances or multiple exits; we can analyze
  // this phi as an addrec if it has a unique entry value and a unique
  // backedge value.
  Value *BEValueV = nullptr, *StartValueV = nullptr;
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
    Value *V = PN->getIncomingValue(i);
    if (L->contains(PN->getIncomingBlock(i))) {
      if (!BEValueV) {
        BEValueV = V;
      } else if (BEValueV != V) {
        BEValueV = nullptr;
        break;
      }
    } else if (!StartValueV) {
      StartValueV = V;
    } else if (StartValueV != V) {
      StartValueV = nullptr;
      break;
    }
  }
  if (!BEValueV || !StartValueV)
    return None;

  const SCEV *BEValue = getSCEV(BEValueV);

  // If the value coming around the backedge is an add with the symbolic
  // value we just inserted, possibly with casts that we can ignore under
  // an appropriate runtime guard, then we found a simple induction variable!
  const auto *Add = dyn_cast<SCEVAddExpr>(BEValue);
  if (!Add)
    return None;

  // If there is a single occurrence of the symbolic value, possibly
  // casted, replace it with a recurrence.
  unsigned FoundIndex = Add->getNumOperands();
  Type *TruncTy = nullptr;
  bool Signed;
  for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
    if ((TruncTy = isSimpleCastedPHI(Add->getOperand(i), SymbolicPHI, Signed,
                                     *this)))
      if (FoundIndex == e) {
        FoundIndex = i;
        break;
      }

  if (FoundIndex == Add->getNumOperands())
    return None;

  // Create an add with everything but the specified operand.
  SmallVector<const SCEV *, 8> Ops;
  for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
    if (i != FoundIndex)
      Ops.push_back(Add->getOperand(i));
  const SCEV *Accum = getAddExpr(Ops);

  // The runtime checks will not be valid if the step amount is
  // varying inside the loop.
  if (!isLoopInvariant(Accum, L))
    return None;

  // *** Part2: Create the predicates

  // Analysis was successful: we have a phi-with-cast pattern for which we
  // can return an AddRec expression under the following predicates:
  //
  // P1: A Wrap predicate that guarantees that Trunc(Start) + i*Trunc(Accum)
  //     fits within the truncated type (does not overflow) for i = 0 to n-1.
  // P2: An Equal predicate that guarantees that
  //         Start = (Ext ix (Trunc iy (Start) to ix) to iy)
  // P3: An Equal predicate that guarantees that
  //         Accum = (Ext ix (Trunc iy (Accum) to ix) to iy)
  //
  // As we next prove, the above predicates guarantee that:
  //     Start + i*Accum = (Ext ix (Trunc iy ( Start + i*Accum ) to ix) to iy)
  //
  //
  // More formally, we want to prove that:
  //     Expr(i+1) = Start + (i+1) * Accum
  //               = (Ext ix (Trunc iy (Expr(i)) to ix) to iy) + Accum
  //
  // Given that:
  // 1) Expr(0) = Start
  // 2) Expr(1) = Start + Accum
  //            = (Ext ix (Trunc iy (Start) to ix) to iy) + Accum :: from P2
  // 3) Induction hypothesis (step i):
  //    Expr(i) = (Ext ix (Trunc iy (Expr(i-1)) to ix) to iy) + Accum
  //
  // Proof:
  //  Expr(i+1) =
  //   = Start + (i+1)*Accum
  //   = (Start + i*Accum) + Accum
  //   = Expr(i) + Accum
  //   = (Ext ix (Trunc iy (Expr(i-1)) to ix) to iy) + Accum + Accum
  //                                                             :: from step i
  //
  //   = (Ext ix (Trunc iy (Start + (i-1)*Accum) to ix) to iy) + Accum + Accum
  //
  //   = (Ext ix (Trunc iy (Start + (i-1)*Accum) to ix) to iy)
  //     + (Ext ix (Trunc iy (Accum) to ix) to iy)
  //     + Accum                                                     :: from P3
  //
  //   = (Ext ix (Trunc iy ((Start + (i-1)*Accum) + Accum) to ix) to iy)
  //     + Accum                            :: from P1: Ext(x)+Ext(y)=>Ext(x+y)
  //
  //   = (Ext ix (Trunc iy (Start + i*Accum) to ix) to iy) + Accum
  //   = (Ext ix (Trunc iy (Expr(i)) to ix) to iy) + Accum
  //
  // By induction, the same applies to all iterations 1<=i<n.
  //
  // Create a truncated addrec for which we will add a no overflow check (P1).
  const SCEV *StartVal = getSCEV(StartValueV);
  const SCEV *PHISCEV =
      getAddRecExpr(getTruncateExpr(StartVal, TruncTy),
                    getTruncateExpr(Accum, TruncTy), L, SCEV::FlagAnyWrap);

  // PHISCEV can be either a SCEVConstant or a SCEVAddRecExpr.
  // ex: If truncated Accum is 0 and StartVal is a constant, then PHISCEV
  // will be constant.
  //
  // If PHISCEV is a constant, then P1 degenerates into P2 or P3, so we don't
  // add P1.
  if (const auto *AR = dyn_cast<SCEVAddRecExpr>(PHISCEV)) {
    SCEVWrapPredicate::IncrementWrapFlags AddedFlags =
        Signed ? SCEVWrapPredicate::IncrementNSSW
               : SCEVWrapPredicate::IncrementNUSW;
    const SCEVPredicate *AddRecPred = getWrapPredicate(AR, AddedFlags);
    Predicates.push_back(AddRecPred);
  }

  // Create the Equal Predicates P2,P3:

  // It is possible that the predicates P2 and/or P3 are computable at
  // compile time due to StartVal and/or Accum being constants.
  // If either one is, then we can check that now and escape if either P2
  // or P3 is false.

  // Construct the extended SCEV: (Ext ix (Trunc iy (Expr) to ix) to iy)
  // for each of StartVal and Accum
  auto getExtendedExpr = [&](const SCEV *Expr,
                             bool CreateSignExtend) -> const SCEV * {
    assert(isLoopInvariant(Expr, L) && "Expr is expected to be invariant");
    const SCEV *TruncatedExpr = getTruncateExpr(Expr, TruncTy);
    const SCEV *ExtendedExpr =
        CreateSignExtend ? getSignExtendExpr(TruncatedExpr, Expr->getType())
                         : getZeroExtendExpr(TruncatedExpr, Expr->getType());
    return ExtendedExpr;
  };

  // Given:
  //  ExtendedExpr = (Ext ix (Trunc iy (Expr) to ix) to iy
  //               = getExtendedExpr(Expr)
  // Determine whether the predicate P: Expr == ExtendedExpr
  // is known to be false at compile time
  auto PredIsKnownFalse = [&](const SCEV *Expr,
                              const SCEV *ExtendedExpr) -> bool {
    return Expr != ExtendedExpr &&
           isKnownPredicate(ICmpInst::ICMP_NE, Expr, ExtendedExpr);
  };

  const SCEV *StartExtended = getExtendedExpr(StartVal, Signed);
  if (PredIsKnownFalse(StartVal, StartExtended)) {
    DEBUG(dbgs() << "P2 is compile-time false\n";);
    return None;
  }

  // The Step is always Signed (because the overflow checks are either
  // NSSW or NUSW)
  const SCEV *AccumExtended = getExtendedExpr(Accum, /*CreateSignExtend=*/true);
  if (PredIsKnownFalse(Accum, AccumExtended)) {
    DEBUG(dbgs() << "P3 is compile-time false\n";);
    return None;
  }

  auto AppendPredicate = [&](const SCEV *Expr,
                             const SCEV *ExtendedExpr) -> void {
    if (Expr != ExtendedExpr &&
        !isKnownPredicate(ICmpInst::ICMP_EQ, Expr, ExtendedExpr)) {
      const SCEVPredicate *Pred = getEqualPredicate(Expr, ExtendedExpr);
      DEBUG(dbgs() << "Added Predicate: " << *Pred);
      Predicates.push_back(Pred);
    }
  };

  AppendPredicate(StartVal, StartExtended);
  AppendPredicate(Accum, AccumExtended);

  // *** Part3: Predicates are ready. Now go ahead and create the new addrec in
  // which the casts had been folded away. The caller can rewrite SymbolicPHI
  // into NewAR if it will also add the runtime overflow checks specified in
  // Predicates.
  auto *NewAR = getAddRecExpr(StartVal, Accum, L, SCEV::FlagAnyWrap);
  std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>> PredRewrite =
      std::make_pair(NewAR, Predicates);
  // Remember the result of the analysis for this SCEV at this location.
  PredicatedSCEVRewrites[{SymbolicPHI, L}] = PredRewrite;
  return PredRewrite;
}

Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
ScalarEvolution::createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI) {
  auto *PN = cast<PHINode>(SymbolicPHI->getValue());
  const Loop *L = isIntegerLoopHeaderPHI(PN, LI);
  if (!L)
    return None;

  // Check to see if we already analyzed this PHI.
  auto I = PredicatedSCEVRewrites.find({SymbolicPHI, L});
  if (I != PredicatedSCEVRewrites.end()) {
    std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>> Rewrite =
        I->second;
    // Analysis was done before and failed to create an AddRec:
    if (Rewrite.first == SymbolicPHI)
      return None;
    // Analysis was done before and succeeded to create an AddRec under
    // a predicate:
    assert(isa<SCEVAddRecExpr>(Rewrite.first) && "Expected an AddRec");
    assert(!(Rewrite.second).empty() && "Expected to find Predicates");
    return Rewrite;
  }

  Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
      Rewrite = createAddRecFromPHIWithCastsImpl(SymbolicPHI);

  // Record in the cache that the analysis failed
  if (!Rewrite) {
    SmallVector<const SCEVPredicate *, 3> Predicates;
    PredicatedSCEVRewrites[{SymbolicPHI, L}] = {SymbolicPHI, Predicates};
    return None;
  }

  return Rewrite;
}

// FIXME: This utility is currently required because the Rewriter currently
// does not rewrite this expression:
// {0, +, (sext ix (trunc iy to ix) to iy)}
// into {0, +, %step},
// even when the following Equal predicate exists:
// "%step == (sext ix (trunc iy to ix) to iy)".
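// As a concrete illustration (hypothetical i64 step %step): under the
// predicate "%step == (sext i32 (trunc i64 %step to i32) to i64)", the
// addrecs {0,+,(sext i32 (trunc i64 %step to i32) to i64)} and {0,+,%step}
// are treated as equal below even though they stay syntactically different.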
bool PredicatedScalarEvolution::areAddRecsEqualWithPreds(
    const SCEVAddRecExpr *AR1, const SCEVAddRecExpr *AR2) const {
  if (AR1 == AR2)
    return true;

  auto areExprsEqual = [&](const SCEV *Expr1, const SCEV *Expr2) -> bool {
    if (Expr1 != Expr2 && !Preds.implies(SE.getEqualPredicate(Expr1, Expr2)) &&
        !Preds.implies(SE.getEqualPredicate(Expr2, Expr1)))
      return false;
    return true;
  };

  if (!areExprsEqual(AR1->getStart(), AR2->getStart()) ||
      !areExprsEqual(AR1->getStepRecurrence(SE), AR2->getStepRecurrence(SE)))
    return false;
  return true;
}

/// A helper function for createAddRecFromPHI to handle simple cases.
///
/// This function tries to find an AddRec expression for the simplest (yet most
/// common) cases: PN = PHI(Start, OP(Self, LoopInvariant)).
/// If it fails, createAddRecFromPHI will use a more general, but slow,
/// technique for finding the AddRec expression.
const SCEV *ScalarEvolution::createSimpleAffineAddRec(PHINode *PN,
                                                      Value *BEValueV,
                                                      Value *StartValueV) {
  const Loop *L = LI.getLoopFor(PN->getParent());
  assert(L && L->getHeader() == PN->getParent());
  assert(BEValueV && StartValueV);

  auto BO = MatchBinaryOp(BEValueV, DT);
  if (!BO)
    return nullptr;

  if (BO->Opcode != Instruction::Add)
    return nullptr;

  const SCEV *Accum = nullptr;
  if (BO->LHS == PN && L->isLoopInvariant(BO->RHS))
    Accum = getSCEV(BO->RHS);
  else if (BO->RHS == PN && L->isLoopInvariant(BO->LHS))
    Accum = getSCEV(BO->LHS);

  if (!Accum)
    return nullptr;

  SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
  if (BO->IsNUW)
    Flags = setFlags(Flags, SCEV::FlagNUW);
  if (BO->IsNSW)
    Flags = setFlags(Flags, SCEV::FlagNSW);

  const SCEV *StartVal = getSCEV(StartValueV);
  const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);

  ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;

  // We can add Flags to the post-inc expression only if we
  // know that it is *undefined behavior* for BEValueV to
  // overflow.
  if (auto *BEInst = dyn_cast<Instruction>(BEValueV))
    if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L))
      (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags);

  return PHISCEV;
}

const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) {
  const Loop *L = LI.getLoopFor(PN->getParent());
  if (!L || L->getHeader() != PN->getParent())
    return nullptr;

  // The loop may have multiple entrances or multiple exits; we can analyze
  // this phi as an addrec if it has a unique entry value and a unique
  // backedge value.
  Value *BEValueV = nullptr, *StartValueV = nullptr;
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
    Value *V = PN->getIncomingValue(i);
    if (L->contains(PN->getIncomingBlock(i))) {
      if (!BEValueV) {
        BEValueV = V;
      } else if (BEValueV != V) {
        BEValueV = nullptr;
        break;
      }
    } else if (!StartValueV) {
      StartValueV = V;
    } else if (StartValueV != V) {
      StartValueV = nullptr;
      break;
    }
  }
  if (!BEValueV || !StartValueV)
    return nullptr;

  assert(ValueExprMap.find_as(PN) == ValueExprMap.end() &&
         "PHI node already processed?");

  // First, try to find an AddRec expression without creating a fictitious
  // symbolic value for PN.
  if (auto *S = createSimpleAffineAddRec(PN, BEValueV, StartValueV))
    return S;

  // Handle PHI node value symbolically.
  const SCEV *SymbolicName = getUnknown(PN);
  ValueExprMap.insert({SCEVCallbackVH(PN, this), SymbolicName});

  // Using this symbolic name for the PHI, analyze the value coming around
  // the back-edge.
  const SCEV *BEValue = getSCEV(BEValueV);

  // NOTE: If BEValue is loop invariant, we know that the PHI node just
  // has a special value for the first iteration of the loop.
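  // For example (hypothetical IR):
  //   %iv      = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  //   %iv.next = add nsw i64 %iv, 1
  // Here BEValue is the symbolic name plus 1, and the analysis below folds
  // the PHI into the addrec {0,+,1}<nsw>.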
  // If the value coming around the backedge is an add with the symbolic
  // value we just inserted, then we found a simple induction variable!
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) {
    // If there is a single occurrence of the symbolic value, replace it
    // with a recurrence.
    unsigned FoundIndex = Add->getNumOperands();
    for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
      if (Add->getOperand(i) == SymbolicName)
        if (FoundIndex == e) {
          FoundIndex = i;
          break;
        }

    if (FoundIndex != Add->getNumOperands()) {
      // Create an add with everything but the specified operand.
      SmallVector<const SCEV *, 8> Ops;
      for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
        if (i != FoundIndex)
          Ops.push_back(SCEVBackedgeConditionFolder::rewrite(Add->getOperand(i),
                                                             L, *this));
      const SCEV *Accum = getAddExpr(Ops);

      // This is not a valid addrec if the step amount is varying each
      // loop iteration, but is not itself an addrec in this loop.
      if (isLoopInvariant(Accum, L) ||
          (isa<SCEVAddRecExpr>(Accum) &&
           cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
        SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;

        if (auto BO = MatchBinaryOp(BEValueV, DT)) {
          if (BO->Opcode == Instruction::Add && BO->LHS == PN) {
            if (BO->IsNUW)
              Flags = setFlags(Flags, SCEV::FlagNUW);
            if (BO->IsNSW)
              Flags = setFlags(Flags, SCEV::FlagNSW);
          }
        } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) {
          // If the increment is an inbounds GEP, then we know the address
          // space cannot be wrapped around. We cannot make any guarantee
          // about signed or unsigned overflow because pointers are
          // unsigned but we may have a negative index from the base
          // pointer. We can guarantee that no unsigned wrap occurs if the
          // indices form a positive value.
          if (GEP->isInBounds() && GEP->getOperand(0) == PN) {
            Flags = setFlags(Flags, SCEV::FlagNW);

            const SCEV *Ptr = getSCEV(GEP->getPointerOperand());
            if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr)))
              Flags = setFlags(Flags, SCEV::FlagNUW);
          }

          // We cannot transfer nuw and nsw flags from subtraction
          // operations -- sub nuw X, Y is not the same as add nuw X, -Y
          // for instance.
        }

        const SCEV *StartVal = getSCEV(StartValueV);
        const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);

        // Okay, for the entire analysis of this edge we assumed the PHI
        // to be symbolic. We now need to go back and purge all of the
        // entries for the scalars that use the symbolic expression.
        forgetSymbolicName(PN, SymbolicName);
        ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;

        // We can add Flags to the post-inc expression only if we
        // know that it is *undefined behavior* for BEValueV to
        // overflow.
        if (auto *BEInst = dyn_cast<Instruction>(BEValueV))
          if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L))
            (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags);

        return PHISCEV;
      }
    }
  } else {
    // Otherwise, this could be a loop like this:
    //     i = 0;  for (j = 1; ..; ++j) { ....  i = j; }
    // In this case, j = {1,+,1} and BEValue is j.
    // Because the other in-value of i (0) fits the evolution of BEValue,
    // i really is an addrec evolution.
    //
    // We can generalize this by saying that i is the shifted value of BEValue
    // by one iteration:
    //   PHI(f(0), f({1,+,1})) --> f({0,+,1})
    const SCEV *Shifted = SCEVShiftRewriter::rewrite(BEValue, L, *this);
    const SCEV *Start = SCEVInitRewriter::rewrite(Shifted, L, *this);
    if (Shifted != getCouldNotCompute() && Start != getCouldNotCompute()) {
      const SCEV *StartVal = getSCEV(StartValueV);
      if (Start == StartVal) {
        // Okay, for the entire analysis of this edge we assumed the PHI
        // to be symbolic. We now need to go back and purge all of the
        // entries for the scalars that use the symbolic expression.
        forgetSymbolicName(PN, SymbolicName);
        ValueExprMap[SCEVCallbackVH(PN, this)] = Shifted;
        return Shifted;
      }
    }
  }

  // Remove the temporary PHI node SCEV that has been inserted while intending
  // to create an AddRecExpr for this PHI node. We cannot keep this temporary
  // entry, as it would prevent later (possibly simpler) SCEV expressions from
  // being added to the ValueExprMap.
  eraseValueFromMap(PN);

  return nullptr;
}

// Checks if the SCEV S is available at BB.  S is considered available at BB
// if S can be materialized at BB without introducing a fault.
static bool IsAvailableOnEntry(const Loop *L, DominatorTree &DT, const SCEV *S,
                               BasicBlock *BB) {
  struct CheckAvailable {
    bool TraversalDone = false;
    bool Available = true;

    const Loop *L = nullptr;  // The loop BB is in (can be nullptr)
    BasicBlock *BB = nullptr;
    DominatorTree &DT;

    CheckAvailable(const Loop *L, BasicBlock *BB, DominatorTree &DT)
        : L(L), BB(BB), DT(DT) {}

    bool setUnavailable() {
      TraversalDone = true;
      Available = false;
      return false;
    }

    bool follow(const SCEV *S) {
      switch (S->getSCEVType()) {
      case scConstant:
      case scTruncate:
      case scZeroExtend:
      case scSignExtend:
      case scAddExpr:
      case scMulExpr:
      case scUMaxExpr:
      case scSMaxExpr:
        // These expressions are available if their operand(s) is/are.
        return true;

      case scAddRecExpr: {
        // We allow add recurrences that are on the loop BB is in, or some
        // outer loop.  This guarantees availability because the value of the
        // add recurrence at BB is simply the "current" value of the induction
        // variable.  We can relax this in the future; for instance an add
        // recurrence on a sibling dominating loop is also available at BB.
        const auto *ARLoop = cast<SCEVAddRecExpr>(S)->getLoop();
        if (L && (ARLoop == L || ARLoop->contains(L)))
          return true;

        return setUnavailable();
      }

      case scUnknown: {
        // For SCEVUnknown, we check for simple dominance.
        const auto *SU = cast<SCEVUnknown>(S);
        Value *V = SU->getValue();

        if (isa<Argument>(V))
          return false;

        if (isa<Instruction>(V) && DT.dominates(cast<Instruction>(V), BB))
          return false;

        return setUnavailable();
      }

      case scUDivExpr:
      case scCouldNotCompute:
        // We do not try to be smart about these at all.
        return setUnavailable();
      }
      llvm_unreachable("switch should be fully covered!");
    }

    bool isDone() { return TraversalDone; }
  };

  CheckAvailable CA(L, BB, DT);
  SCEVTraversal<CheckAvailable> ST(CA);

  ST.visitAll(S);
  return CA.Available;
}

// Try to match a control flow sequence that branches out at BI and merges back
// at Merge into a "C ? LHS : RHS" select pattern.  Return true on a successful
// match.
static bool BrPHIToSelect(DominatorTree &DT, BranchInst *BI, PHINode *Merge,
                          Value *&C, Value *&LHS, Value *&RHS) {
  C = BI->getCondition();

  BasicBlockEdge LeftEdge(BI->getParent(), BI->getSuccessor(0));
  BasicBlockEdge RightEdge(BI->getParent(), BI->getSuccessor(1));

  if (!LeftEdge.isSingleEdge())
    return false;

  assert(RightEdge.isSingleEdge() && "Follows from LeftEdge.isSingleEdge()");

  Use &LeftUse = Merge->getOperandUse(0);
  Use &RightUse = Merge->getOperandUse(1);

  if (DT.dominates(LeftEdge, LeftUse) && DT.dominates(RightEdge, RightUse)) {
    LHS = LeftUse;
    RHS = RightUse;
    return true;
  }

  if (DT.dominates(LeftEdge, RightUse) && DT.dominates(RightEdge, LeftUse)) {
    LHS = RightUse;
    RHS = LeftUse;
    return true;
  }

  return false;
}

const SCEV *ScalarEvolution::createNodeFromSelectLikePHI(PHINode *PN) {
  auto IsReachable =
      [&](BasicBlock *BB) { return DT.isReachableFromEntry(BB); };
  if (PN->getNumIncomingValues() == 2 && all_of(PN->blocks(), IsReachable)) {
    const Loop *L = LI.getLoopFor(PN->getParent());

    // We don't want to break LCSSA, even in a SCEV expression tree.
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      if (LI.getLoopFor(PN->getIncomingBlock(i)) != L)
        return nullptr;

    // Try to match
    //
    //  br %cond, label %left, label %right
    // left:
    //  br label %merge
    // right:
    //  br label %merge
    // merge:
    //  V = phi [ %x, %left ], [ %y, %right ]
    //
    // as "select %cond, %x, %y"

    BasicBlock *IDom = DT[PN->getParent()]->getIDom()->getBlock();
    assert(IDom && "At least the entry block should dominate PN");

    auto *BI = dyn_cast<BranchInst>(IDom->getTerminator());
    Value *Cond = nullptr, *LHS = nullptr, *RHS = nullptr;

    if (BI && BI->isConditional() &&
        BrPHIToSelect(DT, BI, PN, Cond, LHS, RHS) &&
        IsAvailableOnEntry(L, DT, getSCEV(LHS), PN->getParent()) &&
        IsAvailableOnEntry(L, DT, getSCEV(RHS), PN->getParent()))
      return createNodeForSelectOrPHI(PN, Cond, LHS, RHS);
  }

  return nullptr;
}

const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
  if (const SCEV *S = createAddRecFromPHI(PN))
    return S;

  if (const SCEV *S = createNodeFromSelectLikePHI(PN))
    return S;

  // If the PHI has a single incoming value, follow that value, unless the
  // PHI's incoming blocks are in a different loop, in which case doing so
  // risks breaking LCSSA form. Instcombine would normally zap these, but
  // it doesn't have DominatorTree information, so it may miss cases.
  if (Value *V = SimplifyInstruction(PN, {getDataLayout(), &TLI, &DT, &AC}))
    if (LI.replacementPreservesLCSSAForm(PN, V))
      return getSCEV(V);

  // If it's not a loop phi, we can't handle it yet.
  return getUnknown(PN);
}

const SCEV *ScalarEvolution::createNodeForSelectOrPHI(Instruction *I,
                                                      Value *Cond,
                                                      Value *TrueVal,
                                                      Value *FalseVal) {
  // Handle "constant" branch or select. This can occur for instance when a
  // loop pass transforms an inner loop and moves on to process the outer loop.
  if (auto *CI = dyn_cast<ConstantInt>(Cond))
    return getSCEV(CI->isOne() ? TrueVal : FalseVal);

  // Try to match some simple smax or umax patterns.
  auto *ICI = dyn_cast<ICmpInst>(Cond);
  if (!ICI)
    return getUnknown(I);

  Value *LHS = ICI->getOperand(0);
  Value *RHS = ICI->getOperand(1);

  switch (ICI->getPredicate()) {
  case ICmpInst::ICMP_SLT:
  case ICmpInst::ICMP_SLE:
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ICmpInst::ICMP_SGT:
  case ICmpInst::ICMP_SGE:
    // a >s b ? a+x : b+x  ->  smax(a, b)+x
    // a >s b ? b+x : a+x  ->  smin(a, b)+x
    if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType())) {
      const SCEV *LS = getNoopOrSignExtend(getSCEV(LHS), I->getType());
      const SCEV *RS = getNoopOrSignExtend(getSCEV(RHS), I->getType());
      const SCEV *LA = getSCEV(TrueVal);
      const SCEV *RA = getSCEV(FalseVal);
      const SCEV *LDiff = getMinusSCEV(LA, LS);
      const SCEV *RDiff = getMinusSCEV(RA, RS);
      if (LDiff == RDiff)
        return getAddExpr(getSMaxExpr(LS, RS), LDiff);
      LDiff = getMinusSCEV(LA, RS);
      RDiff = getMinusSCEV(RA, LS);
      if (LDiff == RDiff)
        return getAddExpr(getSMinExpr(LS, RS), LDiff);
    }
    break;
  case ICmpInst::ICMP_ULT:
  case ICmpInst::ICMP_ULE:
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ICmpInst::ICMP_UGT:
  case ICmpInst::ICMP_UGE:
    // a >u b ? a+x : b+x  ->  umax(a, b)+x
    // a >u b ? b+x : a+x  ->  umin(a, b)+x
    if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType())) {
      const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType());
      const SCEV *RS = getNoopOrZeroExtend(getSCEV(RHS), I->getType());
      const SCEV *LA = getSCEV(TrueVal);
      const SCEV *RA = getSCEV(FalseVal);
      const SCEV *LDiff = getMinusSCEV(LA, LS);
      const SCEV *RDiff = getMinusSCEV(RA, RS);
      if (LDiff == RDiff)
        return getAddExpr(getUMaxExpr(LS, RS), LDiff);
      LDiff = getMinusSCEV(LA, RS);
      RDiff = getMinusSCEV(RA, LS);
      if (LDiff == RDiff)
        return getAddExpr(getUMinExpr(LS, RS), LDiff);
    }
    break;
  case ICmpInst::ICMP_NE:
    // n != 0 ? n+x : 1+x  ->  umax(n, 1)+x
    if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType()) &&
        isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
      const SCEV *One = getOne(I->getType());
      const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType());
      const SCEV *LA = getSCEV(TrueVal);
      const SCEV *RA = getSCEV(FalseVal);
      const SCEV *LDiff = getMinusSCEV(LA, LS);
      const SCEV *RDiff = getMinusSCEV(RA, One);
      if (LDiff == RDiff)
        return getAddExpr(getUMaxExpr(One, LS), LDiff);
    }
    break;
  case ICmpInst::ICMP_EQ:
    // n == 0 ? 1+x : n+x  ->  umax(n, 1)+x
    if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType()) &&
        isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
      const SCEV *One = getOne(I->getType());
      const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType());
      const SCEV *LA = getSCEV(TrueVal);
      const SCEV *RA = getSCEV(FalseVal);
      const SCEV *LDiff = getMinusSCEV(LA, One);
      const SCEV *RDiff = getMinusSCEV(RA, LS);
      if (LDiff == RDiff)
        return getAddExpr(getUMaxExpr(One, LS), LDiff);
    }
    break;
  default:
    break;
  }

  return getUnknown(I);
}

/// Expand GEP instructions into add and multiply operations. This allows them
/// to be analyzed by regular SCEV code.
const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
  // Don't attempt to analyze GEPs over unsized objects.
  if (!GEP->getSourceElementType()->isSized())
    return getUnknown(GEP);

  SmallVector<const SCEV *, 4> IndexExprs;
  for (auto Index = GEP->idx_begin(); Index != GEP->idx_end(); ++Index)
    IndexExprs.push_back(getSCEV(*Index));
  return getGEPExpr(GEP, IndexExprs);
}

uint32_t ScalarEvolution::GetMinTrailingZerosImpl(const SCEV *S) {
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
    return C->getAPInt().countTrailingZeros();

  if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(S))
    return std::min(GetMinTrailingZeros(T->getOperand()),
                    (uint32_t)getTypeSizeInBits(T->getType()));

  if (const SCEVZeroExtendExpr *E = dyn_cast<SCEVZeroExtendExpr>(S)) {
    uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
    return OpRes == getTypeSizeInBits(E->getOperand()->getType())
               ? getTypeSizeInBits(E->getType())
               : OpRes;
  }

  if (const SCEVSignExtendExpr *E = dyn_cast<SCEVSignExtendExpr>(S)) {
    uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
    return OpRes == getTypeSizeInBits(E->getOperand()->getType())
               ? getTypeSizeInBits(E->getType())
               : OpRes;
  }

  if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
    // The result is the min of all operands results.
    uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
    for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
      MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
    return MinOpRes;
  }

  if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
    // The result is the sum of all operands results.
    uint32_t SumOpRes = GetMinTrailingZeros(M->getOperand(0));
    uint32_t BitWidth = getTypeSizeInBits(M->getType());
    for (unsigned i = 1, e = M->getNumOperands();
         SumOpRes != BitWidth && i != e; ++i)
      SumOpRes =
          std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i)), BitWidth);
    return SumOpRes;
  }

  if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
    // The result is the min of all operands results.
    uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
    for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
      MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
    return MinOpRes;
  }

  if (const SCEVSMaxExpr *M = dyn_cast<SCEVSMaxExpr>(S)) {
    // The result is the min of all operands results.
    uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
    for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
      MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
    return MinOpRes;
  }

  if (const SCEVUMaxExpr *M = dyn_cast<SCEVUMaxExpr>(S)) {
    // The result is the min of all operands results.
    uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
    for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
      MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
    return MinOpRes;
  }

  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
    // For a SCEVUnknown, ask ValueTracking.
    KnownBits Known = computeKnownBits(U->getValue(), getDataLayout(), 0, &AC,
                                       nullptr, &DT);
    return Known.countMinTrailingZeros();
  }

  // SCEVUDivExpr
  return 0;
}

uint32_t ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
  auto I = MinTrailingZerosCache.find(S);
  if (I != MinTrailingZerosCache.end())
    return I->second;

  uint32_t Result = GetMinTrailingZerosImpl(S);
  auto InsertPair = MinTrailingZerosCache.insert({S, Result});
  assert(InsertPair.second && "Should insert a new key");
  return InsertPair.first->second;
}

/// Helper method to assign a range to V from metadata present in the IR.
static Optional<ConstantRange> GetRangeFromMetadata(Value *V) {
  if (Instruction *I = dyn_cast<Instruction>(V))
    if (MDNode *MD = I->getMetadata(LLVMContext::MD_range))
      return getConstantRangeFromMetadata(*MD);

  return None;
}

/// Determine the range for a particular SCEV.  If SignHint is
/// HINT_RANGE_UNSIGNED (resp. HINT_RANGE_SIGNED) then getRange prefers ranges
/// with a "cleaner" unsigned (resp. signed) representation.
const ConstantRange &
ScalarEvolution::getRangeRef(const SCEV *S,
                             ScalarEvolution::RangeSignHint SignHint) {
  DenseMap<const SCEV *, ConstantRange> &Cache =
      SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED ? UnsignedRanges
                                                       : SignedRanges;

  // See if we've computed this range already.
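  // Note that ranges are memoized per sign hint: getUnsignedRange and
  // getSignedRange of the same SCEV may cache two different ConstantRanges,
  // one per preferred representation.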
  DenseMap<const SCEV *, ConstantRange>::iterator I = Cache.find(S);
  if (I != Cache.end())
    return I->second;

  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
    return setRange(C, SignHint, ConstantRange(C->getAPInt()));

  unsigned BitWidth = getTypeSizeInBits(S->getType());
  ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);

  // If the value has known zeros, the maximum value will have those known
  // zeros as well.
  uint32_t TZ = GetMinTrailingZeros(S);
  if (TZ != 0) {
    if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED)
      ConservativeResult =
          ConstantRange(APInt::getMinValue(BitWidth),
                        APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ) + 1);
    else
      ConservativeResult = ConstantRange(
          APInt::getSignedMinValue(BitWidth),
          APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1);
  }

  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    ConstantRange X = getRangeRef(Add->getOperand(0), SignHint);
    for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
      X = X.add(getRangeRef(Add->getOperand(i), SignHint));
    return setRange(Add, SignHint, ConservativeResult.intersectWith(X));
  }

  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
    ConstantRange X = getRangeRef(Mul->getOperand(0), SignHint);
    for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
      X = X.multiply(getRangeRef(Mul->getOperand(i), SignHint));
    return setRange(Mul, SignHint, ConservativeResult.intersectWith(X));
  }

  if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
    ConstantRange X = getRangeRef(SMax->getOperand(0), SignHint);
    for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
      X = X.smax(getRangeRef(SMax->getOperand(i), SignHint));
    return setRange(SMax, SignHint, ConservativeResult.intersectWith(X));
  }

  if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
    ConstantRange X = getRangeRef(UMax->getOperand(0), SignHint);
    for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
      X = X.umax(getRangeRef(UMax->getOperand(i), SignHint));
    return setRange(UMax, SignHint, ConservativeResult.intersectWith(X));
  }

  if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
    ConstantRange X = getRangeRef(UDiv->getLHS(), SignHint);
    ConstantRange Y = getRangeRef(UDiv->getRHS(), SignHint);
    return setRange(UDiv, SignHint,
                    ConservativeResult.intersectWith(X.udiv(Y)));
  }

  if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
    ConstantRange X = getRangeRef(ZExt->getOperand(), SignHint);
    return setRange(ZExt, SignHint,
                    ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
  }

  if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
    ConstantRange X = getRangeRef(SExt->getOperand(), SignHint);
    return setRange(SExt, SignHint,
                    ConservativeResult.intersectWith(X.signExtend(BitWidth)));
  }

  if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
    ConstantRange X = getRangeRef(Trunc->getOperand(), SignHint);
    return setRange(Trunc, SignHint,
                    ConservativeResult.intersectWith(X.truncate(BitWidth)));
  }

  if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
    // If there's no unsigned wrap, the value will never be less than its
    // initial value.
    if (AddRec->hasNoUnsignedWrap())
      if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart()))
        if (!C->getValue()->isZero())
          ConservativeResult = ConservativeResult.intersectWith(
              ConstantRange(C->getAPInt(), APInt(BitWidth, 0)));

    // If there's no signed wrap, and all the operands have the same sign or
    // zero, the value won't ever change sign.
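    // E.g. a {1,+,2}<nsw> addrec has only non-negative operands, so all of
    // its values stay within [0, SINT_MAX]; the intersection below records
    // exactly that half-range.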
    if (AddRec->hasNoSignedWrap()) {
      bool AllNonNeg = true;
      bool AllNonPos = true;
      for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
        if (!isKnownNonNegative(AddRec->getOperand(i)))
          AllNonNeg = false;
        if (!isKnownNonPositive(AddRec->getOperand(i)))
          AllNonPos = false;
      }
      if (AllNonNeg)
        ConservativeResult = ConservativeResult.intersectWith(
            ConstantRange(APInt(BitWidth, 0),
                          APInt::getSignedMinValue(BitWidth)));
      else if (AllNonPos)
        ConservativeResult = ConservativeResult.intersectWith(
            ConstantRange(APInt::getSignedMinValue(BitWidth),
                          APInt(BitWidth, 1)));
    }

    // TODO: non-affine addrec
    if (AddRec->isAffine()) {
      const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
      if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
          getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
        auto RangeFromAffine = getRangeForAffineAR(
            AddRec->getStart(), AddRec->getStepRecurrence(*this), MaxBECount,
            BitWidth);
        if (!RangeFromAffine.isFullSet())
          ConservativeResult =
              ConservativeResult.intersectWith(RangeFromAffine);

        auto RangeFromFactoring = getRangeViaFactoring(
            AddRec->getStart(), AddRec->getStepRecurrence(*this), MaxBECount,
            BitWidth);
        if (!RangeFromFactoring.isFullSet())
          ConservativeResult =
              ConservativeResult.intersectWith(RangeFromFactoring);
      }
    }

    return setRange(AddRec, SignHint, std::move(ConservativeResult));
  }

  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
    // Check if the IR explicitly contains !range metadata.
    Optional<ConstantRange> MDRange = GetRangeFromMetadata(U->getValue());
    if (MDRange.hasValue())
      ConservativeResult =
          ConservativeResult.intersectWith(MDRange.getValue());

    // Split here to avoid paying the compile-time cost of calling both
    // computeKnownBits and ComputeNumSignBits.  This restriction can be
    // lifted if needed.
    const DataLayout &DL = getDataLayout();
    if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) {
      // For a SCEVUnknown, ask ValueTracking.
      KnownBits Known =
          computeKnownBits(U->getValue(), DL, 0, &AC, nullptr, &DT);
      if (Known.One != ~Known.Zero + 1)
        ConservativeResult = ConservativeResult.intersectWith(
            ConstantRange(Known.One, ~Known.Zero + 1));
    } else {
      assert(SignHint == ScalarEvolution::HINT_RANGE_SIGNED &&
             "generalize as needed!");
      unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, &AC, nullptr,
                                       &DT);
      if (NS > 1)
        ConservativeResult = ConservativeResult.intersectWith(
            ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
                          APInt::getSignedMaxValue(BitWidth).ashr(NS - 1) + 1));
    }

    return setRange(U, SignHint, std::move(ConservativeResult));
  }

  return setRange(S, SignHint, std::move(ConservativeResult));
}

// Given a StartRange, Step and MaxBECount for an expression compute a range of
// values that the expression can take. Initially, the expression has a value
// from StartRange and then is changed by Step up to MaxBECount times. Signed
// argument defines if we treat Step as signed or unsigned.
static ConstantRange getRangeForAffineARHelper(APInt Step,
                                               const ConstantRange &StartRange,
                                               const APInt &MaxBECount,
                                               unsigned BitWidth, bool Signed) {
  // If either Step or MaxBECount is 0, then the expression won't change, and
  // we just need to return the initial range.
  if (Step == 0 || MaxBECount == 0)
    return StartRange;

  // If we don't know anything about the initial value (i.e. StartRange is
  // FullRange), then we don't know anything about the final range either.
  // Return FullRange.
  if (StartRange.isFullSet())
    return ConstantRange(BitWidth, /* isFullSet = */ true);

  // If Step is signed and negative, then we use its absolute value, but we
  // also note that we're moving in the opposite direction.
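  // Worked example (i8, hypothetical values): StartRange = [10, 21),
  // Step = -3, MaxBECount = 4. Then Descending is true, Step.abs() = 3,
  // Offset = 12, and the moved lower bound is 10 - 12 = -2, so the result
  // computed below is the wrapped range [-2, 21), i.e. [254, 21) unsigned.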
  bool Descending = Signed && Step.isNegative();

  if (Signed)
    // This is correct even for INT_SMIN. Let's look at i8 to illustrate this:
    //   abs(INT_SMIN) = abs(-128) = abs(0x80) = -0x80 = 0x80 = 128.
    // These equations hold true due to the well-defined wrap-around behavior
    // of APInt.
    Step = Step.abs();

  // Check if Offset is more than full span of BitWidth. If it is, the
  // expression is guaranteed to overflow.
  if (APInt::getMaxValue(StartRange.getBitWidth()).udiv(Step).ult(MaxBECount))
    return ConstantRange(BitWidth, /* isFullSet = */ true);

  // Offset is by how much the expression can change. Checks above guarantee
  // no overflow here.
  APInt Offset = Step * MaxBECount;

  // Minimum value of the final range will match the minimal value of
  // StartRange if the expression is increasing and will be decreased by
  // Offset otherwise.
  // Maximum value of the final range will match the maximal value of
  // StartRange if the expression is decreasing and will be increased by
  // Offset otherwise.
  APInt StartLower = StartRange.getLower();
  APInt StartUpper = StartRange.getUpper() - 1;
  APInt MovedBoundary = Descending ? (StartLower - std::move(Offset))
                                   : (StartUpper + std::move(Offset));

  // It's possible that the new minimum/maximum value will fall into the
  // initial range (due to wrap around). This means that the expression can
  // take any value in this bitwidth, and we have to return full range.
  if (StartRange.contains(MovedBoundary))
    return ConstantRange(BitWidth, /* isFullSet = */ true);

  APInt NewLower =
      Descending ? std::move(MovedBoundary) : std::move(StartLower);
  APInt NewUpper =
      Descending ? std::move(StartUpper) : std::move(MovedBoundary);
  NewUpper += 1;

  // If we end up with full range, return a proper full range.
  if (NewLower == NewUpper)
    return ConstantRange(BitWidth, /* isFullSet = */ true);

  // No overflow detected, return [StartLower, StartUpper + Offset + 1) range.
  return ConstantRange(std::move(NewLower), std::move(NewUpper));
}

ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start,
                                                   const SCEV *Step,
                                                   const SCEV *MaxBECount,
                                                   unsigned BitWidth) {
  assert(!isa<SCEVCouldNotCompute>(MaxBECount) &&
         getTypeSizeInBits(MaxBECount->getType()) <= BitWidth &&
         "Precondition!");

  MaxBECount = getNoopOrZeroExtend(MaxBECount, Start->getType());
  APInt MaxBECountValue = getUnsignedRangeMax(MaxBECount);

  // First, consider step signed.
  ConstantRange StartSRange = getSignedRange(Start);
  ConstantRange StepSRange = getSignedRange(Step);

  // If Step can be both positive and negative, we need to find ranges for the
  // maximum absolute step values in both directions and union them.
  ConstantRange SR =
      getRangeForAffineARHelper(StepSRange.getSignedMin(), StartSRange,
                                MaxBECountValue, BitWidth, /* Signed = */ true);
  SR = SR.unionWith(getRangeForAffineARHelper(StepSRange.getSignedMax(),
                                              StartSRange, MaxBECountValue,
                                              BitWidth, /* Signed = */ true));

  // Next, consider step unsigned.
  ConstantRange UR = getRangeForAffineARHelper(
      getUnsignedRangeMax(Step), getUnsignedRange(Start),
      MaxBECountValue, BitWidth, /* Signed = */ false);

  // Finally, intersect signed and unsigned ranges.
  return SR.intersectWith(UR);
}

ConstantRange ScalarEvolution::getRangeViaFactoring(const SCEV *Start,
                                                    const SCEV *Step,
                                                    const SCEV *MaxBECount,
                                                    unsigned BitWidth) {
  //    RangeOf({C?A:B,+,C?P:Q}) == RangeOf(C?{A,+,P}:{B,+,Q})
  // == RangeOf({A,+,P}) union RangeOf({B,+,Q})

  struct SelectPattern {
    Value *Condition = nullptr;
    APInt TrueValue;
    APInt FalseValue;

    explicit SelectPattern(ScalarEvolution &SE, unsigned BitWidth,
                           const SCEV *S) {
      Optional<unsigned> CastOp;
      APInt Offset(BitWidth, 0);

      assert(SE.getTypeSizeInBits(S->getType()) == BitWidth &&
             "Should be!");

      // Peel off a constant offset:
      if (auto *SA = dyn_cast<SCEVAddExpr>(S)) {
        // In the future we could consider being smarter here and handle
        // {Start+Step,+,Step} too.
        if (SA->getNumOperands() != 2 || !isa<SCEVConstant>(SA->getOperand(0)))
          return;

        Offset = cast<SCEVConstant>(SA->getOperand(0))->getAPInt();
        S = SA->getOperand(1);
      }

      // Peel off a cast operation
      if (auto *SCast = dyn_cast<SCEVCastExpr>(S)) {
        CastOp = SCast->getSCEVType();
        S = SCast->getOperand();
      }

      using namespace llvm::PatternMatch;

      auto *SU = dyn_cast<SCEVUnknown>(S);
      const APInt *TrueVal, *FalseVal;
      if (!SU ||
          !match(SU->getValue(), m_Select(m_Value(Condition), m_APInt(TrueVal),
                                          m_APInt(FalseVal)))) {
        Condition = nullptr;
        return;
      }

      TrueValue = *TrueVal;
      FalseValue = *FalseVal;

      // Re-apply the cast we peeled off earlier
      if (CastOp.hasValue())
        switch (*CastOp) {
        default:
          llvm_unreachable("Unknown SCEV cast type!");

        case scTruncate:
          TrueValue = TrueValue.trunc(BitWidth);
          FalseValue = FalseValue.trunc(BitWidth);
          break;
        case scZeroExtend:
          TrueValue = TrueValue.zext(BitWidth);
          FalseValue = FalseValue.zext(BitWidth);
          break;
        case scSignExtend:
          TrueValue = TrueValue.sext(BitWidth);
          FalseValue = FalseValue.sext(BitWidth);
          break;
        }

      // Re-apply the constant offset we peeled off earlier
      TrueValue += Offset;
      FalseValue += Offset;
    }

    bool isRecognized() { return Condition != nullptr; }
  };

  SelectPattern StartPattern(*this, BitWidth, Start);
  if (!StartPattern.isRecognized())
    return ConstantRange(BitWidth, /* isFullSet = */ true);

  SelectPattern StepPattern(*this, BitWidth, Step);
  if (!StepPattern.isRecognized())
    return ConstantRange(BitWidth, /* isFullSet = */ true);

  if (StartPattern.Condition != StepPattern.Condition) {
    // We don't handle this case today; but we could, by considering four
    // possibilities below instead of two. I'm not sure if there are cases
    // where that will help over what getRange already does, though.
    return ConstantRange(BitWidth, /* isFullSet = */ true);
  }

  // NB! Calling ScalarEvolution::getConstant is fine, but we should not try
  // to construct arbitrary general SCEV expressions here.  This function is
  // called from deep in the call stack, and calling getSCEV (on a sext
  // instruction, say) can end up caching a suboptimal value.

  // FIXME: without the explicit `this` receiver below, MSVC errors out with
  // C2352 and C2512 (otherwise it isn't needed).
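  // For instance (hypothetical select %c): with Start = (%c ? 0 : 10) and
  // Step = (%c ? 1 : 2), the code below computes the ranges of {0,+,1} and
  // {10,+,2} separately and returns their union.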
const SCEV *TrueStart = this->getConstant(StartPattern.TrueValue); const SCEV *TrueStep = this->getConstant(StepPattern.TrueValue); const SCEV *FalseStart = this->getConstant(StartPattern.FalseValue); const SCEV *FalseStep = this->getConstant(StepPattern.FalseValue); ConstantRange TrueRange = this->getRangeForAffineAR(TrueStart, TrueStep, MaxBECount, BitWidth); ConstantRange FalseRange = this->getRangeForAffineAR(FalseStart, FalseStep, MaxBECount, BitWidth); return TrueRange.unionWith(FalseRange); } SCEV::NoWrapFlags ScalarEvolution::getNoWrapFlagsFromUB(const Value *V) { if (isa(V)) return SCEV::FlagAnyWrap; const BinaryOperator *BinOp = cast(V); // Return early if there are no flags to propagate to the SCEV. SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; if (BinOp->hasNoUnsignedWrap()) Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW); if (BinOp->hasNoSignedWrap()) Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW); if (Flags == SCEV::FlagAnyWrap) return SCEV::FlagAnyWrap; return isSCEVExprNeverPoison(BinOp) ? Flags : SCEV::FlagAnyWrap; } bool ScalarEvolution::isSCEVExprNeverPoison(const Instruction *I) { // Here we check that I is in the header of the innermost loop containing I, // since we only deal with instructions in the loop header. The actual loop we // need to check later will come from an add recurrence, but getting that // requires computing the SCEV of the operands, which can be expensive. This // check we can do cheaply to rule out some cases early. Loop *InnermostContainingLoop = LI.getLoopFor(I->getParent()); if (InnermostContainingLoop == nullptr || InnermostContainingLoop->getHeader() != I->getParent()) return false; // Only proceed if we can prove that I does not yield poison. if (!programUndefinedIfFullPoison(I)) return false; // At this point we know that if I is executed, then it does not wrap // according to at least one of NSW or NUW. If I is not executed, then we do // not know if the calculation that I represents would wrap. Multiple // instructions can map to the same SCEV. If we apply NSW or NUW from I to // the SCEV, we must guarantee no wrapping for that SCEV also when it is // derived from other instructions that map to the same SCEV. We cannot make // that guarantee for cases where I is not executed. So we need to find the // loop that I is considered in relation to and prove that I is executed for // every iteration of that loop. That implies that the value that I // calculates does not wrap anywhere in the loop, so then we can apply the // flags to the SCEV. // // We check isLoopInvariant to disambiguate in case we are adding recurrences // from different loops, so that we know which loop to prove that I is // executed in. for (unsigned OpIndex = 0; OpIndex < I->getNumOperands(); ++OpIndex) { // I could be an extractvalue from a call to an overflow intrinsic. // TODO: We can do better here in some cases. 
    if (!isSCEVable(I->getOperand(OpIndex)->getType()))
      return false;

    const SCEV *Op = getSCEV(I->getOperand(OpIndex));
    if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
      bool AllOtherOpsLoopInvariant = true;
      for (unsigned OtherOpIndex = 0; OtherOpIndex < I->getNumOperands();
           ++OtherOpIndex) {
        if (OtherOpIndex != OpIndex) {
          const SCEV *OtherOp = getSCEV(I->getOperand(OtherOpIndex));
          if (!isLoopInvariant(OtherOp, AddRec->getLoop())) {
            AllOtherOpsLoopInvariant = false;
            break;
          }
        }
      }
      if (AllOtherOpsLoopInvariant &&
          isGuaranteedToExecuteForEveryIteration(I, AddRec->getLoop()))
        return true;
    }
  }
  return false;
}

bool ScalarEvolution::isAddRecNeverPoison(const Instruction *I, const Loop *L) {
  // If we know that \c I can never be poison period, then that's enough.
  if (isSCEVExprNeverPoison(I))
    return true;

  // For an add recurrence specifically, we assume that infinite loops without
  // side effects are undefined behavior, and then reason as follows:
  //
  // If the add recurrence is poison in any iteration, it is poison on all
  // future iterations (since incrementing poison yields poison). If the result
  // of the add recurrence is fed into the loop latch condition and the loop
  // does not contain any throws or exiting blocks other than the latch, we now
  // have the ability to "choose" whether the backedge is taken or not (by
  // choosing a sufficiently evil value for the poison feeding into the branch)
  // for every iteration including and after the one in which \p I first became
  // poison. There are two possibilities (let's call the iteration in which \p
  // I first became poison as K):
  //
  // 1. In the set of iterations including and after K, the loop body executes
  //    no side effects. In this case executing the backedge an infinite
  //    number of times will yield undefined behavior.
  //
  // 2. In the set of iterations including and after K, the loop body executes
  //    at least one side effect. In this case, that specific instance of side
  //    effect is control dependent on poison, which also yields undefined
  //    behavior.

  auto *ExitingBB = L->getExitingBlock();
  auto *LatchBB = L->getLoopLatch();
  if (!ExitingBB || !LatchBB || ExitingBB != LatchBB)
    return false;

  SmallPtrSet<const Instruction *, 16> Pushed;
  SmallVector<const Instruction *, 8> PoisonStack;

  // We start by assuming \c I, the post-inc add recurrence, is poison. Only
  // things that are known to be fully poison under that assumption go on the
  // PoisonStack.
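// ---- Illustrative aside (not part of this change) ----
// A minimal, self-contained sketch (hypothetical Node type, not the LLVM
// API) of the worklist walk that follows: flood from one assumed-poison
// value through users that propagate poison, and report whether the latch
// branch is reached.
#if 0
#include <set>
#include <vector>

struct Node {
  bool PropagatesPoison = true; // plays the role of propagatesFullPoison(I)
  bool IsLatchBranch = false;   // a conditional branch in the latch block
  std::vector<Node *> Users;
};

static bool latchDependsOnPoison(Node *Root) {
  std::set<Node *> Pushed{Root};
  std::vector<Node *> Stack{Root};
  while (!Stack.empty()) {
    Node *Poison = Stack.back();
    Stack.pop_back();
    for (Node *U : Poison->Users) {
      if (U->IsLatchBranch)
        return true; // the latch condition is control dependent on poison
      if (U->PropagatesPoison && Pushed.insert(U).second)
        Stack.push_back(U); // visit each propagating user once
    }
  }
  return false;
}
#endif
// ---- end aside ----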
Pushed.insert(I); PoisonStack.push_back(I); bool LatchControlDependentOnPoison = false; while (!PoisonStack.empty() && !LatchControlDependentOnPoison) { const Instruction *Poison = PoisonStack.pop_back_val(); for (auto *PoisonUser : Poison->users()) { if (propagatesFullPoison(cast(PoisonUser))) { if (Pushed.insert(cast(PoisonUser)).second) PoisonStack.push_back(cast(PoisonUser)); } else if (auto *BI = dyn_cast(PoisonUser)) { assert(BI->isConditional() && "Only possibility!"); if (BI->getParent() == LatchBB) { LatchControlDependentOnPoison = true; break; } } } } return LatchControlDependentOnPoison && loopHasNoAbnormalExits(L); } ScalarEvolution::LoopProperties ScalarEvolution::getLoopProperties(const Loop *L) { using LoopProperties = ScalarEvolution::LoopProperties; auto Itr = LoopPropertiesCache.find(L); if (Itr == LoopPropertiesCache.end()) { auto HasSideEffects = [](Instruction *I) { if (auto *SI = dyn_cast(I)) return !SI->isSimple(); return I->mayHaveSideEffects(); }; LoopProperties LP = {/* HasNoAbnormalExits */ true, /*HasNoSideEffects*/ true}; for (auto *BB : L->getBlocks()) for (auto &I : *BB) { if (!isGuaranteedToTransferExecutionToSuccessor(&I)) LP.HasNoAbnormalExits = false; if (HasSideEffects(&I)) LP.HasNoSideEffects = false; if (!LP.HasNoAbnormalExits && !LP.HasNoSideEffects) break; // We're already as pessimistic as we can get. } auto InsertPair = LoopPropertiesCache.insert({L, LP}); assert(InsertPair.second && "We just checked!"); Itr = InsertPair.first; } return Itr->second; } const SCEV *ScalarEvolution::createSCEV(Value *V) { if (!isSCEVable(V->getType())) return getUnknown(V); if (Instruction *I = dyn_cast(V)) { // Don't attempt to analyze instructions in blocks that aren't // reachable. Such instructions don't matter, and they aren't required // to obey basic rules for definitions dominating uses which this // analysis depends on. if (!DT.isReachableFromEntry(I->getParent())) return getUnknown(V); } else if (ConstantInt *CI = dyn_cast(V)) return getConstant(CI); else if (isa(V)) return getZero(V->getType()); else if (GlobalAlias *GA = dyn_cast(V)) return GA->isInterposable() ? getUnknown(V) : getSCEV(GA->getAliasee()); else if (!isa(V)) return getUnknown(V); Operator *U = cast(V); if (auto BO = MatchBinaryOp(U, DT)) { switch (BO->Opcode) { case Instruction::Add: { // The simple thing to do would be to just call getSCEV on both operands // and call getAddExpr with the result. However if we're looking at a // bunch of things all added together, this can be quite inefficient, // because it leads to N-1 getAddExpr calls for N ultimate operands. // Instead, gather up all the operands and make a single getAddExpr call. // LLVM IR canonical form means we need only traverse the left operands. SmallVector AddOps; do { if (BO->Op) { if (auto *OpSCEV = getExistingSCEV(BO->Op)) { AddOps.push_back(OpSCEV); break; } // If a NUW or NSW flag can be applied to the SCEV for this // addition, then compute the SCEV for this addition by itself // with a separate call to getAddExpr. We need to do that // instead of pushing the operands of the addition onto AddOps, // since the flags are only known to apply to this particular // addition - they may not apply to other additions that can be // formed with operands from AddOps. 
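// ---- Illustrative aside (not part of this change) ----
// Hypothetical sketch of the gathering strategy described above. Because
// LLVM IR canonicalizes a chain a+b+c+d as (((a+b)+c)+d), collecting all
// addends only requires walking down the left spine, so one getAddExpr
// call with N operands replaces N-1 nested calls.
#if 0
#include <vector>

struct Expr {
  bool IsAdd = false;
  Expr *LHS = nullptr, *RHS = nullptr;
};

static void gatherAddOps(Expr *E, std::vector<Expr *> &Ops) {
  while (E->IsAdd) {       // peel additions off the left spine
    Ops.push_back(E->RHS); // the right operand of each add is a leaf here
    E = E->LHS;
  }
  Ops.push_back(E);        // the final non-add leaf
}
#endif
// ---- end aside ----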
const SCEV *RHS = getSCEV(BO->RHS); SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(BO->Op); if (Flags != SCEV::FlagAnyWrap) { const SCEV *LHS = getSCEV(BO->LHS); if (BO->Opcode == Instruction::Sub) AddOps.push_back(getMinusSCEV(LHS, RHS, Flags)); else AddOps.push_back(getAddExpr(LHS, RHS, Flags)); break; } } if (BO->Opcode == Instruction::Sub) AddOps.push_back(getNegativeSCEV(getSCEV(BO->RHS))); else AddOps.push_back(getSCEV(BO->RHS)); auto NewBO = MatchBinaryOp(BO->LHS, DT); if (!NewBO || (NewBO->Opcode != Instruction::Add && NewBO->Opcode != Instruction::Sub)) { AddOps.push_back(getSCEV(BO->LHS)); break; } BO = NewBO; } while (true); return getAddExpr(AddOps); } case Instruction::Mul: { SmallVector MulOps; do { if (BO->Op) { if (auto *OpSCEV = getExistingSCEV(BO->Op)) { MulOps.push_back(OpSCEV); break; } SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(BO->Op); if (Flags != SCEV::FlagAnyWrap) { MulOps.push_back( getMulExpr(getSCEV(BO->LHS), getSCEV(BO->RHS), Flags)); break; } } MulOps.push_back(getSCEV(BO->RHS)); auto NewBO = MatchBinaryOp(BO->LHS, DT); if (!NewBO || NewBO->Opcode != Instruction::Mul) { MulOps.push_back(getSCEV(BO->LHS)); break; } BO = NewBO; } while (true); return getMulExpr(MulOps); } case Instruction::UDiv: return getUDivExpr(getSCEV(BO->LHS), getSCEV(BO->RHS)); case Instruction::URem: return getURemExpr(getSCEV(BO->LHS), getSCEV(BO->RHS)); case Instruction::Sub: { SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; if (BO->Op) Flags = getNoWrapFlagsFromUB(BO->Op); return getMinusSCEV(getSCEV(BO->LHS), getSCEV(BO->RHS), Flags); } case Instruction::And: // For an expression like x&255 that merely masks off the high bits, // use zext(trunc(x)) as the SCEV expression. if (ConstantInt *CI = dyn_cast(BO->RHS)) { if (CI->isZero()) return getSCEV(BO->RHS); if (CI->isMinusOne()) return getSCEV(BO->LHS); const APInt &A = CI->getValue(); // Instcombine's ShrinkDemandedConstant may strip bits out of // constants, obscuring what would otherwise be a low-bits mask. // Use computeKnownBits to compute what ShrinkDemandedConstant // knew about to reconstruct a low-bits mask value. unsigned LZ = A.countLeadingZeros(); unsigned TZ = A.countTrailingZeros(); unsigned BitWidth = A.getBitWidth(); KnownBits Known(BitWidth); computeKnownBits(BO->LHS, Known, getDataLayout(), 0, &AC, nullptr, &DT); APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ); if ((LZ != 0 || TZ != 0) && !((~A & ~Known.Zero) & EffectiveMask)) { const SCEV *MulCount = getConstant(APInt::getOneBitSet(BitWidth, TZ)); const SCEV *LHS = getSCEV(BO->LHS); const SCEV *ShiftedLHS = nullptr; if (auto *LHSMul = dyn_cast(LHS)) { if (auto *OpC = dyn_cast(LHSMul->getOperand(0))) { // For an expression like (x * 8) & 8, simplify the multiply. unsigned MulZeros = OpC->getAPInt().countTrailingZeros(); unsigned GCD = std::min(MulZeros, TZ); APInt DivAmt = APInt::getOneBitSet(BitWidth, TZ - GCD); SmallVector MulOps; MulOps.push_back(getConstant(OpC->getAPInt().lshr(GCD))); MulOps.append(LHSMul->op_begin() + 1, LHSMul->op_end()); auto *NewMul = getMulExpr(MulOps, LHSMul->getNoWrapFlags()); ShiftedLHS = getUDivExpr(NewMul, getConstant(DivAmt)); } } if (!ShiftedLHS) ShiftedLHS = getUDivExpr(LHS, MulCount); return getMulExpr( getZeroExtendExpr( getTruncateExpr(ShiftedLHS, IntegerType::get(getContext(), BitWidth - LZ - TZ)), BO->LHS->getType()), MulCount); } } break; case Instruction::Or: // If the RHS of the Or is a constant, we may have something like: // X*4+1 which got turned into X*4|1. 
Handle this as an Add so loop // optimizations will transparently handle this case. // // In order for this transformation to be safe, the LHS must be of the // form X*(2^n) and the Or constant must be less than 2^n. if (ConstantInt *CI = dyn_cast(BO->RHS)) { const SCEV *LHS = getSCEV(BO->LHS); const APInt &CIVal = CI->getValue(); if (GetMinTrailingZeros(LHS) >= (CIVal.getBitWidth() - CIVal.countLeadingZeros())) { // Build a plain add SCEV. const SCEV *S = getAddExpr(LHS, getSCEV(CI)); // If the LHS of the add was an addrec and it has no-wrap flags, // transfer the no-wrap flags, since an or won't introduce a wrap. if (const SCEVAddRecExpr *NewAR = dyn_cast(S)) { const SCEVAddRecExpr *OldAR = cast(LHS); const_cast(NewAR)->setNoWrapFlags( OldAR->getNoWrapFlags()); } return S; } } break; case Instruction::Xor: if (ConstantInt *CI = dyn_cast(BO->RHS)) { // If the RHS of xor is -1, then this is a not operation. if (CI->isMinusOne()) return getNotSCEV(getSCEV(BO->LHS)); // Model xor(and(x, C), C) as and(~x, C), if C is a low-bits mask. // This is a variant of the check for xor with -1, and it handles // the case where instcombine has trimmed non-demanded bits out // of an xor with -1. if (auto *LBO = dyn_cast(BO->LHS)) if (ConstantInt *LCI = dyn_cast(LBO->getOperand(1))) if (LBO->getOpcode() == Instruction::And && LCI->getValue() == CI->getValue()) if (const SCEVZeroExtendExpr *Z = dyn_cast(getSCEV(BO->LHS))) { Type *UTy = BO->LHS->getType(); const SCEV *Z0 = Z->getOperand(); Type *Z0Ty = Z0->getType(); unsigned Z0TySize = getTypeSizeInBits(Z0Ty); // If C is a low-bits mask, the zero extend is serving to // mask off the high bits. Complement the operand and // re-apply the zext. if (CI->getValue().isMask(Z0TySize)) return getZeroExtendExpr(getNotSCEV(Z0), UTy); // If C is a single bit, it may be in the sign-bit position // before the zero-extend. In this case, represent the xor // using an add, which is equivalent, and re-apply the zext. APInt Trunc = CI->getValue().trunc(Z0TySize); if (Trunc.zext(getTypeSizeInBits(UTy)) == CI->getValue() && Trunc.isSignMask()) return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)), UTy); } } break; case Instruction::Shl: // Turn shift left of a constant amount into a multiply. if (ConstantInt *SA = dyn_cast(BO->RHS)) { uint32_t BitWidth = cast(SA->getType())->getBitWidth(); // If the shift count is not less than the bitwidth, the result of // the shift is undefined. Don't try to analyze it, because the // resolution chosen here may differ from the resolution chosen in // other parts of the compiler. if (SA->getValue().uge(BitWidth)) break; // It is currently not resolved how to interpret NSW for left // shift by BitWidth - 1, so we avoid applying flags in that // case. Remove this check (or this comment) once the situation // is resolved. See // http://lists.llvm.org/pipermail/llvm-dev/2015-April/084195.html // and http://reviews.llvm.org/D8890 . auto Flags = SCEV::FlagAnyWrap; if (BO->Op && SA->getValue().ult(BitWidth - 1)) Flags = getNoWrapFlagsFromUB(BO->Op); Constant *X = ConstantInt::get(getContext(), APInt::getOneBitSet(BitWidth, SA->getZExtValue())); return getMulExpr(getSCEV(BO->LHS), getSCEV(X), Flags); } break; case Instruction::AShr: { // AShr X, C, where C is a constant. ConstantInt *CI = dyn_cast(BO->RHS); if (!CI) break; Type *OuterTy = BO->LHS->getType(); uint64_t BitWidth = getTypeSizeInBits(OuterTy); // If the shift count is not less than the bitwidth, the result of // the shift is undefined. 
Don't try to analyze it, because the // resolution chosen here may differ from the resolution chosen in // other parts of the compiler. if (CI->getValue().uge(BitWidth)) break; if (CI->isZero()) return getSCEV(BO->LHS); // shift by zero --> noop uint64_t AShrAmt = CI->getZExtValue(); Type *TruncTy = IntegerType::get(getContext(), BitWidth - AShrAmt); Operator *L = dyn_cast(BO->LHS); if (L && L->getOpcode() == Instruction::Shl) { // X = Shl A, n // Y = AShr X, m // Both n and m are constant. const SCEV *ShlOp0SCEV = getSCEV(L->getOperand(0)); if (L->getOperand(1) == BO->RHS) // For a two-shift sext-inreg, i.e. n = m, // use sext(trunc(x)) as the SCEV expression. return getSignExtendExpr( getTruncateExpr(ShlOp0SCEV, TruncTy), OuterTy); ConstantInt *ShlAmtCI = dyn_cast(L->getOperand(1)); if (ShlAmtCI && ShlAmtCI->getValue().ult(BitWidth)) { uint64_t ShlAmt = ShlAmtCI->getZExtValue(); if (ShlAmt > AShrAmt) { // When n > m, use sext(mul(trunc(x), 2^(n-m)))) as the SCEV // expression. We already checked that ShlAmt < BitWidth, so // the multiplier, 1 << (ShlAmt - AShrAmt), fits into TruncTy as // ShlAmt - AShrAmt < Amt. APInt Mul = APInt::getOneBitSet(BitWidth - AShrAmt, ShlAmt - AShrAmt); return getSignExtendExpr( getMulExpr(getTruncateExpr(ShlOp0SCEV, TruncTy), getConstant(Mul)), OuterTy); } } } break; } } } switch (U->getOpcode()) { case Instruction::Trunc: return getTruncateExpr(getSCEV(U->getOperand(0)), U->getType()); case Instruction::ZExt: return getZeroExtendExpr(getSCEV(U->getOperand(0)), U->getType()); case Instruction::SExt: if (auto BO = MatchBinaryOp(U->getOperand(0), DT)) { // The NSW flag of a subtract does not always survive the conversion to // A + (-1)*B. By pushing sign extension onto its operands we are much // more likely to preserve NSW and allow later AddRec optimisations. // // NOTE: This is effectively duplicating this logic from getSignExtend: // sext((A + B + ...)) --> (sext(A) + sext(B) + ...) // but by that point the NSW information has potentially been lost. if (BO->Opcode == Instruction::Sub && BO->IsNSW) { Type *Ty = U->getType(); auto *V1 = getSignExtendExpr(getSCEV(BO->LHS), Ty); auto *V2 = getSignExtendExpr(getSCEV(BO->RHS), Ty); return getMinusSCEV(V1, V2, SCEV::FlagNSW); } } return getSignExtendExpr(getSCEV(U->getOperand(0)), U->getType()); case Instruction::BitCast: // BitCasts are no-op casts so we just eliminate the cast. if (isSCEVable(U->getType()) && isSCEVable(U->getOperand(0)->getType())) return getSCEV(U->getOperand(0)); break; // It's tempting to handle inttoptr and ptrtoint as no-ops, however this can // lead to pointer expressions which cannot safely be expanded to GEPs, // because ScalarEvolution doesn't respect the GEP aliasing rules when // simplifying integer expressions. case Instruction::GetElementPtr: return createNodeForGEP(cast(U)); case Instruction::PHI: return createNodeForPHI(cast(U)); case Instruction::Select: // U can also be a select constant expr, which let fall through. Since // createNodeForSelect only works for a condition that is an `ICmpInst`, and // constant expressions cannot have instructions as operands, we'd have // returned getUnknown for a select constant expressions anyway. 
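// ---- Illustrative aside (not part of this change) ----
// The two-shift sext-inreg identity used above, checked exhaustively on i8
// with n == m == 3: (x shl 3) ashr 3 equals sign-extending the low 5 bits,
// i.e. sext(trunc(x)). Hypothetical example code; assumes the usual
// two's-complement, arithmetic-right-shift behavior of the host compiler.
#if 0
#include <cassert>
#include <cstdint>

int main() {
  for (int V = -128; V <= 127; ++V) {
    int8_t X = static_cast<int8_t>(V);
    int8_t ShlAshr = static_cast<int8_t>(
        static_cast<int8_t>(static_cast<uint8_t>(X) << 3) >> 3);
    int Low5 = static_cast<uint8_t>(X) & 0x1F;     // trunc to i5
    int Sext = (Low5 & 0x10) ? (Low5 - 32) : Low5; // sext i5 -> int
    assert(ShlAshr == static_cast<int8_t>(Sext));
  }
  return 0;
}
#endif
// ---- end aside ----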
if (isa(U)) return createNodeForSelectOrPHI(cast(U), U->getOperand(0), U->getOperand(1), U->getOperand(2)); break; case Instruction::Call: case Instruction::Invoke: if (Value *RV = CallSite(U).getReturnedArgOperand()) return getSCEV(RV); break; } return getUnknown(V); } //===----------------------------------------------------------------------===// // Iteration Count Computation Code // static unsigned getConstantTripCount(const SCEVConstant *ExitCount) { if (!ExitCount) return 0; ConstantInt *ExitConst = ExitCount->getValue(); // Guard against huge trip counts. if (ExitConst->getValue().getActiveBits() > 32) return 0; // In case of integer overflow, this returns 0, which is correct. return ((unsigned)ExitConst->getZExtValue()) + 1; } unsigned ScalarEvolution::getSmallConstantTripCount(const Loop *L) { if (BasicBlock *ExitingBB = L->getExitingBlock()) return getSmallConstantTripCount(L, ExitingBB); // No trip count information for multiple exits. return 0; } unsigned ScalarEvolution::getSmallConstantTripCount(const Loop *L, BasicBlock *ExitingBlock) { assert(ExitingBlock && "Must pass a non-null exiting block!"); assert(L->isLoopExiting(ExitingBlock) && "Exiting block must actually branch out of the loop!"); const SCEVConstant *ExitCount = dyn_cast(getExitCount(L, ExitingBlock)); return getConstantTripCount(ExitCount); } unsigned ScalarEvolution::getSmallConstantMaxTripCount(const Loop *L) { const auto *MaxExitCount = dyn_cast(getMaxBackedgeTakenCount(L)); return getConstantTripCount(MaxExitCount); } unsigned ScalarEvolution::getSmallConstantTripMultiple(const Loop *L) { if (BasicBlock *ExitingBB = L->getExitingBlock()) return getSmallConstantTripMultiple(L, ExitingBB); // No trip multiple information for multiple exits. return 0; } /// Returns the largest constant divisor of the trip count of this loop as a /// normal unsigned value, if possible. This means that the actual trip count is /// always a multiple of the returned value (don't forget the trip count could /// very well be zero as well!). /// /// Returns 1 if the trip count is unknown or not guaranteed to be the /// multiple of a constant (which is also the case if the trip count is simply /// constant, use getSmallConstantTripCount for that case), Will also return 1 /// if the trip count is very large (>= 2^32). /// /// As explained in the comments for getSmallConstantTripCount, this assumes /// that control exits the loop via ExitingBlock. unsigned ScalarEvolution::getSmallConstantTripMultiple(const Loop *L, BasicBlock *ExitingBlock) { assert(ExitingBlock && "Must pass a non-null exiting block!"); assert(L->isLoopExiting(ExitingBlock) && "Exiting block must actually branch out of the loop!"); const SCEV *ExitCount = getExitCount(L, ExitingBlock); if (ExitCount == getCouldNotCompute()) return 1; // Get the trip count from the BE count by adding 1. const SCEV *TCExpr = getAddExpr(ExitCount, getOne(ExitCount->getType())); const SCEVConstant *TC = dyn_cast(TCExpr); if (!TC) // Attempt to factor more general cases. Returns the greatest power of // two divisor. If overflow happens, the trip count expression is still // divisible by the greatest power of 2 divisor returned. return 1U << std::min((uint32_t)31, GetMinTrailingZeros(TCExpr)); ConstantInt *Result = TC->getValue(); // Guard against huge trip counts (this requires checking // for zero to handle the case where the trip count == -1 and the // addition wraps). 
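// ---- Illustrative aside (not part of this change) ----
// The power-of-two fallback above, on plain unsigned values: if a trip
// count has k known trailing zero bits, it is divisible by 2^k even when
// its exact value is unknown. Hypothetical example; __builtin_ctz is the
// GCC/Clang builtin.
#if 0
#include <cassert>

int main() {
  unsigned TC = 40;                // some trip count: 40 = 2^3 * 5
  unsigned TZ = __builtin_ctz(TC); // 3 trailing zero bits
  assert(TC % (1u << TZ) == 0);    // divisible by 2^3 = 8
  return 0;
}
#endif
// ---- end aside ----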
if (!Result || Result->getValue().getActiveBits() > 32 || Result->getValue().getActiveBits() == 0) return 1; return (unsigned)Result->getZExtValue(); } /// Get the expression for the number of loop iterations for which this loop is /// guaranteed not to exit via ExitingBlock. Otherwise return /// SCEVCouldNotCompute. const SCEV *ScalarEvolution::getExitCount(const Loop *L, BasicBlock *ExitingBlock) { return getBackedgeTakenInfo(L).getExact(ExitingBlock, this); } const SCEV * ScalarEvolution::getPredicatedBackedgeTakenCount(const Loop *L, SCEVUnionPredicate &Preds) { return getPredicatedBackedgeTakenInfo(L).getExact(this, &Preds); } const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L) { return getBackedgeTakenInfo(L).getExact(this); } /// Similar to getBackedgeTakenCount, except return the least SCEV value that is /// known never to be less than the actual backedge taken count. const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) { return getBackedgeTakenInfo(L).getMax(this); } bool ScalarEvolution::isBackedgeTakenCountMaxOrZero(const Loop *L) { return getBackedgeTakenInfo(L).isMaxOrZero(this); } /// Push PHI nodes in the header of the given loop onto the given Worklist. static void PushLoopPHIs(const Loop *L, SmallVectorImpl &Worklist) { BasicBlock *Header = L->getHeader(); // Push all Loop-header PHIs onto the Worklist stack. for (PHINode &PN : Header->phis()) Worklist.push_back(&PN); } const ScalarEvolution::BackedgeTakenInfo & ScalarEvolution::getPredicatedBackedgeTakenInfo(const Loop *L) { auto &BTI = getBackedgeTakenInfo(L); if (BTI.hasFullInfo()) return BTI; auto Pair = PredicatedBackedgeTakenCounts.insert({L, BackedgeTakenInfo()}); if (!Pair.second) return Pair.first->second; BackedgeTakenInfo Result = computeBackedgeTakenCount(L, /*AllowPredicates=*/true); return PredicatedBackedgeTakenCounts.find(L)->second = std::move(Result); } const ScalarEvolution::BackedgeTakenInfo & ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { // Initially insert an invalid entry for this loop. If the insertion // succeeds, proceed to actually compute a backedge-taken count and // update the value. The temporary CouldNotCompute value tells SCEV // code elsewhere that it shouldn't attempt to request a new // backedge-taken count, which could result in infinite recursion. std::pair::iterator, bool> Pair = BackedgeTakenCounts.insert({L, BackedgeTakenInfo()}); if (!Pair.second) return Pair.first->second; // computeBackedgeTakenCount may allocate memory for its result. Inserting it // into the BackedgeTakenCounts map transfers ownership. Otherwise, the result // must be cleared in this scope. BackedgeTakenInfo Result = computeBackedgeTakenCount(L); if (Result.getExact(this) != getCouldNotCompute()) { assert(isLoopInvariant(Result.getExact(this), L) && isLoopInvariant(Result.getMax(this), L) && "Computed backedge-taken count isn't loop invariant for loop!"); ++NumTripCountsComputed; } else if (Result.getMax(this) == getCouldNotCompute() && isa(L->getHeader()->begin())) { // Only count loops that have phi nodes as not being computable. ++NumTripCountsNotComputed; } // Now that we know more about the trip count for this loop, forget any // existing SCEV values for PHI nodes in this loop since they are only // conservative estimates made without the benefit of trip count // information. This is similar to the code in forgetLoop, except that // it handles SCEVUnknown PHI nodes specially. 
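// ---- Illustrative aside (not part of this change) ----
// Hypothetical sketch of the insert-first caching pattern used by
// getBackedgeTakenInfo above: insert a sentinel before computing so that a
// recursive re-entry on the same key sees the sentinel instead of recursing
// forever, and re-look-up the slot afterwards because the computation may
// have inserted into the map and, for a DenseMap-style container, have
// invalidated the earlier iterator.
#if 0
#include <map>

static std::map<int, int> Cache;

static int compute(int Key) { return Key * 2; } // stand-in for the real work

static int memoized(int Key) {
  auto Pair = Cache.insert({Key, -1}); // -1 plays the CouldNotCompute role
  if (!Pair.second)
    return Pair.first->second;         // cache hit (possibly the sentinel)
  int Result = compute(Key);           // may re-enter memoized()
  return Cache[Key] = Result;          // re-lookup instead of reusing Pair
}
#endif
// ---- end aside ----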
  if (Result.hasAnyInfo()) {
    SmallVector<Instruction *, 16> Worklist;
    PushLoopPHIs(L, Worklist);

    SmallPtrSet<Instruction *, 8> Discovered;
    while (!Worklist.empty()) {
      Instruction *I = Worklist.pop_back_val();

      ValueExprMapType::iterator It =
          ValueExprMap.find_as(static_cast<Value *>(I));
      if (It != ValueExprMap.end()) {
        const SCEV *Old = It->second;

        // SCEVUnknown for a PHI either means that it has an unrecognized
        // structure, or it's a PHI that's in the process of being computed
        // by createNodeForPHI. In the former case, additional loop trip
        // count information isn't going to change anything. In the latter
        // case, createNodeForPHI will perform the necessary updates on its
        // own when it gets to that point.
        if (!isa<PHINode>(I) || !isa<SCEVUnknown>(Old)) {
          eraseValueFromMap(It->first);
          forgetMemoizedResults(Old);
        }
        if (PHINode *PN = dyn_cast<PHINode>(I))
          ConstantEvolutionLoopExitValue.erase(PN);
      }

      // Since we don't need to invalidate anything for correctness and we're
      // only invalidating to make SCEV's results more precise, we get to stop
      // early to avoid invalidating too much. This is especially important in
      // cases like:
      //
      //   %v = f(pn0, pn1) // pn0 and pn1 used through some other phi node
      // loop0:
      //   %pn0 = phi
      //   ...
      // loop1:
      //   %pn1 = phi
      //   ...
      //
      // where both loop0 and loop1's backedge taken count uses the SCEV
      // expression for %v. If we don't have the early stop below then in
      // cases like the above, getBackedgeTakenInfo(loop1) will clear out the
      // trip count for loop0 and getBackedgeTakenInfo(loop0) will clear out
      // the trip count for loop1, effectively nullifying SCEV's trip count
      // cache.
      for (auto *U : I->users())
        if (auto *I = dyn_cast<Instruction>(U)) {
          auto *LoopForUser = LI.getLoopFor(I->getParent());
          if (LoopForUser && L->contains(LoopForUser) &&
              Discovered.insert(I).second)
            Worklist.push_back(I);
        }
    }
  }

  // Re-lookup the insert position, since the call to
  // computeBackedgeTakenCount above could result in a
  // recursive call to getBackedgeTakenInfo (on a different
  // loop), which would invalidate the iterator computed
  // earlier.
  return BackedgeTakenCounts.find(L)->second = std::move(Result);
}

void ScalarEvolution::forgetLoop(const Loop *L) {
  // Drop any stored trip count value.
  auto RemoveLoopFromBackedgeMap =
      [](DenseMap<const Loop *, BackedgeTakenInfo> &Map, const Loop *L) {
        auto BTCPos = Map.find(L);
        if (BTCPos != Map.end()) {
          BTCPos->second.clear();
          Map.erase(BTCPos);
        }
      };

  SmallVector<const Loop *, 16> LoopWorklist(1, L);
  SmallVector<Instruction *, 32> Worklist;
  SmallPtrSet<Instruction *, 16> Visited;

  // Iterate over all the loops and sub-loops to drop SCEV information.
  while (!LoopWorklist.empty()) {
    auto *CurrL = LoopWorklist.pop_back_val();

    RemoveLoopFromBackedgeMap(BackedgeTakenCounts, CurrL);
    RemoveLoopFromBackedgeMap(PredicatedBackedgeTakenCounts, CurrL);

    // Drop information about predicated SCEV rewrites for this loop.
    for (auto I = PredicatedSCEVRewrites.begin();
         I != PredicatedSCEVRewrites.end();) {
      std::pair<const SCEV *, const Loop *> Entry = I->first;
      if (Entry.second == CurrL)
        PredicatedSCEVRewrites.erase(I++);
      else
        ++I;
    }

    auto LoopUsersItr = LoopUsers.find(CurrL);
    if (LoopUsersItr != LoopUsers.end()) {
      for (auto *S : LoopUsersItr->second)
        forgetMemoizedResults(S);
      LoopUsers.erase(LoopUsersItr);
    }

    // Drop information about expressions based on loop-header PHIs.
PushLoopPHIs(CurrL, Worklist); while (!Worklist.empty()) { Instruction *I = Worklist.pop_back_val(); if (!Visited.insert(I).second) continue; ValueExprMapType::iterator It = ValueExprMap.find_as(static_cast(I)); if (It != ValueExprMap.end()) { eraseValueFromMap(It->first); forgetMemoizedResults(It->second); if (PHINode *PN = dyn_cast(I)) ConstantEvolutionLoopExitValue.erase(PN); } PushDefUseChildren(I, Worklist); } LoopPropertiesCache.erase(CurrL); // Forget all contained loops too, to avoid dangling entries in the // ValuesAtScopes map. LoopWorklist.append(CurrL->begin(), CurrL->end()); } } void ScalarEvolution::forgetValue(Value *V) { Instruction *I = dyn_cast(V); if (!I) return; // Drop information about expressions based on loop-header PHIs. SmallVector Worklist; Worklist.push_back(I); SmallPtrSet Visited; while (!Worklist.empty()) { I = Worklist.pop_back_val(); if (!Visited.insert(I).second) continue; ValueExprMapType::iterator It = ValueExprMap.find_as(static_cast(I)); if (It != ValueExprMap.end()) { eraseValueFromMap(It->first); forgetMemoizedResults(It->second); if (PHINode *PN = dyn_cast(I)) ConstantEvolutionLoopExitValue.erase(PN); } PushDefUseChildren(I, Worklist); } } /// Get the exact loop backedge taken count considering all loop exits. A /// computable result can only be returned for loops with a single exit. /// Returning the minimum taken count among all exits is incorrect because one /// of the loop's exit limit's may have been skipped. howFarToZero assumes that /// the limit of each loop test is never skipped. This is a valid assumption as /// long as the loop exits via that test. For precise results, it is the /// caller's responsibility to specify the relevant loop exit using /// getExact(ExitingBlock, SE). const SCEV * ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE, SCEVUnionPredicate *Preds) const { // If any exits were not computable, the loop is not computable. if (!isComplete() || ExitNotTaken.empty()) return SE->getCouldNotCompute(); const SCEV *BECount = nullptr; for (auto &ENT : ExitNotTaken) { assert(ENT.ExactNotTaken != SE->getCouldNotCompute() && "bad exit SCEV"); if (!BECount) BECount = ENT.ExactNotTaken; else if (BECount != ENT.ExactNotTaken) return SE->getCouldNotCompute(); if (Preds && !ENT.hasAlwaysTruePredicate()) Preds->add(ENT.Predicate.get()); assert((Preds || ENT.hasAlwaysTruePredicate()) && "Predicate should be always true!"); } assert(BECount && "Invalid not taken count for loop exit"); return BECount; } /// Get the exact not taken count for this loop exit. const SCEV * ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock, ScalarEvolution *SE) const { for (auto &ENT : ExitNotTaken) if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePredicate()) return ENT.ExactNotTaken; return SE->getCouldNotCompute(); } /// getMax - Get the max backedge taken count for the loop. 
const SCEV * ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const { auto PredicateNotAlwaysTrue = [](const ExitNotTakenInfo &ENT) { return !ENT.hasAlwaysTruePredicate(); }; if (any_of(ExitNotTaken, PredicateNotAlwaysTrue) || !getMax()) return SE->getCouldNotCompute(); assert((isa(getMax()) || isa(getMax())) && "No point in having a non-constant max backedge taken count!"); return getMax(); } bool ScalarEvolution::BackedgeTakenInfo::isMaxOrZero(ScalarEvolution *SE) const { auto PredicateNotAlwaysTrue = [](const ExitNotTakenInfo &ENT) { return !ENT.hasAlwaysTruePredicate(); }; return MaxOrZero && !any_of(ExitNotTaken, PredicateNotAlwaysTrue); } bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S, ScalarEvolution *SE) const { if (getMax() && getMax() != SE->getCouldNotCompute() && SE->hasOperand(getMax(), S)) return true; for (auto &ENT : ExitNotTaken) if (ENT.ExactNotTaken != SE->getCouldNotCompute() && SE->hasOperand(ENT.ExactNotTaken, S)) return true; return false; } ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E) : ExactNotTaken(E), MaxNotTaken(E) { assert((isa(MaxNotTaken) || isa(MaxNotTaken)) && "No point in having a non-constant max backedge taken count!"); } ScalarEvolution::ExitLimit::ExitLimit( const SCEV *E, const SCEV *M, bool MaxOrZero, ArrayRef *> PredSetList) : ExactNotTaken(E), MaxNotTaken(M), MaxOrZero(MaxOrZero) { assert((isa(ExactNotTaken) || !isa(MaxNotTaken)) && "Exact is not allowed to be less precise than Max"); assert((isa(MaxNotTaken) || isa(MaxNotTaken)) && "No point in having a non-constant max backedge taken count!"); for (auto *PredSet : PredSetList) for (auto *P : *PredSet) addPredicate(P); } ScalarEvolution::ExitLimit::ExitLimit( const SCEV *E, const SCEV *M, bool MaxOrZero, const SmallPtrSetImpl &PredSet) : ExitLimit(E, M, MaxOrZero, {&PredSet}) { assert((isa(MaxNotTaken) || isa(MaxNotTaken)) && "No point in having a non-constant max backedge taken count!"); } ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E, const SCEV *M, bool MaxOrZero) : ExitLimit(E, M, MaxOrZero, None) { assert((isa(MaxNotTaken) || isa(MaxNotTaken)) && "No point in having a non-constant max backedge taken count!"); } /// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each /// computable exit into a persistent ExitNotTakenInfo array. ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo( SmallVectorImpl &&ExitCounts, bool Complete, const SCEV *MaxCount, bool MaxOrZero) : MaxAndComplete(MaxCount, Complete), MaxOrZero(MaxOrZero) { using EdgeExitInfo = ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo; ExitNotTaken.reserve(ExitCounts.size()); std::transform( ExitCounts.begin(), ExitCounts.end(), std::back_inserter(ExitNotTaken), [&](const EdgeExitInfo &EEI) { BasicBlock *ExitBB = EEI.first; const ExitLimit &EL = EEI.second; if (EL.Predicates.empty()) return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, nullptr); std::unique_ptr Predicate(new SCEVUnionPredicate); for (auto *Pred : EL.Predicates) Predicate->add(Pred); return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, std::move(Predicate)); }); assert((isa(MaxCount) || isa(MaxCount)) && "No point in having a non-constant max backedge taken count!"); } /// Invalidate this result and free the ExitNotTakenInfo array. void ScalarEvolution::BackedgeTakenInfo::clear() { ExitNotTaken.clear(); } /// Compute the number of times the backedge of the specified loop will execute. 
ScalarEvolution::BackedgeTakenInfo ScalarEvolution::computeBackedgeTakenCount(const Loop *L, bool AllowPredicates) { SmallVector ExitingBlocks; L->getExitingBlocks(ExitingBlocks); using EdgeExitInfo = ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo; SmallVector ExitCounts; bool CouldComputeBECount = true; BasicBlock *Latch = L->getLoopLatch(); // may be NULL. const SCEV *MustExitMaxBECount = nullptr; const SCEV *MayExitMaxBECount = nullptr; bool MustExitMaxOrZero = false; // Compute the ExitLimit for each loop exit. Use this to populate ExitCounts // and compute maxBECount. // Do a union of all the predicates here. for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { BasicBlock *ExitBB = ExitingBlocks[i]; ExitLimit EL = computeExitLimit(L, ExitBB, AllowPredicates); assert((AllowPredicates || EL.Predicates.empty()) && "Predicated exit limit when predicates are not allowed!"); // 1. For each exit that can be computed, add an entry to ExitCounts. // CouldComputeBECount is true only if all exits can be computed. if (EL.ExactNotTaken == getCouldNotCompute()) // We couldn't compute an exact value for this exit, so // we won't be able to compute an exact value for the loop. CouldComputeBECount = false; else ExitCounts.emplace_back(ExitBB, EL); // 2. Derive the loop's MaxBECount from each exit's max number of // non-exiting iterations. Partition the loop exits into two kinds: // LoopMustExits and LoopMayExits. // // If the exit dominates the loop latch, it is a LoopMustExit otherwise it // is a LoopMayExit. If any computable LoopMustExit is found, then // MaxBECount is the minimum EL.MaxNotTaken of computable // LoopMustExits. Otherwise, MaxBECount is conservatively the maximum // EL.MaxNotTaken, where CouldNotCompute is considered greater than any // computable EL.MaxNotTaken. if (EL.MaxNotTaken != getCouldNotCompute() && Latch && DT.dominates(ExitBB, Latch)) { if (!MustExitMaxBECount) { MustExitMaxBECount = EL.MaxNotTaken; MustExitMaxOrZero = EL.MaxOrZero; } else { MustExitMaxBECount = getUMinFromMismatchedTypes(MustExitMaxBECount, EL.MaxNotTaken); } } else if (MayExitMaxBECount != getCouldNotCompute()) { if (!MayExitMaxBECount || EL.MaxNotTaken == getCouldNotCompute()) MayExitMaxBECount = EL.MaxNotTaken; else { MayExitMaxBECount = getUMaxFromMismatchedTypes(MayExitMaxBECount, EL.MaxNotTaken); } } } const SCEV *MaxBECount = MustExitMaxBECount ? MustExitMaxBECount : (MayExitMaxBECount ? MayExitMaxBECount : getCouldNotCompute()); // The loop backedge will be taken the maximum or zero times if there's // a single exit that must be taken the maximum or zero times. bool MaxOrZero = (MustExitMaxOrZero && ExitingBlocks.size() == 1); return BackedgeTakenInfo(std::move(ExitCounts), CouldComputeBECount, MaxBECount, MaxOrZero); } ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock, bool AllowPredicates) { // Okay, we've chosen an exiting block. See what condition causes us to exit // at this block and remember the exit block and whether all other targets // lead to the loop header. bool MustExecuteLoopHeader = true; BasicBlock *Exit = nullptr; for (auto *SBB : successors(ExitingBlock)) if (!L->contains(SBB)) { if (Exit) // Multiple exit successors. return getCouldNotCompute(); Exit = SBB; } else if (SBB != L->getHeader()) { MustExecuteLoopHeader = false; } // At this point, we know we have a conditional branch that determines whether // the loop is exited. However, we don't know if the branch is executed each // time through the loop. 
If not, then the execution count of the branch will // not be equal to the trip count of the loop. // // Currently we check for this by checking to see if the Exit branch goes to // the loop header. If so, we know it will always execute the same number of // times as the loop. We also handle the case where the exit block *is* the // loop header. This is common for un-rotated loops. // // If both of those tests fail, walk up the unique predecessor chain to the // header, stopping if there is an edge that doesn't exit the loop. If the // header is reached, the execution count of the branch will be equal to the // trip count of the loop. // // More extensive analysis could be done to handle more cases here. // if (!MustExecuteLoopHeader && ExitingBlock != L->getHeader()) { // The simple checks failed, try climbing the unique predecessor chain // up to the header. bool Ok = false; for (BasicBlock *BB = ExitingBlock; BB; ) { BasicBlock *Pred = BB->getUniquePredecessor(); if (!Pred) return getCouldNotCompute(); TerminatorInst *PredTerm = Pred->getTerminator(); for (const BasicBlock *PredSucc : PredTerm->successors()) { if (PredSucc == BB) continue; // If the predecessor has a successor that isn't BB and isn't // outside the loop, assume the worst. if (L->contains(PredSucc)) return getCouldNotCompute(); } if (Pred == L->getHeader()) { Ok = true; break; } BB = Pred; } if (!Ok) return getCouldNotCompute(); } bool IsOnlyExit = (L->getExitingBlock() != nullptr); TerminatorInst *Term = ExitingBlock->getTerminator(); if (BranchInst *BI = dyn_cast(Term)) { assert(BI->isConditional() && "If unconditional, it can't be in loop!"); // Proceed to the next level to examine the exit condition expression. return computeExitLimitFromCond( L, BI->getCondition(), BI->getSuccessor(0), BI->getSuccessor(1), /*ControlsExit=*/IsOnlyExit, AllowPredicates); } if (SwitchInst *SI = dyn_cast(Term)) return computeExitLimitFromSingleExitSwitch(L, SI, Exit, /*ControlsExit=*/IsOnlyExit); return getCouldNotCompute(); } ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCond( const Loop *L, Value *ExitCond, BasicBlock *TBB, BasicBlock *FBB, bool ControlsExit, bool AllowPredicates) { ScalarEvolution::ExitLimitCacheTy Cache(L, TBB, FBB, AllowPredicates); return computeExitLimitFromCondCached(Cache, L, ExitCond, TBB, FBB, ControlsExit, AllowPredicates); } Optional ScalarEvolution::ExitLimitCache::find(const Loop *L, Value *ExitCond, BasicBlock *TBB, BasicBlock *FBB, bool ControlsExit, bool AllowPredicates) { (void)this->L; (void)this->TBB; (void)this->FBB; (void)this->AllowPredicates; assert(this->L == L && this->TBB == TBB && this->FBB == FBB && this->AllowPredicates == AllowPredicates && "Variance in assumed invariant key components!"); auto Itr = TripCountMap.find({ExitCond, ControlsExit}); if (Itr == TripCountMap.end()) return None; return Itr->second; } void ScalarEvolution::ExitLimitCache::insert(const Loop *L, Value *ExitCond, BasicBlock *TBB, BasicBlock *FBB, bool ControlsExit, bool AllowPredicates, const ExitLimit &EL) { assert(this->L == L && this->TBB == TBB && this->FBB == FBB && this->AllowPredicates == AllowPredicates && "Variance in assumed invariant key components!"); auto InsertResult = TripCountMap.insert({{ExitCond, ControlsExit}, EL}); assert(InsertResult.second && "Expected successful insertion!"); (void)InsertResult; } ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondCached( ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, BasicBlock *TBB, BasicBlock *FBB, bool 
ControlsExit, bool AllowPredicates) { if (auto MaybeEL = Cache.find(L, ExitCond, TBB, FBB, ControlsExit, AllowPredicates)) return *MaybeEL; ExitLimit EL = computeExitLimitFromCondImpl(Cache, L, ExitCond, TBB, FBB, ControlsExit, AllowPredicates); Cache.insert(L, ExitCond, TBB, FBB, ControlsExit, AllowPredicates, EL); return EL; } ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl( ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, BasicBlock *TBB, BasicBlock *FBB, bool ControlsExit, bool AllowPredicates) { // Check if the controlling expression for this loop is an And or Or. if (BinaryOperator *BO = dyn_cast(ExitCond)) { if (BO->getOpcode() == Instruction::And) { // Recurse on the operands of the and. bool EitherMayExit = L->contains(TBB); ExitLimit EL0 = computeExitLimitFromCondCached( Cache, L, BO->getOperand(0), TBB, FBB, ControlsExit && !EitherMayExit, AllowPredicates); ExitLimit EL1 = computeExitLimitFromCondCached( Cache, L, BO->getOperand(1), TBB, FBB, ControlsExit && !EitherMayExit, AllowPredicates); const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); if (EitherMayExit) { // Both conditions must be true for the loop to continue executing. // Choose the less conservative count. if (EL0.ExactNotTaken == getCouldNotCompute() || EL1.ExactNotTaken == getCouldNotCompute()) BECount = getCouldNotCompute(); else BECount = getUMinFromMismatchedTypes(EL0.ExactNotTaken, EL1.ExactNotTaken); if (EL0.MaxNotTaken == getCouldNotCompute()) MaxBECount = EL1.MaxNotTaken; else if (EL1.MaxNotTaken == getCouldNotCompute()) MaxBECount = EL0.MaxNotTaken; else MaxBECount = getUMinFromMismatchedTypes(EL0.MaxNotTaken, EL1.MaxNotTaken); } else { // Both conditions must be true at the same time for the loop to exit. // For now, be conservative. assert(L->contains(FBB) && "Loop block has no successor in loop!"); if (EL0.MaxNotTaken == EL1.MaxNotTaken) MaxBECount = EL0.MaxNotTaken; if (EL0.ExactNotTaken == EL1.ExactNotTaken) BECount = EL0.ExactNotTaken; } // There are cases (e.g. PR26207) where computeExitLimitFromCond is able // to be more aggressive when computing BECount than when computing // MaxBECount. In these cases it is possible for EL0.ExactNotTaken and // EL1.ExactNotTaken to match, but for EL0.MaxNotTaken and EL1.MaxNotTaken // to not. if (isa(MaxBECount) && !isa(BECount)) MaxBECount = getConstant(getUnsignedRangeMax(BECount)); return ExitLimit(BECount, MaxBECount, false, {&EL0.Predicates, &EL1.Predicates}); } if (BO->getOpcode() == Instruction::Or) { // Recurse on the operands of the or. bool EitherMayExit = L->contains(FBB); ExitLimit EL0 = computeExitLimitFromCondCached( Cache, L, BO->getOperand(0), TBB, FBB, ControlsExit && !EitherMayExit, AllowPredicates); ExitLimit EL1 = computeExitLimitFromCondCached( Cache, L, BO->getOperand(1), TBB, FBB, ControlsExit && !EitherMayExit, AllowPredicates); const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); if (EitherMayExit) { // Both conditions must be false for the loop to continue executing. // Choose the less conservative count. 
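// ---- Illustrative aside (not part of this change) ----
// The "less conservative count" choice above, on concrete numbers. For an
// exit controlled by (A || B), the loop keeps running only while both
// conditions are false, so the loop body executes min(TA, TB) times, where
// TA and TB are the first iterations at which A resp. B become true.
// Hypothetical example code.
#if 0
#include <algorithm>
#include <cassert>

int main() {
  const unsigned TA = 10, TB = 7;
  unsigned Trips = 0;
  for (unsigned I = 0;; ++I) {
    bool A = I >= TA, B = I >= TB;
    if (A || B)
      break; // the loop exits as soon as either condition fires
    ++Trips;
  }
  assert(Trips == std::min(TA, TB)); // 7: the smaller of the two counts
  return 0;
}
#endif
// ---- end aside ----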
if (EL0.ExactNotTaken == getCouldNotCompute() || EL1.ExactNotTaken == getCouldNotCompute()) BECount = getCouldNotCompute(); else BECount = getUMinFromMismatchedTypes(EL0.ExactNotTaken, EL1.ExactNotTaken); if (EL0.MaxNotTaken == getCouldNotCompute()) MaxBECount = EL1.MaxNotTaken; else if (EL1.MaxNotTaken == getCouldNotCompute()) MaxBECount = EL0.MaxNotTaken; else MaxBECount = getUMinFromMismatchedTypes(EL0.MaxNotTaken, EL1.MaxNotTaken); } else { // Both conditions must be false at the same time for the loop to exit. // For now, be conservative. assert(L->contains(TBB) && "Loop block has no successor in loop!"); if (EL0.MaxNotTaken == EL1.MaxNotTaken) MaxBECount = EL0.MaxNotTaken; if (EL0.ExactNotTaken == EL1.ExactNotTaken) BECount = EL0.ExactNotTaken; } return ExitLimit(BECount, MaxBECount, false, {&EL0.Predicates, &EL1.Predicates}); } } // With an icmp, it may be feasible to compute an exact backedge-taken count. // Proceed to the next level to examine the icmp. if (ICmpInst *ExitCondICmp = dyn_cast(ExitCond)) { ExitLimit EL = computeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit); if (EL.hasFullInfo() || !AllowPredicates) return EL; // Try again, but use SCEV predicates this time. return computeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit, /*AllowPredicates=*/true); } // Check for a constant condition. These are normally stripped out by // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to // preserve the CFG and is temporarily leaving constant conditions // in place. if (ConstantInt *CI = dyn_cast(ExitCond)) { if (L->contains(FBB) == !CI->getZExtValue()) // The backedge is always taken. return getCouldNotCompute(); else // The backedge is never taken. return getZero(CI->getType()); } // If it's not an integer or pointer comparison then compute it the hard way. return computeExitCountExhaustively(L, ExitCond, !L->contains(TBB)); } ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromICmp(const Loop *L, ICmpInst *ExitCond, BasicBlock *TBB, BasicBlock *FBB, bool ControlsExit, bool AllowPredicates) { // If the condition was exit on true, convert the condition to exit on false ICmpInst::Predicate Pred; if (!L->contains(FBB)) Pred = ExitCond->getPredicate(); else Pred = ExitCond->getInversePredicate(); const ICmpInst::Predicate OriginalPred = Pred; // Handle common loops like: for (X = "string"; *X; ++X) if (LoadInst *LI = dyn_cast(ExitCond->getOperand(0))) if (Constant *RHS = dyn_cast(ExitCond->getOperand(1))) { ExitLimit ItCnt = computeLoadConstantCompareExitLimit(LI, RHS, L, Pred); if (ItCnt.hasAnyInfo()) return ItCnt; } const SCEV *LHS = getSCEV(ExitCond->getOperand(0)); const SCEV *RHS = getSCEV(ExitCond->getOperand(1)); // Try to evaluate any dependencies out of the loop. LHS = getSCEVAtScope(LHS, L); RHS = getSCEVAtScope(RHS, L); // At this point, we would like to compute how many iterations of the // loop the predicate will return true for these inputs. if (isLoopInvariant(LHS, L) && !isLoopInvariant(RHS, L)) { // If there is a loop-invariant, force it into the RHS. std::swap(LHS, RHS); Pred = ICmpInst::getSwappedPredicate(Pred); } // Simplify the operands before analyzing them. (void)SimplifyICmpOperands(Pred, LHS, RHS); // If we have a comparison of a chrec against a constant, try to use value // ranges to answer this query. if (const SCEVConstant *RHSC = dyn_cast(RHS)) if (const SCEVAddRecExpr *AddRec = dyn_cast(LHS)) if (AddRec->getLoop() == L) { // Form the constant range. 
ConstantRange CompRange = ConstantRange::makeExactICmpRegion(Pred, RHSC->getAPInt()); const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this); if (!isa(Ret)) return Ret; } switch (Pred) { case ICmpInst::ICMP_NE: { // while (X != Y) // Convert to: while (X-Y != 0) ExitLimit EL = howFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit, AllowPredicates); if (EL.hasAnyInfo()) return EL; break; } case ICmpInst::ICMP_EQ: { // while (X == Y) // Convert to: while (X-Y == 0) ExitLimit EL = howFarToNonZero(getMinusSCEV(LHS, RHS), L); if (EL.hasAnyInfo()) return EL; break; } case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_ULT: { // while (X < Y) bool IsSigned = Pred == ICmpInst::ICMP_SLT; ExitLimit EL = howManyLessThans(LHS, RHS, L, IsSigned, ControlsExit, AllowPredicates); if (EL.hasAnyInfo()) return EL; break; } case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_UGT: { // while (X > Y) bool IsSigned = Pred == ICmpInst::ICMP_SGT; ExitLimit EL = howManyGreaterThans(LHS, RHS, L, IsSigned, ControlsExit, AllowPredicates); if (EL.hasAnyInfo()) return EL; break; } default: break; } auto *ExhaustiveCount = computeExitCountExhaustively(L, ExitCond, !L->contains(TBB)); if (!isa(ExhaustiveCount)) return ExhaustiveCount; return computeShiftCompareExitLimit(ExitCond->getOperand(0), ExitCond->getOperand(1), L, OriginalPred); } ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromSingleExitSwitch(const Loop *L, SwitchInst *Switch, BasicBlock *ExitingBlock, bool ControlsExit) { assert(!L->contains(ExitingBlock) && "Not an exiting block!"); // Give up if the exit is the default dest of a switch. if (Switch->getDefaultDest() == ExitingBlock) return getCouldNotCompute(); assert(L->contains(Switch->getDefaultDest()) && "Default case must not exit the loop!"); const SCEV *LHS = getSCEVAtScope(Switch->getCondition(), L); const SCEV *RHS = getConstant(Switch->findCaseDest(ExitingBlock)); // while (X != Y) --> while (X-Y != 0) ExitLimit EL = howFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit); if (EL.hasAnyInfo()) return EL; return getCouldNotCompute(); } static ConstantInt * EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C, ScalarEvolution &SE) { const SCEV *InVal = SE.getConstant(C); const SCEV *Val = AddRec->evaluateAtIteration(InVal, SE); assert(isa(Val) && "Evaluation of SCEV at constant didn't fold correctly?"); return cast(Val)->getValue(); } /// Given an exit condition of 'icmp op load X, cst', try to see if we can /// compute the backedge execution count. ScalarEvolution::ExitLimit ScalarEvolution::computeLoadConstantCompareExitLimit( LoadInst *LI, Constant *RHS, const Loop *L, ICmpInst::Predicate predicate) { if (LI->isVolatile()) return getCouldNotCompute(); // Check to see if the loaded pointer is a getelementptr of a global. // TODO: Use SCEV instead of manually grubbing with GEPs. GetElementPtrInst *GEP = dyn_cast(LI->getOperand(0)); if (!GEP) return getCouldNotCompute(); // Make sure that it is really a constant global we are gepping, with an // initializer, and make sure the first IDX is really 0. GlobalVariable *GV = dyn_cast(GEP->getOperand(0)); if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() || GEP->getNumOperands() < 3 || !isa(GEP->getOperand(1)) || !cast(GEP->getOperand(1))->isNullValue()) return getCouldNotCompute(); // Okay, we allow one non-constant index into the GEP instruction. 
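// ---- Illustrative aside (not part of this change) ----
// The brute-force evaluation performed below, reduced to plain C++: for a
// loop like `for (X = "string"; *X; ++X)` over a constant global, the exit
// count falls out of folding the load and compare at each iteration.
// Hypothetical example code.
#if 0
#include <cassert>

int main() {
  static const char Glbl[] = "string"; // constant global with an initializer
  unsigned It = 0;
  while (Glbl[It] != '\0')             // fold load + compare per iteration
    ++It;
  assert(It == 6);                     // terminating iteration found
  return 0;
}
#endif
// ---- end aside ----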
Value *VarIdx = nullptr; std::vector Indexes; unsigned VarIdxNum = 0; for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i) if (ConstantInt *CI = dyn_cast(GEP->getOperand(i))) { Indexes.push_back(CI); } else if (!isa(GEP->getOperand(i))) { if (VarIdx) return getCouldNotCompute(); // Multiple non-constant idx's. VarIdx = GEP->getOperand(i); VarIdxNum = i-2; Indexes.push_back(nullptr); } // Loop-invariant loads may be a byproduct of loop optimization. Skip them. if (!VarIdx) return getCouldNotCompute(); // Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant. // Check to see if X is a loop variant variable value now. const SCEV *Idx = getSCEV(VarIdx); Idx = getSCEVAtScope(Idx, L); // We can only recognize very limited forms of loop index expressions, in // particular, only affine AddRec's like {C1,+,C2}. const SCEVAddRecExpr *IdxExpr = dyn_cast(Idx); if (!IdxExpr || !IdxExpr->isAffine() || isLoopInvariant(IdxExpr, L) || !isa(IdxExpr->getOperand(0)) || !isa(IdxExpr->getOperand(1))) return getCouldNotCompute(); unsigned MaxSteps = MaxBruteForceIterations; for (unsigned IterationNum = 0; IterationNum != MaxSteps; ++IterationNum) { ConstantInt *ItCst = ConstantInt::get( cast(IdxExpr->getType()), IterationNum); ConstantInt *Val = EvaluateConstantChrecAtConstant(IdxExpr, ItCst, *this); // Form the GEP offset. Indexes[VarIdxNum] = Val; Constant *Result = ConstantFoldLoadThroughGEPIndices(GV->getInitializer(), Indexes); if (!Result) break; // Cannot compute! // Evaluate the condition for this iteration. Result = ConstantExpr::getICmp(predicate, Result, RHS); if (!isa(Result)) break; // Couldn't decide for sure if (cast(Result)->getValue().isMinValue()) { ++NumArrayLenItCounts; return getConstant(ItCst); // Found terminating iteration! } } return getCouldNotCompute(); } ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit( Value *LHS, Value *RHSV, const Loop *L, ICmpInst::Predicate Pred) { ConstantInt *RHS = dyn_cast(RHSV); if (!RHS) return getCouldNotCompute(); const BasicBlock *Latch = L->getLoopLatch(); if (!Latch) return getCouldNotCompute(); const BasicBlock *Predecessor = L->getLoopPredecessor(); if (!Predecessor) return getCouldNotCompute(); // Return true if V is of the form "LHS `shift_op` ". // Return LHS in OutLHS and shift_opt in OutOpCode. auto MatchPositiveShift = [](Value *V, Value *&OutLHS, Instruction::BinaryOps &OutOpCode) { using namespace PatternMatch; ConstantInt *ShiftAmt; if (match(V, m_LShr(m_Value(OutLHS), m_ConstantInt(ShiftAmt)))) OutOpCode = Instruction::LShr; else if (match(V, m_AShr(m_Value(OutLHS), m_ConstantInt(ShiftAmt)))) OutOpCode = Instruction::AShr; else if (match(V, m_Shl(m_Value(OutLHS), m_ConstantInt(ShiftAmt)))) OutOpCode = Instruction::Shl; else return false; return ShiftAmt->getValue().isStrictlyPositive(); }; // Recognize a "shift recurrence" either of the form %iv or of %iv.shifted in // // loop: // %iv = phi i32 [ %iv.shifted, %loop ], [ %val, %preheader ] // %iv.shifted = lshr i32 %iv, // // Return true on a successful match. Return the corresponding PHI node (%iv // above) in PNOut and the opcode of the shift operation in OpCodeOut. auto MatchShiftRecurrence = [&](Value *V, PHINode *&PNOut, Instruction::BinaryOps &OpCodeOut) { Optional PostShiftOpCode; { Instruction::BinaryOps OpC; Value *V; // If we encounter a shift instruction, "peel off" the shift operation, // and remember that we did so. 
Later when we inspect %iv's backedge // value, we will make sure that the backedge value uses the same // operation. // // Note: the peeled shift operation does not have to be the same // instruction as the one feeding into the PHI's backedge value. We only // really care about it being the same *kind* of shift instruction -- // that's all that is required for our later inferences to hold. if (MatchPositiveShift(LHS, V, OpC)) { PostShiftOpCode = OpC; LHS = V; } } PNOut = dyn_cast(LHS); if (!PNOut || PNOut->getParent() != L->getHeader()) return false; Value *BEValue = PNOut->getIncomingValueForBlock(Latch); Value *OpLHS; return // The backedge value for the PHI node must be a shift by a positive // amount MatchPositiveShift(BEValue, OpLHS, OpCodeOut) && // of the PHI node itself OpLHS == PNOut && // and the kind of shift should be match the kind of shift we peeled // off, if any. (!PostShiftOpCode.hasValue() || *PostShiftOpCode == OpCodeOut); }; PHINode *PN; Instruction::BinaryOps OpCode; if (!MatchShiftRecurrence(LHS, PN, OpCode)) return getCouldNotCompute(); const DataLayout &DL = getDataLayout(); // The key rationale for this optimization is that for some kinds of shift // recurrences, the value of the recurrence "stabilizes" to either 0 or -1 // within a finite number of iterations. If the condition guarding the // backedge (in the sense that the backedge is taken if the condition is true) // is false for the value the shift recurrence stabilizes to, then we know // that the backedge is taken only a finite number of times. ConstantInt *StableValue = nullptr; switch (OpCode) { default: llvm_unreachable("Impossible case!"); case Instruction::AShr: { // {K,ashr,} stabilizes to signum(K) in at most // bitwidth(K) iterations. Value *FirstValue = PN->getIncomingValueForBlock(Predecessor); KnownBits Known = computeKnownBits(FirstValue, DL, 0, nullptr, Predecessor->getTerminator(), &DT); auto *Ty = cast(RHS->getType()); if (Known.isNonNegative()) StableValue = ConstantInt::get(Ty, 0); else if (Known.isNegative()) StableValue = ConstantInt::get(Ty, -1, true); else return getCouldNotCompute(); break; } case Instruction::LShr: case Instruction::Shl: // Both {K,lshr,} and {K,shl,} // stabilize to 0 in at most bitwidth(K) iterations. StableValue = ConstantInt::get(cast(RHS->getType()), 0); break; } auto *Result = ConstantFoldCompareInstOperands(Pred, StableValue, RHS, DL, &TLI); assert(Result->getType()->isIntegerTy(1) && "Otherwise cannot be an operand to a branch instruction"); if (Result->isZeroValue()) { unsigned BitWidth = getTypeSizeInBits(RHS->getType()); const SCEV *UpperBound = getConstant(getEffectiveSCEVType(RHS->getType()), BitWidth); return ExitLimit(getCouldNotCompute(), UpperBound, false); } return getCouldNotCompute(); } /// Return true if we can constant fold an instruction of the specified type, /// assuming that all operands were constants. static bool CanConstantFold(const Instruction *I) { if (isa(I) || isa(I) || isa(I) || isa(I) || isa(I) || isa(I)) return true; if (const CallInst *CI = dyn_cast(I)) if (const Function *F = CI->getCalledFunction()) return canConstantFoldCallTo(CI, F); return false; } /// Determine whether this instruction can constant evolve within this loop /// assuming its operands can all constant evolve. static bool canConstantEvolve(Instruction *I, const Loop *L) { // An instruction outside of the loop can't be derived from a loop PHI. 
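// ---- Illustrative aside (not part of this change) ----
// The stabilization fact used by computeShiftCompareExitLimit above: an
// AShr recurrence {K,ashr,s} with s > 0 reaches 0 (K >= 0) or -1 (K < 0)
// within bitwidth(K) steps and then stops changing. Hypothetical example;
// assumes the host compiler's arithmetic right shift of signed values.
#if 0
#include <cassert>
#include <cstdint>

int main() {
  for (int Start : {37, -37, 127, -128, 0}) {
    int8_t V = static_cast<int8_t>(Start);
    for (int I = 0; I < 8; ++I) // at most bitwidth(i8) iterations
      V >>= 1;                  // arithmetic shift on a signed type
    assert(V == (Start < 0 ? -1 : 0)); // 0 if non-negative, -1 if negative
    assert((V >> 1) == V);             // and it is a fixed point
  }
  return 0;
}
#endif
// ---- end aside ----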
  ConstantInt *StableValue = nullptr;
  switch (OpCode) {
  default:
    llvm_unreachable("Impossible case!");

  case Instruction::AShr: {
    // {K,ashr,<positive-constant>} stabilizes to signum(K) in at most
    // bitwidth(K) iterations.
    Value *FirstValue = PN->getIncomingValueForBlock(Predecessor);
    KnownBits Known = computeKnownBits(FirstValue, DL, 0, nullptr,
                                       Predecessor->getTerminator(), &DT);
    auto *Ty = cast<IntegerType>(RHS->getType());
    if (Known.isNonNegative())
      StableValue = ConstantInt::get(Ty, 0);
    else if (Known.isNegative())
      StableValue = ConstantInt::get(Ty, -1, true);
    else
      return getCouldNotCompute();

    break;
  }
  case Instruction::LShr:
  case Instruction::Shl:
    // Both {K,lshr,<positive-constant>} and {K,shl,<positive-constant>}
    // stabilize to 0 in at most bitwidth(K) iterations.
    StableValue = ConstantInt::get(cast<IntegerType>(RHS->getType()), 0);
    break;
  }

  auto *Result =
      ConstantFoldCompareInstOperands(Pred, StableValue, RHS, DL, &TLI);
  assert(Result->getType()->isIntegerTy(1) &&
         "Otherwise cannot be an operand to a branch instruction");

  if (Result->isZeroValue()) {
    unsigned BitWidth = getTypeSizeInBits(RHS->getType());
    const SCEV *UpperBound =
        getConstant(getEffectiveSCEVType(RHS->getType()), BitWidth);
    return ExitLimit(getCouldNotCompute(), UpperBound, false);
  }

  return getCouldNotCompute();
}

/// Return true if we can constant fold an instruction of the specified type,
/// assuming that all operands were constants.
static bool CanConstantFold(const Instruction *I) {
  if (isa<BinaryOperator>(I) || isa<CmpInst>(I) || isa<SelectInst>(I) ||
      isa<CastInst>(I) || isa<GetElementPtrInst>(I) || isa<LoadInst>(I))
    return true;

  if (const CallInst *CI = dyn_cast<CallInst>(I))
    if (const Function *F = CI->getCalledFunction())
      return canConstantFoldCallTo(CI, F);
  return false;
}

/// Determine whether this instruction can constant evolve within this loop
/// assuming its operands can all constant evolve.
static bool canConstantEvolve(Instruction *I, const Loop *L) {
  // An instruction outside of the loop can't be derived from a loop PHI.
  if (!L->contains(I)) return false;

  if (isa<PHINode>(I)) {
    // We don't currently keep track of the control flow needed to evaluate
    // PHIs, so we cannot handle PHIs inside of loops.
    return L->getHeader() == I->getParent();
  }

  // If we won't be able to constant fold this expression even if the operands
  // are constants, bail early.
  return CanConstantFold(I);
}

/// getConstantEvolvingPHIOperands - Implement getConstantEvolvingPHI by
/// recursing through each instruction operand until reaching a loop header phi.
static PHINode *
getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L,
                               DenseMap<Instruction *, PHINode *> &PHIMap,
                               unsigned Depth) {
  if (Depth > MaxConstantEvolvingDepth)
    return nullptr;

  // Otherwise, we can evaluate this instruction if all of its operands are
  // constant or derived from a PHI node themselves.
  PHINode *PHI = nullptr;
  for (Value *Op : UseInst->operands()) {
    if (isa<Constant>(Op)) continue;

    Instruction *OpInst = dyn_cast<Instruction>(Op);
    if (!OpInst || !canConstantEvolve(OpInst, L)) return nullptr;

    PHINode *P = dyn_cast<PHINode>(OpInst);
    if (!P)
      // If this operand is already visited, reuse the prior result.
      // We may have P != PHI if this is the deepest point at which the
      // inconsistent paths meet.
      P = PHIMap.lookup(OpInst);
    if (!P) {
      // Recurse and memoize the results, whether a phi is found or not.
      // This recursive call invalidates pointers into PHIMap.
      P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap, Depth + 1);
      PHIMap[OpInst] = P;
    }
    if (!P)
      return nullptr;  // Not evolving from PHI
    if (PHI && PHI != P)
      return nullptr;  // Evolving from multiple different PHIs.
    PHI = P;
  }
  // This is an expression evolving from a constant PHI!
  return PHI;
}

/// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node
/// in the loop that V is derived from.  We allow arbitrary operations along the
/// way, but the operands of an operation must either be constants or a value
/// derived from a constant PHI.  If this expression does not fit with these
/// constraints, return null.
static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I || !canConstantEvolve(I, L)) return nullptr;

  if (PHINode *PN = dyn_cast<PHINode>(I))
    return PN;

  // Record non-constant instructions contained by the loop.
  DenseMap<Instruction *, PHINode *> PHIMap;
  return getConstantEvolvingPHIOperands(I, L, PHIMap, 0);
}
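// For example (illustrative), in
//
//   %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
//   %i.next = add i32 %i, 1
//   %cmp = icmp eq i32 %i.next, 10
//
// getConstantEvolvingPHI(%cmp, L) returns %i: every operand of %cmp is either
// a constant or derived from %i alone.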
/// EvaluateExpression - Given an expression that passes the
/// getConstantEvolvingPHI predicate, evaluate its value assuming the PHI node
/// in the loop has the value PHIVal.  If we can't fold this expression for some
/// reason, return null.
static Constant *EvaluateExpression(Value *V, const Loop *L,
                                    DenseMap<Instruction *, Constant *> &Vals,
                                    const DataLayout &DL,
                                    const TargetLibraryInfo *TLI) {
  // Convenient constant check, but redundant for recursive calls.
  if (Constant *C = dyn_cast<Constant>(V)) return C;
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) return nullptr;

  if (Constant *C = Vals.lookup(I)) return C;

  // An instruction inside the loop depends on a value outside the loop that we
  // weren't given a mapping for, or a value such as a call inside the loop.
  if (!canConstantEvolve(I, L)) return nullptr;

  // An unmapped PHI can be due to a branch or another loop inside this loop,
  // or due to this not being the initial iteration through a loop where we
  // couldn't compute the evolution of this particular PHI last time.
  if (isa<PHINode>(I)) return nullptr;

  std::vector<Constant*> Operands(I->getNumOperands());

  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
    Instruction *Operand = dyn_cast<Instruction>(I->getOperand(i));
    if (!Operand) {
      Operands[i] = dyn_cast<Constant>(I->getOperand(i));
      if (!Operands[i]) return nullptr;
      continue;
    }
    Constant *C = EvaluateExpression(Operand, L, Vals, DL, TLI);
    Vals[Operand] = C;
    if (!C) return nullptr;
    Operands[i] = C;
  }

  if (CmpInst *CI = dyn_cast<CmpInst>(I))
    return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
                                           Operands[1], DL, TLI);
  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    if (!LI->isVolatile())
      return ConstantFoldLoadFromConstPtr(Operands[0], LI->getType(), DL);
  }
  return ConstantFoldInstOperands(I, Operands, DL, TLI);
}

// If every incoming value to PN except the one for BB is a specific Constant,
// return that, else return nullptr.
static Constant *getOtherIncomingValue(PHINode *PN, BasicBlock *BB) {
  Constant *IncomingVal = nullptr;

  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
    if (PN->getIncomingBlock(i) == BB)
      continue;

    auto *CurrentVal = dyn_cast<Constant>(PN->getIncomingValue(i));
    if (!CurrentVal)
      return nullptr;

    if (IncomingVal != CurrentVal) {
      if (IncomingVal)
        return nullptr;
      IncomingVal = CurrentVal;
    }
  }

  return IncomingVal;
}
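// For example (illustrative): for "x = 1; for (i = 0; i != 10; ++i) x *= 2;"
// the routine below executes the recurrence symbolically -- 1, 2, 4, ... --
// and returns 1024 as the exit value of x, provided the backedge-taken count
// (10) does not exceed MaxBruteForceIterations.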
/// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
/// in the header of its containing loop, we know the loop executes a
/// constant number of times, and the PHI node is just a recurrence
/// involving constants, fold it.
Constant *
ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
                                                   const APInt &BEs,
                                                   const Loop *L) {
  auto I = ConstantEvolutionLoopExitValue.find(PN);
  if (I != ConstantEvolutionLoopExitValue.end())
    return I->second;

  if (BEs.ugt(MaxBruteForceIterations))
    return ConstantEvolutionLoopExitValue[PN] = nullptr;  // Not going to evaluate it.

  Constant *&RetVal = ConstantEvolutionLoopExitValue[PN];

  DenseMap<Instruction *, Constant *> CurrentIterVals;
  BasicBlock *Header = L->getHeader();
  assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");

  BasicBlock *Latch = L->getLoopLatch();
  if (!Latch)
    return nullptr;

  for (PHINode &PHI : Header->phis()) {
    if (auto *StartCST = getOtherIncomingValue(&PHI, Latch))
      CurrentIterVals[&PHI] = StartCST;
  }
  if (!CurrentIterVals.count(PN))
    return RetVal = nullptr;

  Value *BEValue = PN->getIncomingValueForBlock(Latch);

  // Execute the loop symbolically to determine the exit value.
  assert(BEs.getActiveBits() < CHAR_BIT * sizeof(unsigned) &&
         "BEs is <= MaxBruteForceIterations which is an 'unsigned'!");

  unsigned NumIterations = BEs.getZExtValue(); // must be in range
  unsigned IterationNum = 0;
  const DataLayout &DL = getDataLayout();
  for (; ; ++IterationNum) {
    if (IterationNum == NumIterations)
      return RetVal = CurrentIterVals[PN];  // Got exit value!

    // Compute the value of the PHIs for the next iteration.
    // EvaluateExpression adds non-phi values to the CurrentIterVals map.
    DenseMap<Instruction *, Constant *> NextIterVals;
    Constant *NextPHI =
        EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
    if (!NextPHI)
      return nullptr;        // Couldn't evaluate!
    NextIterVals[PN] = NextPHI;

    bool StoppedEvolving = NextPHI == CurrentIterVals[PN];

    // Also evaluate the other PHI nodes.  However, we don't get to stop if we
    // cease to be able to evaluate one of them or if they stop evolving,
    // because that doesn't necessarily prevent us from computing PN.
    SmallVector<std::pair<PHINode *, Constant *>, 8> PHIsToCompute;
    for (const auto &I : CurrentIterVals) {
      PHINode *PHI = dyn_cast<PHINode>(I.first);
      if (!PHI || PHI == PN || PHI->getParent() != Header) continue;
      PHIsToCompute.emplace_back(PHI, I.second);
    }
    // We use two distinct loops because EvaluateExpression may invalidate any
    // iterators into CurrentIterVals.
    for (const auto &I : PHIsToCompute) {
      PHINode *PHI = I.first;
      Constant *&NextPHI = NextIterVals[PHI];
      if (!NextPHI) {   // Not already computed.
        Value *BEValue = PHI->getIncomingValueForBlock(Latch);
        NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
      }
      if (NextPHI != I.second)
        StoppedEvolving = false;
    }

    // If all entries in CurrentIterVals == NextIterVals then we can stop
    // iterating, the loop can't continue to change.
    if (StoppedEvolving)
      return RetVal = CurrentIterVals[PN];

    CurrentIterVals.swap(NextIterVals);
  }
}

const SCEV *ScalarEvolution::computeExitCountExhaustively(const Loop *L,
                                                          Value *Cond,
                                                          bool ExitWhen) {
  PHINode *PN = getConstantEvolvingPHI(Cond, L);
  if (!PN) return getCouldNotCompute();

  // If the loop is canonicalized, the PHI will have exactly two entries.
  // That's the only form we support here.
  if (PN->getNumIncomingValues() != 2) return getCouldNotCompute();

  DenseMap<Instruction *, Constant *> CurrentIterVals;
  BasicBlock *Header = L->getHeader();
  assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");

  BasicBlock *Latch = L->getLoopLatch();
  assert(Latch && "Should follow from NumIncomingValues == 2!");

  for (PHINode &PHI : Header->phis()) {
    if (auto *StartCST = getOtherIncomingValue(&PHI, Latch))
      CurrentIterVals[&PHI] = StartCST;
  }
  if (!CurrentIterVals.count(PN))
    return getCouldNotCompute();

  // Okay, we found a PHI node that defines the trip count of this loop.
  // Execute the loop symbolically to determine when the condition gets a
  // value of "ExitWhen".
  unsigned MaxIterations = MaxBruteForceIterations;   // Limit analysis.
  const DataLayout &DL = getDataLayout();
  for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){
    auto *CondVal = dyn_cast_or_null<ConstantInt>(
        EvaluateExpression(Cond, L, CurrentIterVals, DL, &TLI));

    // Couldn't symbolically evaluate.
    if (!CondVal) return getCouldNotCompute();

    if (CondVal->getValue() == uint64_t(ExitWhen)) {
      ++NumBruteForceTripCountsComputed;
      return getConstant(Type::getInt32Ty(getContext()), IterationNum);
    }

    // Update all the PHI nodes for the next iteration.
    DenseMap<Instruction *, Constant *> NextIterVals;

    // Create a list of which PHIs we need to compute. We want to do this before
    // calling EvaluateExpression on them because that may invalidate iterators
    // into CurrentIterVals.
    SmallVector<PHINode *, 8> PHIsToCompute;
    for (const auto &I : CurrentIterVals) {
      PHINode *PHI = dyn_cast<PHINode>(I.first);
      if (!PHI || PHI->getParent() != Header) continue;
      PHIsToCompute.push_back(PHI);
    }
    for (PHINode *PHI : PHIsToCompute) {
      Constant *&NextPHI = NextIterVals[PHI];
      if (NextPHI) continue;    // Already computed!

      Value *BEValue = PHI->getIncomingValueForBlock(Latch);
      NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
    }
    CurrentIterVals.swap(NextIterVals);
  }

  // Too many iterations were needed to evaluate.
  return getCouldNotCompute();
}
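// Note (illustrative): for "i = 0; do { i += 3; } while (i != 9)" the loop
// above evaluates the exit condition on i = 0, 3, 6, 9 in turn and returns
// the first iteration number at which the condition equals ExitWhen.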
const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
  SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values =
      ValuesAtScopes[V];
  // Check to see if we've folded this expression at this loop before.
  for (auto &LS : Values)
    if (LS.first == L)
      return LS.second ? LS.second : V;

  Values.emplace_back(L, nullptr);

  // Otherwise compute it.
  const SCEV *C = computeSCEVAtScope(V, L);
  for (auto &LS : reverse(ValuesAtScopes[V]))
    if (LS.first == L) {
      LS.second = C;
      break;
    }
  return C;
}

/// This builds up a Constant using the ConstantExpr interface.  That way, we
/// will return Constants for objects which aren't represented by a
/// SCEVConstant, because SCEVConstant is restricted to ConstantInt.
/// Returns NULL if the SCEV isn't representable as a Constant.
static Constant *BuildConstantFromSCEV(const SCEV *V) {
  switch (static_cast<SCEVTypes>(V->getSCEVType())) {
    case scCouldNotCompute:
    case scAddRecExpr:
      break;
    case scConstant:
      return cast<SCEVConstant>(V)->getValue();
    case scUnknown:
      return dyn_cast<Constant>(cast<SCEVUnknown>(V)->getValue());
    case scSignExtend: {
      const SCEVSignExtendExpr *SS = cast<SCEVSignExtendExpr>(V);
      if (Constant *CastOp = BuildConstantFromSCEV(SS->getOperand()))
        return ConstantExpr::getSExt(CastOp, SS->getType());
      break;
    }
    case scZeroExtend: {
      const SCEVZeroExtendExpr *SZ = cast<SCEVZeroExtendExpr>(V);
      if (Constant *CastOp = BuildConstantFromSCEV(SZ->getOperand()))
        return ConstantExpr::getZExt(CastOp, SZ->getType());
      break;
    }
    case scTruncate: {
      const SCEVTruncateExpr *ST = cast<SCEVTruncateExpr>(V);
      if (Constant *CastOp = BuildConstantFromSCEV(ST->getOperand()))
        return ConstantExpr::getTrunc(CastOp, ST->getType());
      break;
    }
    case scAddExpr: {
      const SCEVAddExpr *SA = cast<SCEVAddExpr>(V);
      if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) {
        if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
          unsigned AS = PTy->getAddressSpace();
          Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
          C = ConstantExpr::getBitCast(C, DestPtrTy);
        }
        for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) {
          Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i));
          if (!C2) return nullptr;

          // First pointer!
          if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) {
            unsigned AS = C2->getType()->getPointerAddressSpace();
            std::swap(C, C2);
            Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
            // The offsets have been converted to bytes.  We can add bytes to an
            // i8* by GEP with the byte count in the first index.
            C = ConstantExpr::getBitCast(C, DestPtrTy);
          }

          // Don't bother trying to sum two pointers. We probably can't
          // statically compute a load that results from it anyway.
          if (C2->getType()->isPointerTy())
            return nullptr;

          if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
            if (PTy->getElementType()->isStructTy())
              C2 = ConstantExpr::getIntegerCast(
                  C2, Type::getInt32Ty(C->getContext()), true);
            C = ConstantExpr::getGetElementPtr(PTy->getElementType(), C, C2);
          } else
            C = ConstantExpr::getAdd(C, C2);
        }
        return C;
      }
      break;
    }
    case scMulExpr: {
      const SCEVMulExpr *SM = cast<SCEVMulExpr>(V);
      if (Constant *C = BuildConstantFromSCEV(SM->getOperand(0))) {
        // Don't bother with pointers at all.
        if (C->getType()->isPointerTy()) return nullptr;
        for (unsigned i = 1, e = SM->getNumOperands(); i != e; ++i) {
          Constant *C2 = BuildConstantFromSCEV(SM->getOperand(i));
          if (!C2 || C2->getType()->isPointerTy()) return nullptr;
          C = ConstantExpr::getMul(C, C2);
        }
        return C;
      }
      break;
    }
    case scUDivExpr: {
      const SCEVUDivExpr *SU = cast<SCEVUDivExpr>(V);
      if (Constant *LHS = BuildConstantFromSCEV(SU->getLHS()))
        if (Constant *RHS = BuildConstantFromSCEV(SU->getRHS()))
          if (LHS->getType() == RHS->getType())
            return ConstantExpr::getUDiv(LHS, RHS);
      break;
    }
    case scSMaxExpr:
    case scUMaxExpr:
      break; // TODO: smax, umax.
  }
  return nullptr;
}
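// For example (illustrative): the SCEV (8 + @g), an scAddExpr of a constant
// and a global pointer, is rebuilt above as a bitcast-to-i8* plus GEP
// constant expression, which a later constant-folded load can see through.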
const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
  if (isa<SCEVConstant>(V)) return V;

  // If this instruction is evolved from a constant-evolving PHI, compute the
  // exit value from the loop without using SCEVs.
  if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) {
    if (Instruction *I = dyn_cast<Instruction>(SU->getValue())) {
      const Loop *LI = this->LI[I->getParent()];
      if (LI && LI->getParentLoop() == L)  // Looking for loop exit value.
        if (PHINode *PN = dyn_cast<PHINode>(I))
          if (PN->getParent() == LI->getHeader()) {
            // Okay, there is no closed form solution for the PHI node.  Check
            // to see if the loop that contains it has a known backedge-taken
            // count.  If so, we may be able to force computation of the exit
            // value.
            const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI);
            if (const SCEVConstant *BTCC =
                    dyn_cast<SCEVConstant>(BackedgeTakenCount)) {

              // This trivial case can show up in some degenerate cases where
              // the incoming IR has not yet been fully simplified.
              if (BTCC->getValue()->isZero()) {
                Value *InitValue = nullptr;
                bool MultipleInitValues = false;
                for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
                  if (!LI->contains(PN->getIncomingBlock(i))) {
                    if (!InitValue)
                      InitValue = PN->getIncomingValue(i);
                    else if (InitValue != PN->getIncomingValue(i)) {
                      MultipleInitValues = true;
                      break;
                    }
                  }
                  if (!MultipleInitValues && InitValue)
                    return getSCEV(InitValue);
                }
              }
              // Okay, we know how many times the containing loop executes.  If
              // this is a constant evolving PHI node, get the final value at
              // the specified iteration number.
              Constant *RV =
                  getConstantEvolutionLoopExitValue(PN, BTCC->getAPInt(), LI);
              if (RV) return getSCEV(RV);
            }
          }

      // Okay, this is an expression that we cannot symbolically evaluate
      // into a SCEV.  Check to see if it's possible to symbolically evaluate
      // the arguments into constants, and if so, try to constant propagate the
      // result.  This is particularly useful for computing loop exit values.
      if (CanConstantFold(I)) {
        SmallVector<Constant *, 4> Operands;
        bool MadeImprovement = false;
        for (Value *Op : I->operands()) {
          if (Constant *C = dyn_cast<Constant>(Op)) {
            Operands.push_back(C);
            continue;
          }

          // If any of the operands is non-constant and if they are
          // non-integer and non-pointer, don't even try to analyze them
          // with scev techniques.
          if (!isSCEVable(Op->getType()))
            return V;

          const SCEV *OrigV = getSCEV(Op);
          const SCEV *OpV = getSCEVAtScope(OrigV, L);
          MadeImprovement |= OrigV != OpV;

          Constant *C = BuildConstantFromSCEV(OpV);
          if (!C) return V;
          if (C->getType() != Op->getType())
            C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
                                                              Op->getType(),
                                                              false),
                                      C, Op->getType());
          Operands.push_back(C);
        }

        // Check to see if getSCEVAtScope actually made an improvement.
        if (MadeImprovement) {
          Constant *C = nullptr;
          const DataLayout &DL = getDataLayout();
          if (const CmpInst *CI = dyn_cast<CmpInst>(I))
            C = ConstantFoldCompareInstOperands(CI->getPredicate(),
                                                Operands[0], Operands[1], DL,
                                                &TLI);
          else if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
            if (!LI->isVolatile())
              C = ConstantFoldLoadFromConstPtr(Operands[0], LI->getType(), DL);
          } else
            C = ConstantFoldInstOperands(I, Operands, DL, &TLI);
          if (!C) return V;
          return getSCEV(C);
        }
      }
    }

    // This is some other type of SCEVUnknown, just return it.
    return V;
  }
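  // The remaining cases recurse into the operands of composite expressions.
  // For example (illustrative): getSCEVAtScope((%n + {0,+,1}<L>), Outer)
  // first folds the addrec to its exit value and then rebuilds the add.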
  if (const SCEVCommutativeExpr *Comm = dyn_cast<SCEVCommutativeExpr>(V)) {
    // Avoid performing the look-up in the common case where the specified
    // expression has no loop-variant portions.
    for (unsigned i = 0, e = Comm->getNumOperands(); i != e; ++i) {
      const SCEV *OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
      if (OpAtScope != Comm->getOperand(i)) {
        // Okay, at least one of these operands is loop variant but might be
        // foldable.  Build a new instance of the folded commutative expression.
        SmallVector<const SCEV *, 8> NewOps(Comm->op_begin(),
                                            Comm->op_begin()+i);
        NewOps.push_back(OpAtScope);

        for (++i; i != e; ++i) {
          OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
          NewOps.push_back(OpAtScope);
        }
        if (isa<SCEVAddExpr>(Comm))
          return getAddExpr(NewOps);
        if (isa<SCEVMulExpr>(Comm))
          return getMulExpr(NewOps);
        if (isa<SCEVSMaxExpr>(Comm))
          return getSMaxExpr(NewOps);
        if (isa<SCEVUMaxExpr>(Comm))
          return getUMaxExpr(NewOps);
        llvm_unreachable("Unknown commutative SCEV type!");
      }
    }
    // If we got here, all operands are loop invariant.
    return Comm;
  }

  if (const SCEVUDivExpr *Div = dyn_cast<SCEVUDivExpr>(V)) {
    const SCEV *LHS = getSCEVAtScope(Div->getLHS(), L);
    const SCEV *RHS = getSCEVAtScope(Div->getRHS(), L);
    if (LHS == Div->getLHS() && RHS == Div->getRHS())
      return Div;   // must be loop invariant
    return getUDivExpr(LHS, RHS);
  }

  // If this is a loop recurrence for a loop that does not contain L, then we
  // are dealing with the final value computed by the loop.
  if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V)) {
    // First, attempt to evaluate each operand.
    // Avoid performing the look-up in the common case where the specified
    // expression has no loop-variant portions.
    for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
      const SCEV *OpAtScope = getSCEVAtScope(AddRec->getOperand(i), L);
      if (OpAtScope == AddRec->getOperand(i))
        continue;

      // Okay, at least one of these operands is loop variant but might be
      // foldable.  Build a new instance of the folded commutative expression.
      SmallVector<const SCEV *, 8> NewOps(AddRec->op_begin(),
                                          AddRec->op_begin()+i);
      NewOps.push_back(OpAtScope);
      for (++i; i != e; ++i)
        NewOps.push_back(getSCEVAtScope(AddRec->getOperand(i), L));

      const SCEV *FoldedRec =
          getAddRecExpr(NewOps, AddRec->getLoop(),
                        AddRec->getNoWrapFlags(SCEV::FlagNW));
      AddRec = dyn_cast<SCEVAddRecExpr>(FoldedRec);
      // The addrec may be folded to a nonrecurrence, for example, if the
      // induction variable is multiplied by zero after constant folding. Go
      // ahead and return the folded value.
      if (!AddRec)
        return FoldedRec;
      break;
    }

    // If the scope is outside the addrec's loop, evaluate it by using the
    // loop exit value of the addrec.
    if (!AddRec->getLoop()->contains(L)) {
      // To evaluate this recurrence, we need to know how many times the AddRec
      // loop iterates.  Compute this now.
      const SCEV *BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop());
      if (BackedgeTakenCount == getCouldNotCompute()) return AddRec;

      // Then, evaluate the AddRec.
      return AddRec->evaluateAtIteration(BackedgeTakenCount, *this);
    }

    return AddRec;
  }

  if (const SCEVZeroExtendExpr *Cast = dyn_cast<SCEVZeroExtendExpr>(V)) {
    const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
    if (Op == Cast->getOperand())
      return Cast;  // must be loop invariant
    return getZeroExtendExpr(Op, Cast->getType());
  }

  if (const SCEVSignExtendExpr *Cast = dyn_cast<SCEVSignExtendExpr>(V)) {
    const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
    if (Op == Cast->getOperand())
      return Cast;  // must be loop invariant
    return getSignExtendExpr(Op, Cast->getType());
  }

  if (const SCEVTruncateExpr *Cast = dyn_cast<SCEVTruncateExpr>(V)) {
    const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
    if (Op == Cast->getOperand())
      return Cast;  // must be loop invariant
    return getTruncateExpr(Op, Cast->getType());
  }

  llvm_unreachable("Unknown SCEV type!");
}

const SCEV *ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) {
  return getSCEVAtScope(getSCEV(V), L);
}
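// Worked example for SolveLinEquationWithOverflow below (illustrative):
// solving 4*X == 12 (mod 256): D = gcd(4, 256) = 4 (Mult2 == 2), B == 12 is
// divisible by 4, and since A/D == 1 is its own multiplicative inverse the
// minimum unsigned root is (1 * 12 mod 256) / 4 == 3; indeed 4 * 3 == 12.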
/// Finds the minimum unsigned root of the following equation:
///
///     A * X = B (mod N)
///
/// where N = 2^BW and BW is the common bit width of A and B. The signedness of
/// A and B isn't important.
///
/// If the equation does not have a solution, SCEVCouldNotCompute is returned.
static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const SCEV *B,
                                                ScalarEvolution &SE) {
  uint32_t BW = A.getBitWidth();
  assert(BW == SE.getTypeSizeInBits(B->getType()));
  assert(A != 0 && "A must be non-zero.");

  // 1. D = gcd(A, N)
  //
  // The gcd of A and N may have only one prime factor: 2. The number of
  // trailing zeros in A is its multiplicity
  uint32_t Mult2 = A.countTrailingZeros();
  // D = 2^Mult2

  // 2. Check if B is divisible by D.
  //
  // B is divisible by D if and only if the multiplicity of prime factor 2 for B
  // is not less than multiplicity of this prime factor for D.
  if (SE.GetMinTrailingZeros(B) < Mult2)
    return SE.getCouldNotCompute();

  // 3. Compute I: the multiplicative inverse of (A / D) in arithmetic
  // modulo (N / D).
  //
  // If D == 1, (N / D) == N == 2^BW, so we need one extra bit to represent
  // (N / D) in general. The inverse itself always fits into BW bits, though,
  // so we immediately truncate it.
  APInt AD = A.lshr(Mult2).zext(BW + 1);  // AD = A / D
  APInt Mod(BW + 1, 0);
  Mod.setBit(BW - Mult2);  // Mod = N / D
  APInt I = AD.multiplicativeInverse(Mod).trunc(BW);

  // 4. Compute the minimum unsigned root of the equation:
  // I * (B / D) mod (N / D)
  // To simplify the computation, we factor out the divide by D:
  // (I * B mod N) / D
  const SCEV *D = SE.getConstant(APInt::getOneBitSet(BW, Mult2));
  return SE.getUDivExactExpr(SE.getMulExpr(B, SE.getConstant(I)), D);
}

/// Find the roots of the quadratic equation for the given quadratic chrec
/// {L,+,M,+,N}.  This returns either the two roots (which might be the same) or
/// two SCEVCouldNotCompute objects.
static Optional<std::pair<const SCEVConstant *, const SCEVConstant *>>
SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
  assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!");
  const SCEVConstant *LC = dyn_cast<SCEVConstant>(AddRec->getOperand(0));
  const SCEVConstant *MC = dyn_cast<SCEVConstant>(AddRec->getOperand(1));
  const SCEVConstant *NC = dyn_cast<SCEVConstant>(AddRec->getOperand(2));

  // We currently can only solve this if the coefficients are constants.
  if (!LC || !MC || !NC)
    return None;

  uint32_t BitWidth = LC->getAPInt().getBitWidth();
  const APInt &L = LC->getAPInt();
  const APInt &M = MC->getAPInt();
  const APInt &N = NC->getAPInt();
  APInt Two(BitWidth, 2);

  // Convert from chrec coefficients to polynomial coefficients AX^2+BX+C

  // The A coefficient is N/2
  APInt A = N.sdiv(Two);

  // The B coefficient is M-N/2
  APInt B = M;
  B -= A; // A is the same as N/2.

  // The C coefficient is L.
  const APInt& C = L;

  // Compute the B^2-4ac term.
  APInt SqrtTerm = B;
  SqrtTerm *= B;
  SqrtTerm -= 4 * (A * C);

  if (SqrtTerm.isNegative()) {
    // The loop is provably infinite.
    return None;
  }

  // Compute sqrt(B^2-4ac). This is guaranteed to be the nearest
  // integer value or else APInt::sqrt() will assert.
  APInt SqrtVal = SqrtTerm.sqrt();

  // Compute the two solutions for the quadratic formula.
  // The divisions must be performed as signed divisions.
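  // For instance (illustrative): the chrec {-12,+,2,+,2} expands to
  // X^2 + X - 12 (A = 1, B = 1, C = -12), whose integer roots are 3 and -4.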
  APInt NegB = -std::move(B);
  APInt TwoA = std::move(A);
  TwoA <<= 1;
  if (TwoA.isNullValue())
    return None;

  LLVMContext &Context = SE.getContext();

  ConstantInt *Solution1 =
      ConstantInt::get(Context, (NegB + SqrtVal).sdiv(TwoA));
  ConstantInt *Solution2 =
      ConstantInt::get(Context, (NegB - SqrtVal).sdiv(TwoA));

  return std::make_pair(cast<SCEVConstant>(SE.getConstant(Solution1)),
                        cast<SCEVConstant>(SE.getConstant(Solution2)));
}

ScalarEvolution::ExitLimit
ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
                              bool AllowPredicates) {

  // This is only used for loops with a "x != y" exit test. The exit condition
  // is now expressed as a single expression, V = x-y. So the exit test is
  // effectively V != 0.  We know and take advantage of the fact that this
  // expression is only used in a comparison-with-zero context.

  SmallPtrSet<const SCEVPredicate *, 4> Predicates;
  // If the value is a constant
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
    // If the value is already zero, the branch will execute zero times.
    if (C->getValue()->isZero()) return C;
    return getCouldNotCompute();  // Otherwise it will loop infinitely.
  }

  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V);
  if (!AddRec && AllowPredicates)
    // Try to make this an AddRec using runtime tests, in the first X
    // iterations of this loop, where X is the SCEV expression found by the
    // algorithm below.
    AddRec = convertSCEVToAddRecWithPredicates(V, L, Predicates);

  if (!AddRec || AddRec->getLoop() != L)
    return getCouldNotCompute();

  // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of
  // the quadratic equation to solve it.
  if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) {
    if (auto Roots = SolveQuadraticEquation(AddRec, *this)) {
      const SCEVConstant *R1 = Roots->first;
      const SCEVConstant *R2 = Roots->second;
      // Pick the smallest positive root value.
      if (ConstantInt *CB = dyn_cast<ConstantInt>(ConstantExpr::getICmp(
              CmpInst::ICMP_ULT, R1->getValue(), R2->getValue()))) {
        if (!CB->getZExtValue())
          std::swap(R1, R2); // R1 is the minimum root now.

        // We can only use this value if the chrec ends up with an exact zero
        // value at this index.  When solving for "X*X != 5", for example, we
        // should not accept a root of 2.
        const SCEV *Val = AddRec->evaluateAtIteration(R1, *this);
        if (Val->isZero())
          // We found a quadratic root!
          return ExitLimit(R1, R1, false, Predicates);
      }
    }
    return getCouldNotCompute();
  }

  // Otherwise we can only handle this if it is affine.
  if (!AddRec->isAffine())
    return getCouldNotCompute();

  // If this is an affine expression, the execution count of this branch is
  // the minimum unsigned root of the following equation:
  //
  //     Start + Step*N = 0 (mod 2^BW)
  //
  // equivalent to:
  //
  //             Step*N = -Start (mod 2^BW)
  //
  // where BW is the common bit width of Start and Step.

  // Get the initial value for the loop.
  const SCEV *Start = getSCEVAtScope(AddRec->getStart(), L->getParentLoop());
  const SCEV *Step = getSCEVAtScope(AddRec->getOperand(1), L->getParentLoop());

  // For now we handle only constant steps.
  //
  // TODO: Handle a nonconstant Step given AddRec<NUW>. If the
  // AddRec is NUW, then (in an unsigned sense) it cannot be counting up to wrap
  // to 0, it must be counting down to equal 0. Consequently, N = Start / -Step.
  // We have not yet seen any such cases.
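  //
  // For example (illustrative): for i8 Start == 4 and Step == 2, solving
  // 2*N == -4 (mod 256) yields the minimum root N == 126, and indeed
  // 4 + 2*126 == 256 == 0 (mod 2^8): the IV wraps around to zero.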
  const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step);
  if (!StepC || StepC->getValue()->isZero())
    return getCouldNotCompute();

  // For positive steps (counting up until unsigned overflow):
  //   N = -Start/Step (as unsigned)
  // For negative steps (counting down to zero):
  //   N = Start/-Step
  // First compute the unsigned distance from zero in the direction of Step.
  bool CountDown = StepC->getAPInt().isNegative();
  const SCEV *Distance = CountDown ? Start : getNegativeSCEV(Start);

  // Handle unitary steps, which cannot wrap around.
  // 1*N = -Start; -1*N = Start (mod 2^BW), so:
  //   N = Distance (as unsigned)
  if (StepC->getValue()->isOne() || StepC->getValue()->isMinusOne()) {
    APInt MaxBECount = getUnsignedRangeMax(Distance);

    // When a loop like "for (int i = 0; i != n; ++i) { /* body */ }" is
    // rotated, we end up with a loop whose backedge-taken count is n - 1.
    // Detect this case, and see if we can improve the bound.
    //
    // Explicitly handling this here is necessary because getUnsignedRange
    // isn't context-sensitive; it doesn't know that we only care about the
    // range inside the loop.
    const SCEV *Zero = getZero(Distance->getType());
    const SCEV *One = getOne(Distance->getType());
    const SCEV *DistancePlusOne = getAddExpr(Distance, One);
    if (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, DistancePlusOne, Zero)) {
      // If Distance + 1 doesn't overflow, we can compute the maximum distance
      // as "unsigned_max(Distance + 1) - 1".
      ConstantRange CR = getUnsignedRange(DistancePlusOne);
      MaxBECount = APIntOps::umin(MaxBECount, CR.getUnsignedMax() - 1);
    }

    return ExitLimit(Distance, getConstant(MaxBECount), false, Predicates);
  }

  // If the condition controls loop exit (the loop exits only if the expression
  // is true) and the addition is no-wrap we can use unsigned divide to
  // compute the backedge count.  In this case, the step may not divide the
  // distance, but we don't care because if the condition is "missed" the loop
  // will have undefined behavior due to wrapping.
  if (ControlsExit && AddRec->hasNoSelfWrap() &&
      loopHasNoAbnormalExits(AddRec->getLoop())) {
    const SCEV *Exact =
        getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
    const SCEV *Max =
        Exact == getCouldNotCompute()
            ? Exact
            : getConstant(getUnsignedRangeMax(Exact));
    return ExitLimit(Exact, Max, false, Predicates);
  }

  // Solve the general equation.
  const SCEV *E = SolveLinEquationWithOverflow(StepC->getAPInt(),
                                               getNegativeSCEV(Start), *this);
  const SCEV *M = E == getCouldNotCompute()
                      ? E
                      : getConstant(getUnsignedRangeMax(E));
  return ExitLimit(E, M, false, Predicates);
}

ScalarEvolution::ExitLimit
ScalarEvolution::howFarToNonZero(const SCEV *V, const Loop *L) {
  // Loops that look like: while (X == 0) are very strange indeed.  We don't
  // handle them yet except for the trivial case.  This could be expanded in the
  // future as needed.

  // If the value is a constant, check to see if it is known to be non-zero
  // already.  If so, the backedge will execute zero times.
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
    if (!C->getValue()->isZero())
      return getZero(C->getType());
    return getCouldNotCompute();  // Otherwise it will loop infinitely.
  }

  // We could implement others, but I really doubt anyone writes loops like
  // this, and if they did, they would already be constant folded.
  return getCouldNotCompute();
}

std::pair<BasicBlock *, BasicBlock *>
ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) {
  // If the block has a unique predecessor, then there is no path from the
  // predecessor to the block that does not go through the direct edge
  // from the predecessor to the block.
  if (BasicBlock *Pred = BB->getSinglePredecessor())
    return {Pred, BB};

  // A loop's header is defined to be a block that dominates the loop.
  // If the header has a unique predecessor outside the loop, it must be
  // a block that has exactly one successor that can reach the loop.
  if (Loop *L = LI.getLoopFor(BB))
    return {L->getLoopPredecessor(), L->getHeader()};

  return {nullptr, nullptr};
}

/// SCEV structural equivalence is usually sufficient for testing whether two
/// expressions are equal, however for the purposes of looking for a condition
/// guarding a loop, it can be useful to be a little more general, since a
/// front-end may have replicated the controlling expression.
static bool HasSameValue(const SCEV *A, const SCEV *B) {
  // Quick check to see if they are the same SCEV.
  if (A == B) return true;

  auto ComputesEqualValues = [](const Instruction *A, const Instruction *B) {
    // Not all instructions that are "identical" compute the same value.  For
    // instance, two distinct alloca instructions allocating the same type are
    // identical and do not read memory; but compute distinct values.
    return A->isIdenticalTo(B) &&
           (isa<BinaryOperator>(A) || isa<GetElementPtrInst>(A));
  };

  // Otherwise, if they're both SCEVUnknown, it's possible that they hold
  // two different instructions with the same value. Check for this case.
  if (const SCEVUnknown *AU = dyn_cast<SCEVUnknown>(A))
    if (const SCEVUnknown *BU = dyn_cast<SCEVUnknown>(B))
      if (const Instruction *AI = dyn_cast<Instruction>(AU->getValue()))
        if (const Instruction *BI = dyn_cast<Instruction>(BU->getValue()))
          if (ComputesEqualValues(AI, BI))
            return true;

  // Otherwise assume they may have a different value.
  return false;
}

bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
                                           const SCEV *&LHS, const SCEV *&RHS,
                                           unsigned Depth) {
  bool Changed = false;
  // If we hit the max recursion limit bail out.
  if (Depth >= 3)
    return false;

  // Canonicalize a constant to the right side.
  if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
    // Check for both operands constant.
    if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
      if (ConstantExpr::getICmp(Pred,
                                LHSC->getValue(),
                                RHSC->getValue())->isNullValue())
        goto trivially_false;
      else
        goto trivially_true;
    }
    // Otherwise swap the operands to put the constant on the right.
    std::swap(LHS, RHS);
    Pred = ICmpInst::getSwappedPredicate(Pred);
    Changed = true;
  }

  // If we're comparing an addrec with a value which is loop-invariant in the
  // addrec's loop, put the addrec on the left. Also make a dominance check,
  // as both operands could be addrecs loop-invariant in each other's loop.
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS)) {
    const Loop *L = AR->getLoop();
    if (isLoopInvariant(LHS, L) && properlyDominates(LHS, L->getHeader())) {
      std::swap(LHS, RHS);
      Pred = ICmpInst::getSwappedPredicate(Pred);
      Changed = true;
    }
  }

  // If there's a constant operand, canonicalize comparisons with boundary
  // cases, and canonicalize *-or-equal comparisons to regular comparisons.
  if (const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS)) {
    const APInt &RA = RC->getAPInt();

    bool SimplifiedByConstantRange = false;

    if (!ICmpInst::isEquality(Pred)) {
      ConstantRange ExactCR = ConstantRange::makeExactICmpRegion(Pred, RA);
      if (ExactCR.isFullSet())
        goto trivially_true;
      else if (ExactCR.isEmptySet())
        goto trivially_false;

      APInt NewRHS;
      CmpInst::Predicate NewPred;
      if (ExactCR.getEquivalentICmp(NewPred, NewRHS) &&
          ICmpInst::isEquality(NewPred)) {
        // We were able to convert an inequality to an equality.
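        // (For instance, "x u< 1" has the exact range [0, 1) and becomes
        // "x == 0".)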
        Pred = NewPred;
        RHS = getConstant(NewRHS);
        Changed = SimplifiedByConstantRange = true;
      }
    }

    if (!SimplifiedByConstantRange) {
      switch (Pred) {
      default:
        break;
      case ICmpInst::ICMP_EQ:
      case ICmpInst::ICMP_NE:
        // Fold ((-1) * %a) + %b == 0 (equivalent to %b-%a == 0) into %a == %b.
        if (!RA)
          if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(LHS))
            if (const SCEVMulExpr *ME =
                    dyn_cast<SCEVMulExpr>(AE->getOperand(0)))
              if (AE->getNumOperands() == 2 && ME->getNumOperands() == 2 &&
                  ME->getOperand(0)->isAllOnesValue()) {
                RHS = AE->getOperand(1);
                LHS = ME->getOperand(1);
                Changed = true;
              }
        break;


        // The "Should have been caught earlier!" messages refer to the fact
        // that the ExactCR.isFullSet() or ExactCR.isEmptySet() check above
        // should have fired on the corresponding cases, and canonicalized the
        // check to trivially_true or trivially_false.

      case ICmpInst::ICMP_UGE:
        assert(!RA.isMinValue() && "Should have been caught earlier!");
        Pred = ICmpInst::ICMP_UGT;
        RHS = getConstant(RA - 1);
        Changed = true;
        break;
      case ICmpInst::ICMP_ULE:
        assert(!RA.isMaxValue() && "Should have been caught earlier!");
        Pred = ICmpInst::ICMP_ULT;
        RHS = getConstant(RA + 1);
        Changed = true;
        break;
      case ICmpInst::ICMP_SGE:
        assert(!RA.isMinSignedValue() && "Should have been caught earlier!");
        Pred = ICmpInst::ICMP_SGT;
        RHS = getConstant(RA - 1);
        Changed = true;
        break;
      case ICmpInst::ICMP_SLE:
        assert(!RA.isMaxSignedValue() && "Should have been caught earlier!");
        Pred = ICmpInst::ICMP_SLT;
        RHS = getConstant(RA + 1);
        Changed = true;
        break;
      }
    }
  }

  // Check for obvious equality.
  if (HasSameValue(LHS, RHS)) {
    if (ICmpInst::isTrueWhenEqual(Pred))
      goto trivially_true;
    if (ICmpInst::isFalseWhenEqual(Pred))
      goto trivially_false;
  }

  // If possible, canonicalize GE/LE comparisons to GT/LT comparisons, by
  // adding or subtracting 1 from one of the operands.
  switch (Pred) {
  case ICmpInst::ICMP_SLE:
    if (!getSignedRangeMax(RHS).isMaxSignedValue()) {
      RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
                       SCEV::FlagNSW);
      Pred = ICmpInst::ICMP_SLT;
      Changed = true;
    } else if (!getSignedRangeMin(LHS).isMinSignedValue()) {
      LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS,
                       SCEV::FlagNSW);
      Pred = ICmpInst::ICMP_SLT;
      Changed = true;
    }
    break;
  case ICmpInst::ICMP_SGE:
    if (!getSignedRangeMin(RHS).isMinSignedValue()) {
      RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS,
                       SCEV::FlagNSW);
      Pred = ICmpInst::ICMP_SGT;
      Changed = true;
    } else if (!getSignedRangeMax(LHS).isMaxSignedValue()) {
      LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
                       SCEV::FlagNSW);
      Pred = ICmpInst::ICMP_SGT;
      Changed = true;
    }
    break;
  case ICmpInst::ICMP_ULE:
    if (!getUnsignedRangeMax(RHS).isMaxValue()) {
      RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
                       SCEV::FlagNUW);
      Pred = ICmpInst::ICMP_ULT;
      Changed = true;
    } else if (!getUnsignedRangeMin(LHS).isMinValue()) {
      LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS);
      Pred = ICmpInst::ICMP_ULT;
      Changed = true;
    }
    break;
  case ICmpInst::ICMP_UGE:
    if (!getUnsignedRangeMin(RHS).isMinValue()) {
      RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS);
      Pred = ICmpInst::ICMP_UGT;
      Changed = true;
    } else if (!getUnsignedRangeMax(LHS).isMaxValue()) {
      LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
                       SCEV::FlagNUW);
      Pred = ICmpInst::ICMP_UGT;
      Changed = true;
    }
    break;
  default:
    break;
  }

  // TODO: More simplifications are possible here.

  // Recursively simplify until we either hit a recursion limit or nothing
  // changes.
  if (Changed)
    return SimplifyICmpOperands(Pred, LHS, RHS, Depth+1);

  return Changed;

trivially_true:
  // Return 0 == 0.
  LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
  Pred = ICmpInst::ICMP_EQ;
  return true;

trivially_false:
  // Return 0 != 0.
  LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
  Pred = ICmpInst::ICMP_NE;
  return true;
}

bool ScalarEvolution::isKnownNegative(const SCEV *S) {
  return getSignedRangeMax(S).isNegative();
}

bool ScalarEvolution::isKnownPositive(const SCEV *S) {
  return getSignedRangeMin(S).isStrictlyPositive();
}

bool ScalarEvolution::isKnownNonNegative(const SCEV *S) {
  return !getSignedRangeMin(S).isNegative();
}

bool ScalarEvolution::isKnownNonPositive(const SCEV *S) {
  return !getSignedRangeMax(S).isStrictlyPositive();
}

bool ScalarEvolution::isKnownNonZero(const SCEV *S) {
  return isKnownNegative(S) || isKnownPositive(S);
}

bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred,
                                       const SCEV *LHS, const SCEV *RHS) {
  // Canonicalize the inputs first.
  (void)SimplifyICmpOperands(Pred, LHS, RHS);

  // If LHS or RHS is an addrec, check to see if the condition is true in
  // every iteration of the loop.
  // If LHS and RHS are both addrec, both conditions must be true in
  // every iteration of the loop.
  const SCEVAddRecExpr *LAR = dyn_cast<SCEVAddRecExpr>(LHS);
  const SCEVAddRecExpr *RAR = dyn_cast<SCEVAddRecExpr>(RHS);
  bool LeftGuarded = false;
  bool RightGuarded = false;
  if (LAR) {
    const Loop *L = LAR->getLoop();
    if (isLoopEntryGuardedByCond(L, Pred, LAR->getStart(), RHS) &&
        isLoopBackedgeGuardedByCond(L, Pred, LAR->getPostIncExpr(*this), RHS)) {
      if (!RAR) return true;
      LeftGuarded = true;
    }
  }
  if (RAR) {
    const Loop *L = RAR->getLoop();
    if (isLoopEntryGuardedByCond(L, Pred, LHS, RAR->getStart()) &&
        isLoopBackedgeGuardedByCond(L, Pred, LHS, RAR->getPostIncExpr(*this))) {
      if (!LAR) return true;
      RightGuarded = true;
    }
  }
  if (LeftGuarded && RightGuarded)
    return true;

  if (isKnownPredicateViaSplitting(Pred, LHS, RHS))
    return true;

  // Otherwise see what can be done with known constant ranges.
  return isKnownPredicateViaConstantRanges(Pred, LHS, RHS);
}

bool ScalarEvolution::isMonotonicPredicate(const SCEVAddRecExpr *LHS,
                                           ICmpInst::Predicate Pred,
                                           bool &Increasing) {
  bool Result = isMonotonicPredicateImpl(LHS, Pred, Increasing);

#ifndef NDEBUG
  // Verify an invariant: inverting the predicate should turn a monotonically
  // increasing change to a monotonically decreasing one, and vice versa.
  bool IncreasingSwapped;
  bool ResultSwapped = isMonotonicPredicateImpl(
      LHS, ICmpInst::getSwappedPredicate(Pred), IncreasingSwapped);

  assert(Result == ResultSwapped && "should be able to analyze both!");
  if (ResultSwapped)
    assert(Increasing == !IncreasingSwapped &&
           "monotonicity should flip as we flip the predicate");
#endif

  return Result;
}

bool ScalarEvolution::isMonotonicPredicateImpl(const SCEVAddRecExpr *LHS,
                                               ICmpInst::Predicate Pred,
                                               bool &Increasing) {

  // A zero step value for LHS means the induction variable is essentially a
  // loop invariant value. We don't really depend on the predicate actually
  // flipping from false to true (for increasing predicates, and the other way
  // around for decreasing predicates), all we care about is that *if* the
  // predicate changes then it only changes from false to true.
  //
  // A zero step value in itself is not very useful, but there may be places
  // where SCEV can prove X >= 0 but not prove X > 0, so it is helpful to be
  // as general as possible.
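  //
  // For example (illustrative): with LHS = {0,+,1}<nuw> and Pred = ICMP_ULT,
  // "LHS u< RHS" can only change from true to false as the loop advances, so
  // the analysis below reports Increasing == false.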
  switch (Pred) {
  default:
    return false; // Conservative answer

  case ICmpInst::ICMP_UGT:
  case ICmpInst::ICMP_UGE:
  case ICmpInst::ICMP_ULT:
  case ICmpInst::ICMP_ULE:
    if (!LHS->hasNoUnsignedWrap())
      return false;

    Increasing = Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE;
    return true;

  case ICmpInst::ICMP_SGT:
  case ICmpInst::ICMP_SGE:
  case ICmpInst::ICMP_SLT:
  case ICmpInst::ICMP_SLE: {
    if (!LHS->hasNoSignedWrap())
      return false;

    const SCEV *Step = LHS->getStepRecurrence(*this);

    if (isKnownNonNegative(Step)) {
      Increasing = Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE;
      return true;
    }

    if (isKnownNonPositive(Step)) {
      Increasing = Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE;
      return true;
    }

    return false;
  }
  }

  llvm_unreachable("switch has default clause!");
}

bool ScalarEvolution::isLoopInvariantPredicate(
    ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L,
    ICmpInst::Predicate &InvariantPred, const SCEV *&InvariantLHS,
    const SCEV *&InvariantRHS) {

  // If there is a loop-invariant, force it into the RHS, otherwise bail out.
  if (!isLoopInvariant(RHS, L)) {
    if (!isLoopInvariant(LHS, L))
      return false;

    std::swap(LHS, RHS);
    Pred = ICmpInst::getSwappedPredicate(Pred);
  }

  const SCEVAddRecExpr *ArLHS = dyn_cast<SCEVAddRecExpr>(LHS);
  if (!ArLHS || ArLHS->getLoop() != L)
    return false;

  bool Increasing;
  if (!isMonotonicPredicate(ArLHS, Pred, Increasing))
    return false;

  // If the predicate "ArLHS `Pred` RHS" monotonically increases from false to
  // true as the loop iterates, and the backedge is control dependent on
  // "ArLHS `Pred` RHS" == true then we can reason as follows:
  //
  //   * if the predicate was false in the first iteration then the predicate
  //     is never evaluated again, since the loop exits without taking the
  //     backedge.
  //   * if the predicate was true in the first iteration then it will
  //     continue to be true for all future iterations since it is
  //     monotonically increasing.
  //
  // For both the above possibilities, we can replace the loop varying
  // predicate with its value on the first iteration of the loop (which is
  // loop invariant).
  //
  // A similar reasoning applies for a monotonically decreasing predicate, by
  // replacing true with false and false with true in the above two bullets.

  auto P = Increasing ? Pred : ICmpInst::getInversePredicate(Pred);

  if (!isLoopBackedgeGuardedByCond(L, P, LHS, RHS))
    return false;

  InvariantPred = Pred;
  InvariantLHS = ArLHS->getStart();
  InvariantRHS = RHS;
  return true;
}

bool ScalarEvolution::isKnownPredicateViaConstantRanges(
    ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) {
  if (HasSameValue(LHS, RHS))
    return ICmpInst::isTrueWhenEqual(Pred);

  // This code is split out from isKnownPredicate because it is called from
  // within isLoopEntryGuardedByCond.

  auto CheckRanges =
      [&](const ConstantRange &RangeLHS, const ConstantRange &RangeRHS) {
    return ConstantRange::makeSatisfyingICmpRegion(Pred, RangeRHS)
        .contains(RangeLHS);
  };

  // The check at the top of the function catches the case where the values are
  // known to be equal.
  if (Pred == CmpInst::ICMP_EQ)
    return false;

  if (Pred == CmpInst::ICMP_NE)
    return CheckRanges(getSignedRange(LHS), getSignedRange(RHS)) ||
           CheckRanges(getUnsignedRange(LHS), getUnsignedRange(RHS)) ||
           isKnownNonZero(getMinusSCEV(LHS, RHS));

  if (CmpInst::isSigned(Pred))
    return CheckRanges(getSignedRange(LHS), getSignedRange(RHS));

  return CheckRanges(getUnsignedRange(LHS), getUnsignedRange(RHS));
}

bool ScalarEvolution::isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred,
                                                    const SCEV *LHS,
                                                    const SCEV *RHS) {

  // Match Result to (X + Y)<ExpectedFlags> where Y is a constant integer.
  // Return Y via OutY.
  auto MatchBinaryAddToConst =
      [this](const SCEV *Result, const SCEV *X, APInt &OutY,
             SCEV::NoWrapFlags ExpectedFlags) {
    const SCEV *NonConstOp, *ConstOp;
    SCEV::NoWrapFlags FlagsPresent;

    if (!splitBinaryAdd(Result, ConstOp, NonConstOp, FlagsPresent) ||
        !isa<SCEVConstant>(ConstOp) || NonConstOp != X)
      return false;

    OutY = cast<SCEVConstant>(ConstOp)->getAPInt();
    return (FlagsPresent & ExpectedFlags) == ExpectedFlags;
  };

  APInt C;

  switch (Pred) {
  default:
    break;

  case ICmpInst::ICMP_SGE:
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ICmpInst::ICMP_SLE:
    // X s<= (X + C)<nsw> if C >= 0
    if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNSW) && C.isNonNegative())
      return true;

    // (X + C)<nsw> s<= X if C <= 0
    if (MatchBinaryAddToConst(LHS, RHS, C, SCEV::FlagNSW) &&
        !C.isStrictlyPositive())
      return true;
    break;

  case ICmpInst::ICMP_SGT:
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ICmpInst::ICMP_SLT:
    // X s< (X + C)<nsw> if C > 0
    if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNSW) &&
        C.isStrictlyPositive())
      return true;

    // (X + C)<nsw> s< X if C < 0
    if (MatchBinaryAddToConst(LHS, RHS, C, SCEV::FlagNSW) && C.isNegative())
      return true;
    break;
  }

  return false;
}
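// For instance (illustrative): given RHS == (X + 1)<nsw>, the SLT case above
// proves "X s< X + 1" from the nsw flag alone, with no range information.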
bool ScalarEvolution::isKnownPredicateViaSplitting(ICmpInst::Predicate Pred,
                                                   const SCEV *LHS,
                                                   const SCEV *RHS) {
  if (Pred != ICmpInst::ICMP_ULT || ProvingSplitPredicate)
    return false;

  // Allowing an arbitrary number of activations of
  // isKnownPredicateViaSplitting on the stack can result in exponential time
  // complexity.
  SaveAndRestore<bool> Restore(ProvingSplitPredicate, true);

  // If L >= 0 then I `ult` L <=> I >= 0 && I `slt` L
  //
  // To prove L >= 0 we use isKnownNonNegative whereas to prove I >= 0 we use
  // isKnownPredicate.  isKnownPredicate is more powerful, but also more
  // expensive; and using isKnownNonNegative(RHS) is sufficient for most of the
  // interesting cases seen in practice.  We can consider "upgrading" L >= 0 to
  // use isKnownPredicate later if needed.
  return isKnownNonNegative(RHS) &&
         isKnownPredicate(CmpInst::ICMP_SGE, LHS, getZero(LHS->getType())) &&
         isKnownPredicate(CmpInst::ICMP_SLT, LHS, RHS);
}

bool ScalarEvolution::isImpliedViaGuard(BasicBlock *BB,
                                        ICmpInst::Predicate Pred,
                                        const SCEV *LHS, const SCEV *RHS) {
  // No need to even try if we know the module has no guards.
  if (!HasGuards)
    return false;

  return any_of(*BB, [&](Instruction &I) {
    using namespace llvm::PatternMatch;

    Value *Condition;
    return match(&I, m_Intrinsic<Intrinsic::experimental_guard>(
                         m_Value(Condition))) &&
           isImpliedCond(Pred, LHS, RHS, Condition, false);
  });
}

/// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is
/// protected by a conditional between LHS and RHS.  This is used to
/// eliminate casts.
bool
ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
                                             ICmpInst::Predicate Pred,
                                             const SCEV *LHS, const SCEV *RHS) {
  // Interpret a null as meaning no loop, where there is obviously no guard
  // (interprocedural conditions notwithstanding).
  if (!L) return true;

  if (isKnownPredicateViaConstantRanges(Pred, LHS, RHS))
    return true;

  BasicBlock *Latch = L->getLoopLatch();
  if (!Latch)
    return false;

  BranchInst *LoopContinuePredicate =
      dyn_cast<BranchInst>(Latch->getTerminator());
  if (LoopContinuePredicate && LoopContinuePredicate->isConditional() &&
      isImpliedCond(Pred, LHS, RHS,
                    LoopContinuePredicate->getCondition(),
                    LoopContinuePredicate->getSuccessor(0) != L->getHeader()))
    return true;

  // We don't want more than one activation of the following loops on the stack
  // -- that can lead to O(n!) time complexity.
  if (WalkingBEDominatingConds)
    return false;

  SaveAndRestore<bool> ClearOnExit(WalkingBEDominatingConds, true);

  // See if we can exploit a trip count to prove the predicate.
  const auto &BETakenInfo = getBackedgeTakenInfo(L);
  const SCEV *LatchBECount = BETakenInfo.getExact(Latch, this);
  if (LatchBECount != getCouldNotCompute()) {
    // We know that Latch branches back to the loop header exactly
    // LatchBECount times.  This means the backedge condition at Latch is
    // equivalent to  "{0,+,1} u< LatchBECount".
    Type *Ty = LatchBECount->getType();
    auto NoWrapFlags = SCEV::NoWrapFlags(SCEV::FlagNUW | SCEV::FlagNW);
    const SCEV *LoopCounter =
        getAddRecExpr(getZero(Ty), getOne(Ty), L, NoWrapFlags);
    if (isImpliedCond(Pred, LHS, RHS, ICmpInst::ICMP_ULT, LoopCounter,
                      LatchBECount))
      return true;
  }

  // Check conditions due to any @llvm.assume intrinsics.
  for (auto &AssumeVH : AC.assumptions()) {
    if (!AssumeVH)
      continue;
    auto *CI = cast<CallInst>(AssumeVH);
    if (!DT.dominates(CI, Latch->getTerminator()))
      continue;

    if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false))
      return true;
  }

  // If the loop is not reachable from the entry block, we risk running into an
  // infinite loop as we walk up into the dom tree.  These loops do not matter
  // anyway, so we just return a conservative answer when we see them.
  if (!DT.isReachableFromEntry(L->getHeader()))
    return false;

  if (isImpliedViaGuard(Latch, Pred, LHS, RHS))
    return true;

  for (DomTreeNode *DTN = DT[Latch], *HeaderDTN = DT[L->getHeader()];
       DTN != HeaderDTN; DTN = DTN->getIDom()) {
    assert(DTN && "should reach the loop header before reaching the root!");

    BasicBlock *BB = DTN->getBlock();
    if (isImpliedViaGuard(BB, Pred, LHS, RHS))
      return true;

    BasicBlock *PBB = BB->getSinglePredecessor();
    if (!PBB)
      continue;

    BranchInst *ContinuePredicate = dyn_cast<BranchInst>(PBB->getTerminator());
    if (!ContinuePredicate || !ContinuePredicate->isConditional())
      continue;

    Value *Condition = ContinuePredicate->getCondition();

    // If we have an edge `E` within the loop body that dominates the only
    // latch, the condition guarding `E` also guards the backedge.  This
    // reasoning works only for loops with a single latch.

    BasicBlockEdge DominatingEdge(PBB, BB);
    if (DominatingEdge.isSingleEdge()) {
      // We're constructively (and conservatively) enumerating edges within the
      // loop body that dominate the latch.  The dominator tree better agree
      // with us on this:
      assert(DT.dominates(DominatingEdge, Latch) && "should be!");

      if (isImpliedCond(Pred, LHS, RHS, Condition,
                        BB != ContinuePredicate->getSuccessor(0)))
        return true;
    }
  }

  return false;
}

bool
ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
                                          ICmpInst::Predicate Pred,
                                          const SCEV *LHS, const SCEV *RHS) {
  // Interpret a null as meaning no loop, where there is obviously no guard
  // (interprocedural conditions notwithstanding).
  if (!L) return false;

  if (isKnownPredicateViaConstantRanges(Pred, LHS, RHS))
    return true;

  // Starting at the loop predecessor, climb up the predecessor chain, as long
  // as there are predecessors that can be found that have unique successors
  // leading to the original header.
  for (std::pair<BasicBlock *, BasicBlock *>
         Pair(L->getLoopPredecessor(), L->getHeader());
       Pair.first;
       Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) {

    if (isImpliedViaGuard(Pair.first, Pred, LHS, RHS))
      return true;

    BranchInst *LoopEntryPredicate =
        dyn_cast<BranchInst>(Pair.first->getTerminator());
    if (!LoopEntryPredicate ||
        LoopEntryPredicate->isUnconditional())
      continue;

    if (isImpliedCond(Pred, LHS, RHS,
                      LoopEntryPredicate->getCondition(),
                      LoopEntryPredicate->getSuccessor(0) != Pair.second))
      return true;
  }

  // Check conditions due to any @llvm.assume intrinsics.
  for (auto &AssumeVH : AC.assumptions()) {
    if (!AssumeVH)
      continue;
    auto *CI = cast<CallInst>(AssumeVH);
    if (!DT.dominates(CI, L->getHeader()))
      continue;

    if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false))
      return true;
  }

  return false;
}

bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
                                    const SCEV *LHS, const SCEV *RHS,
                                    Value *FoundCondValue,
                                    bool Inverse) {
  if (!PendingLoopPredicates.insert(FoundCondValue).second)
    return false;

  auto ClearOnExit =
      make_scope_exit([&]() { PendingLoopPredicates.erase(FoundCondValue); });

  // Recursively handle And and Or conditions.
  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FoundCondValue)) {
    if (BO->getOpcode() == Instruction::And) {
      if (!Inverse)
        return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) ||
               isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse);
    } else if (BO->getOpcode() == Instruction::Or) {
      if (Inverse)
        return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) ||
               isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse);
    }
  }

  ICmpInst *ICI = dyn_cast<ICmpInst>(FoundCondValue);
  if (!ICI) return false;

  // Now that we have found a conditional branch that dominates the loop or
  // controls the loop latch, check to see if it is the comparison we are
  // looking for.
  ICmpInst::Predicate FoundPred;
  if (Inverse)
    FoundPred = ICI->getInversePredicate();
  else
    FoundPred = ICI->getPredicate();

  const SCEV *FoundLHS = getSCEV(ICI->getOperand(0));
  const SCEV *FoundRHS = getSCEV(ICI->getOperand(1));

  return isImpliedCond(Pred, LHS, RHS, FoundPred, FoundLHS, FoundRHS);
}

bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
                                    const SCEV *RHS,
                                    ICmpInst::Predicate FoundPred,
                                    const SCEV *FoundLHS,
                                    const SCEV *FoundRHS) {
  // Balance the types.
  if (getTypeSizeInBits(LHS->getType()) <
      getTypeSizeInBits(FoundLHS->getType())) {
    if (CmpInst::isSigned(Pred)) {
      LHS = getSignExtendExpr(LHS, FoundLHS->getType());
      RHS = getSignExtendExpr(RHS, FoundLHS->getType());
    } else {
      LHS = getZeroExtendExpr(LHS, FoundLHS->getType());
      RHS = getZeroExtendExpr(RHS, FoundLHS->getType());
    }
  } else if (getTypeSizeInBits(LHS->getType()) >
      getTypeSizeInBits(FoundLHS->getType())) {
    if (CmpInst::isSigned(FoundPred)) {
      FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType());
      FoundRHS = getSignExtendExpr(FoundRHS, LHS->getType());
    } else {
      FoundLHS = getZeroExtendExpr(FoundLHS, LHS->getType());
      FoundRHS = getZeroExtendExpr(FoundRHS, LHS->getType());
    }
  }

  // Canonicalize the query to match the way instcombine will have
  // canonicalized the comparison.
  if (SimplifyICmpOperands(Pred, LHS, RHS))
    if (LHS == RHS)
      return CmpInst::isTrueWhenEqual(Pred);
  if (SimplifyICmpOperands(FoundPred, FoundLHS, FoundRHS))
    if (FoundLHS == FoundRHS)
      return CmpInst::isFalseWhenEqual(FoundPred);

  // Check to see if we can make the LHS or RHS match.
  if (LHS == FoundRHS || RHS == FoundLHS) {
    if (isa<SCEVConstant>(RHS)) {
      std::swap(FoundLHS, FoundRHS);
      FoundPred = ICmpInst::getSwappedPredicate(FoundPred);
    } else {
      std::swap(LHS, RHS);
      Pred = ICmpInst::getSwappedPredicate(Pred);
    }
  }

  // Check whether the found predicate is the same as the desired predicate.
  if (FoundPred == Pred)
    return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS);

  // Check whether swapping the found predicate makes it the same as the
  // desired predicate.
  if (ICmpInst::getSwappedPredicate(FoundPred) == Pred) {
    if (isa<SCEVConstant>(RHS))
      return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS);
    else
      return isImpliedCondOperands(ICmpInst::getSwappedPredicate(Pred),
                                   RHS, LHS, FoundLHS, FoundRHS);
  }

  // Unsigned comparison is the same as signed comparison when both the
  // operands are non-negative.
  if (CmpInst::isUnsigned(FoundPred) &&
      CmpInst::getSignedPredicate(FoundPred) == Pred &&
      isKnownNonNegative(FoundLHS) && isKnownNonNegative(FoundRHS))
    return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS);

  // Check if we can make progress by sharpening ranges.
  if (FoundPred == ICmpInst::ICMP_NE &&
      (isa<SCEVConstant>(FoundLHS) || isa<SCEVConstant>(FoundRHS))) {

    const SCEVConstant *C = nullptr;
    const SCEV *V = nullptr;

    if (isa<SCEVConstant>(FoundLHS)) {
      C = cast<SCEVConstant>(FoundLHS);
      V = FoundRHS;
    } else {
      C = cast<SCEVConstant>(FoundRHS);
      V = FoundLHS;
    }

    // The guarding predicate tells us that C != V. If the known range
    // of V is [C, t), we can sharpen the range to [C + 1, t).  The
    // range we consider has to correspond to same signedness as the
    // predicate we're interested in folding.

    APInt Min = ICmpInst::isSigned(Pred) ?
        getSignedRangeMin(V) : getUnsignedRangeMin(V);

    if (Min == C->getAPInt()) {
      // Given (V >= Min && V != Min) we conclude V >= (Min + 1).
      // This is true even if (Min + 1) wraps around -- in case of
      // wraparound, (Min + 1) < Min, so (V >= Min => V >= (Min + 1)).

      APInt SharperMin = Min + 1;

      switch (Pred) {
        case ICmpInst::ICMP_SGE:
        case ICmpInst::ICMP_UGE:
          // We know V `Pred` SharperMin.  If this implies LHS `Pred`
          // RHS, we're done.
          if (isImpliedCondOperands(Pred, LHS, RHS, V,
                                    getConstant(SharperMin)))
            return true;
          LLVM_FALLTHROUGH;

        case ICmpInst::ICMP_SGT:
        case ICmpInst::ICMP_UGT:
          // We know from the range information that (V `Pred` Min ||
          // V == Min).  We know from the guarding condition that !(V
          // == Min).  This gives us
          //
          //       V `Pred` Min || V == Min && !(V == Min)
          //   =>  V `Pred` Min
          //
          // If V `Pred` Min implies LHS `Pred` RHS, we're done.

          if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(Min)))
            return true;
          LLVM_FALLTHROUGH;

        default:
          // No change
          break;
      }
    }
  }

  // Check whether the actual condition is beyond sufficient.
  if (FoundPred == ICmpInst::ICMP_EQ)
    if (ICmpInst::isTrueWhenEqual(Pred))
      if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS))
        return true;
  if (Pred == ICmpInst::ICMP_NE)
    if (!ICmpInst::isTrueWhenEqual(FoundPred))
      if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS))
        return true;

  // Otherwise assume the worst.
  // Otherwise assume the worst.
  return false;
}

bool ScalarEvolution::splitBinaryAdd(const SCEV *Expr,
                                     const SCEV *&L, const SCEV *&R,
                                     SCEV::NoWrapFlags &Flags) {
  const auto *AE = dyn_cast<SCEVAddExpr>(Expr);
  if (!AE || AE->getNumOperands() != 2)
    return false;

  L = AE->getOperand(0);
  R = AE->getOperand(1);
  Flags = AE->getNoWrapFlags();
  return true;
}

Optional<APInt> ScalarEvolution::computeConstantDifference(const SCEV *More,
                                                           const SCEV *Less) {
  // We avoid subtracting expressions here because this function is usually
  // fairly deep in the call stack (i.e. is called many times).

  if (isa<SCEVAddRecExpr>(Less) && isa<SCEVAddRecExpr>(More)) {
    const auto *LAR = cast<SCEVAddRecExpr>(Less);
    const auto *MAR = cast<SCEVAddRecExpr>(More);

    if (LAR->getLoop() != MAR->getLoop())
      return None;

    // We look at affine expressions only; not for correctness but to keep
    // getStepRecurrence cheap.
    if (!LAR->isAffine() || !MAR->isAffine())
      return None;

    if (LAR->getStepRecurrence(*this) != MAR->getStepRecurrence(*this))
      return None;

    Less = LAR->getStart();
    More = MAR->getStart();

    // fall through
  }

  if (isa<SCEVConstant>(Less) && isa<SCEVConstant>(More)) {
    const auto &M = cast<SCEVConstant>(More)->getAPInt();
    const auto &L = cast<SCEVConstant>(Less)->getAPInt();
    return M - L;
  }

  const SCEV *L, *R;
  SCEV::NoWrapFlags Flags;
  if (splitBinaryAdd(Less, L, R, Flags))
    if (const auto *LC = dyn_cast<SCEVConstant>(L))
      if (R == More)
        return -(LC->getAPInt());

  if (splitBinaryAdd(More, L, R, Flags))
    if (const auto *LC = dyn_cast<SCEVConstant>(L))
      if (R == Less)
        return LC->getAPInt();

  return None;
}

bool ScalarEvolution::isImpliedCondOperandsViaNoOverflow(
    ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS,
    const SCEV *FoundLHS, const SCEV *FoundRHS) {
  if (Pred != CmpInst::ICMP_SLT && Pred != CmpInst::ICMP_ULT)
    return false;

  const auto *AddRecLHS = dyn_cast<SCEVAddRecExpr>(LHS);
  if (!AddRecLHS)
    return false;

  const auto *AddRecFoundLHS = dyn_cast<SCEVAddRecExpr>(FoundLHS);
  if (!AddRecFoundLHS)
    return false;

  // We'd like to let SCEV reason about control dependencies, so we constrain
  // both the inequalities to be about add recurrences on the same loop.  This
  // way we can use isLoopEntryGuardedByCond later.

  const Loop *L = AddRecFoundLHS->getLoop();
  if (L != AddRecLHS->getLoop())
    return false;

  //  FoundLHS u< FoundRHS u< -C =>  (FoundLHS + C) u< (FoundRHS + C) ... (1)
  //
  //  FoundLHS s< FoundRHS s< INT_MIN - C => (FoundLHS + C) s< (FoundRHS + C)
  //                                                                  ... (2)
  //
  // Informal proof for (2), assuming (1) [*]:
  //
  // We'll also assume (A s< B) <=> ((A + INT_MIN) u< (B + INT_MIN)) ... (3)[**]
  //
  // Then
  //
  //       FoundLHS s< FoundRHS s< INT_MIN - C
  //  <=>  (FoundLHS + INT_MIN) u< (FoundRHS + INT_MIN) u< -C   [ using (3) ]
  //  <=>  (FoundLHS + INT_MIN + C) u< (FoundRHS + INT_MIN + C) [ using (1) ]
  //  <=>  (FoundLHS + INT_MIN + C + INT_MIN) s<
  //       (FoundRHS + INT_MIN + C + INT_MIN)                   [ using (3) ]
  //  <=>  FoundLHS + C s< FoundRHS + C
  //
  // [*]: (1) can be proved by ruling out overflow.
  //
  // [**]: This can be proved by analyzing all the four possibilities:
  //    (A s< 0, B s< 0), (A s< 0, B s>= 0), (A s>= 0, B s< 0) and
  //    (A s>= 0, B s>= 0).
  //
  // Note:
  // Despite (2), "FoundRHS s< INT_MIN - C" does not mean that "FoundRHS + C"
  // will not sign underflow.  For instance, say FoundLHS = (i8 -128), FoundRHS
  // = (i8 -127) and C = (i8 -100).  Then INT_MIN - C = (i8 -28), and FoundRHS
  // s< (INT_MIN - C).  Lack of sign overflow / underflow in "FoundRHS + C" is
  // neither necessary nor sufficient to prove "(FoundLHS + C) s< (FoundRHS +
  // C)".
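  // A concrete 8-bit instance of rule (1), with illustrative values: take
  // C = 10, so -C is 246 in unsigned arithmetic.  With FoundLHS = 5 and
  // FoundRHS = 100 we have 5 u< 100 u< 246, and adding C keeps both sides
  // below 256, so 15 u< 110 still holds:
  //
  //   uint8_t C = 10, FoundLHS = 5, FoundRHS = 100;
  //   assert(FoundLHS < FoundRHS && FoundRHS < uint8_t(-C));   // antecedent
  //   assert(uint8_t(FoundLHS + C) < uint8_t(FoundRHS + C));   // consequent
  //
  // The bound "FoundRHS u< -C" is exactly what rules out the unsigned
  // overflow that could otherwise reorder the two sums.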
  Optional<APInt> LDiff = computeConstantDifference(LHS, FoundLHS);
  Optional<APInt> RDiff = computeConstantDifference(RHS, FoundRHS);

  if (!LDiff || !RDiff || *LDiff != *RDiff)
    return false;

  if (LDiff->isMinValue())
    return true;

  APInt FoundRHSLimit;

  if (Pred == CmpInst::ICMP_ULT) {
    FoundRHSLimit = -(*RDiff);
  } else {
    assert(Pred == CmpInst::ICMP_SLT && "Checked above!");
    FoundRHSLimit =
        APInt::getSignedMinValue(getTypeSizeInBits(RHS->getType())) - *RDiff;
  }

  // Try to prove (1) or (2), as needed.
  return isLoopEntryGuardedByCond(L, Pred, FoundRHS,
                                  getConstant(FoundRHSLimit));
}

bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
                                            const SCEV *LHS, const SCEV *RHS,
                                            const SCEV *FoundLHS,
                                            const SCEV *FoundRHS) {
  if (isImpliedCondOperandsViaRanges(Pred, LHS, RHS, FoundLHS, FoundRHS))
    return true;

  if (isImpliedCondOperandsViaNoOverflow(Pred, LHS, RHS, FoundLHS, FoundRHS))
    return true;

  return isImpliedCondOperandsHelper(Pred, LHS, RHS,
                                     FoundLHS, FoundRHS) ||
         // ~x < ~y --> x > y
         isImpliedCondOperandsHelper(Pred, LHS, RHS,
                                     getNotSCEV(FoundRHS),
                                     getNotSCEV(FoundLHS));
}

/// If Expr computes ~A, return A; otherwise return nullptr.
static const SCEV *MatchNotExpr(const SCEV *Expr) {
  const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Expr);
  if (!Add || Add->getNumOperands() != 2 ||
      !Add->getOperand(0)->isAllOnesValue())
    return nullptr;

  const SCEVMulExpr *AddRHS = dyn_cast<SCEVMulExpr>(Add->getOperand(1));
  if (!AddRHS || AddRHS->getNumOperands() != 2 ||
      !AddRHS->getOperand(0)->isAllOnesValue())
    return nullptr;

  return AddRHS->getOperand(1);
}

/// Is MaybeMaxExpr an SMax or UMax of Candidate and some other values?
template <typename MaxExprType>
static bool IsMaxConsistingOf(const SCEV *MaybeMaxExpr,
                              const SCEV *Candidate) {
  const MaxExprType *MaxExpr = dyn_cast<MaxExprType>(MaybeMaxExpr);
  if (!MaxExpr) return false;

  return find(MaxExpr->operands(), Candidate) != MaxExpr->op_end();
}

/// Is MaybeMinExpr an SMin or UMin of Candidate and some other values?
template <typename MaxExprType>
static bool IsMinConsistingOf(ScalarEvolution &SE,
                              const SCEV *MaybeMinExpr,
                              const SCEV *Candidate) {
  const SCEV *MaybeMaxExpr = MatchNotExpr(MaybeMinExpr);
  if (!MaybeMaxExpr)
    return false;

  return IsMaxConsistingOf<MaxExprType>(MaybeMaxExpr,
                                        SE.getNotSCEV(Candidate));
}

static bool IsKnownPredicateViaAddRecStart(ScalarEvolution &SE,
                                           ICmpInst::Predicate Pred,
                                           const SCEV *LHS, const SCEV *RHS) {
  // If both sides are affine addrecs for the same loop, with equal
  // steps, and we know the recurrences don't wrap, then we only
  // need to check the predicate on the starting values.

  if (!ICmpInst::isRelational(Pred))
    return false;

  const SCEVAddRecExpr *LAR = dyn_cast<SCEVAddRecExpr>(LHS);
  if (!LAR)
    return false;
  const SCEVAddRecExpr *RAR = dyn_cast<SCEVAddRecExpr>(RHS);
  if (!RAR)
    return false;
  if (LAR->getLoop() != RAR->getLoop())
    return false;
  if (!LAR->isAffine() || !RAR->isAffine())
    return false;

  if (LAR->getStepRecurrence(SE) != RAR->getStepRecurrence(SE))
    return false;

  SCEV::NoWrapFlags NW = ICmpInst::isSigned(Pred) ?
                         SCEV::FlagNSW : SCEV::FlagNUW;
  if (!LAR->getNoWrapFlags(NW) || !RAR->getNoWrapFlags(NW))
    return false;

  return SE.isKnownPredicate(Pred, LAR->getStart(), RAR->getStart());
}

/// Is LHS `Pred` RHS true by virtue of LHS or RHS being a Min or Max
/// expression?
static bool IsKnownPredicateViaMinOrMax(ScalarEvolution &SE,
                                        ICmpInst::Predicate Pred,
                                        const SCEV *LHS, const SCEV *RHS) {
  switch (Pred) {
  default:
    return false;

  case ICmpInst::ICMP_SGE:
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ICmpInst::ICMP_SLE:
    return
        // min(A, ...) <= A
        IsMinConsistingOf<SCEVSMaxExpr>(SE, LHS, RHS) ||
        // A <= max(A, ...)
        IsMaxConsistingOf<SCEVSMaxExpr>(RHS, LHS);

  case ICmpInst::ICMP_UGE:
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ICmpInst::ICMP_ULE:
    return
        // min(A, ...) <= A
        IsMinConsistingOf<SCEVUMaxExpr>(SE, LHS, RHS) ||
        // A <= max(A, ...)
        IsMaxConsistingOf<SCEVUMaxExpr>(RHS, LHS);
  }

  llvm_unreachable("covered switch fell through?!");
}

bool ScalarEvolution::isImpliedViaOperations(ICmpInst::Predicate Pred,
                                             const SCEV *LHS, const SCEV *RHS,
                                             const SCEV *FoundLHS,
                                             const SCEV *FoundRHS,
                                             unsigned Depth) {
  assert(getTypeSizeInBits(LHS->getType()) ==
             getTypeSizeInBits(RHS->getType()) &&
         "LHS and RHS have different sizes?");
  assert(getTypeSizeInBits(FoundLHS->getType()) ==
             getTypeSizeInBits(FoundRHS->getType()) &&
         "FoundLHS and FoundRHS have different sizes?");
  // We want to avoid hurting the compile time with analysis of too big trees.
  if (Depth > MaxSCEVOperationsImplicationDepth)
    return false;
  // We only want to work with ICMP_SGT comparison so far.
  // TODO: Extend to ICMP_UGT?
  if (Pred == ICmpInst::ICMP_SLT) {
    Pred = ICmpInst::ICMP_SGT;
    std::swap(LHS, RHS);
    std::swap(FoundLHS, FoundRHS);
  }
  if (Pred != ICmpInst::ICMP_SGT)
    return false;

  auto GetOpFromSExt = [&](const SCEV *S) {
    if (auto *Ext = dyn_cast<SCEVSignExtendExpr>(S))
      return Ext->getOperand();
    // TODO: If S is a SCEVConstant then you can cheaply "strip" the sext off
    // the constant in some cases.
    return S;
  };

  // Acquire values from extensions.
  auto *OrigFoundLHS = FoundLHS;
  LHS = GetOpFromSExt(LHS);
  FoundLHS = GetOpFromSExt(FoundLHS);

  // Checks whether the SGT predicate can be proved trivially or using the
  // found context.
  auto IsSGTViaContext = [&](const SCEV *S1, const SCEV *S2) {
    return isKnownViaSimpleReasoning(ICmpInst::ICMP_SGT, S1, S2) ||
           isImpliedViaOperations(ICmpInst::ICMP_SGT, S1, S2, OrigFoundLHS,
                                  FoundRHS, Depth + 1);
  };

  if (auto *LHSAddExpr = dyn_cast<SCEVAddExpr>(LHS)) {
    // We want to avoid creation of any new non-constant SCEV. Since we are
    // going to compare the operands to RHS, we should be certain that we don't
    // need any size extensions for this. So let's decline all cases when the
    // sizes of types of LHS and RHS do not match.
    // TODO: Maybe try to get RHS from sext to catch more cases?
    if (getTypeSizeInBits(LHS->getType()) != getTypeSizeInBits(RHS->getType()))
      return false;

    // Should not overflow.
    if (!LHSAddExpr->hasNoSignedWrap())
      return false;

    auto *LL = LHSAddExpr->getOperand(0);
    auto *LR = LHSAddExpr->getOperand(1);
    auto *MinusOne = getNegativeSCEV(getOne(RHS->getType()));

    // Checks that S1 >= 0 && S2 > RHS, trivially or using the found context.
    auto IsSumGreaterThanRHS = [&](const SCEV *S1, const SCEV *S2) {
      return IsSGTViaContext(S1, MinusOne) && IsSGTViaContext(S2, RHS);
    };
    // Try to prove the following rule:
    // (LHS = LL + LR) && (LL >= 0) && (LR > RHS) => (LHS > RHS).
    // (LHS = LL + LR) && (LR >= 0) && (LL > RHS) => (LHS > RHS).
    if (IsSumGreaterThanRHS(LL, LR) || IsSumGreaterThanRHS(LR, LL))
      return true;
  } else if (auto *LHSUnknownExpr = dyn_cast<SCEVUnknown>(LHS)) {
    Value *LL, *LR;
    // FIXME: Once we have SDiv implemented, we can get rid of this matching.
    using namespace llvm::PatternMatch;
    if (match(LHSUnknownExpr->getValue(), m_SDiv(m_Value(LL), m_Value(LR)))) {
      // Rules for division.
      // We are going to perform some comparisons with Denominator and its
      // derivative expressions. In the general case, creating a SCEV for it
      // may lead to a complex analysis of the whole graph, and in particular
      // it can request trip count recalculation for the same loop; that
      // recalculation would then be cached as SCEVCouldNotCompute to avoid
      // infinite recursion. To avoid this, we only want to create SCEVs that
      // are constants in this section.
      // So we bail if Denominator is not a constant.
      if (!isa<ConstantInt>(LR))
        return false;

      auto *Denominator = cast<SCEVConstant>(getSCEV(LR));

      // We want to make sure that LHS = FoundLHS / Denominator. If so, then a
      // SCEV for the numerator already exists and matches with FoundLHS.
      auto *Numerator = getExistingSCEV(LL);
      if (!Numerator || Numerator->getType() != FoundLHS->getType())
        return false;

      // Make sure that the numerator matches with FoundLHS and the
      // denominator is positive.
      if (!HasSameValue(Numerator, FoundLHS) || !isKnownPositive(Denominator))
        return false;

      auto *DTy = Denominator->getType();
      auto *FRHSTy = FoundRHS->getType();
      if (DTy->isPointerTy() != FRHSTy->isPointerTy())
        // One of the types is a pointer and the other is not; we cannot
        // extend them properly to a wider type, so reject this case.
        // TODO: Usage of getEffectiveSCEVType for DTy, FRHSTy etc should help
        // to avoid this check.
        return false;

      // Given that:
      // FoundLHS > FoundRHS, LHS = FoundLHS / Denominator, Denominator > 0.
      auto *WTy = getWiderType(DTy, FRHSTy);
      auto *DenominatorExt = getNoopOrSignExtend(Denominator, WTy);
      auto *FoundRHSExt = getNoopOrSignExtend(FoundRHS, WTy);

      // Try to prove the following rule:
      // (FoundRHS > Denominator - 2) && (RHS <= 0) => (LHS > RHS).
      // For example, if FoundRHS = 2 then FoundLHS > 2, i.e. FoundLHS is at
      // least 3, and FoundRHS > Denominator - 2 means Denominator < 4;
      // dividing a value of at least 3 by a positive Denominator < 4 yields
      // at least 1.
      auto *DenomMinusTwo = getMinusSCEV(DenominatorExt, getConstant(WTy, 2));
      if (isKnownNonPositive(RHS) &&
          IsSGTViaContext(FoundRHSExt, DenomMinusTwo))
        return true;

      // Try to prove the following rule:
      // (FoundRHS > -1 - Denominator) && (RHS < 0) => (LHS > RHS).
      // For example, given that FoundLHS > -3, FoundLHS is at least -2.
      // If we divide it by Denominator > 2, then:
      // 1. If FoundLHS is negative, then the result is 0.
      // 2. If FoundLHS is non-negative, then the result is non-negative.
      // Either way, the result is non-negative.
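      // A concrete instance of the rule above (illustrative values):
      // Denominator = 3 and FoundLHS > -3, i.e. FoundLHS >= -2.  Signed
      // division truncates toward zero, so:
      //
      //   -2 / 3 == 0   and   -1 / 3 == 0   // negative numerators give 0
      //    7 / 3 == 2                       // non-negative ones stay >= 0
      //
      // Hence LHS = FoundLHS / 3 is non-negative, and any RHS that is known
      // to be negative satisfies LHS > RHS.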
      auto *MinusOne = getNegativeSCEV(getOne(WTy));
      auto *NegDenomMinusOne = getMinusSCEV(MinusOne, DenominatorExt);
      if (isKnownNegative(RHS) &&
          IsSGTViaContext(FoundRHSExt, NegDenomMinusOne))
        return true;
    }
  }

  return false;
}

bool ScalarEvolution::isKnownViaSimpleReasoning(ICmpInst::Predicate Pred,
                                                const SCEV *LHS,
                                                const SCEV *RHS) {
  return isKnownPredicateViaConstantRanges(Pred, LHS, RHS) ||
         IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS) ||
         IsKnownPredicateViaAddRecStart(*this, Pred, LHS, RHS) ||
         isKnownPredicateViaNoOverflow(Pred, LHS, RHS);
}

bool
ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
                                             const SCEV *LHS, const SCEV *RHS,
                                             const SCEV *FoundLHS,
                                             const SCEV *FoundRHS) {
  switch (Pred) {
  default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
  case ICmpInst::ICMP_EQ:
  case ICmpInst::ICMP_NE:
    if (HasSameValue(LHS, FoundLHS) && HasSameValue(RHS, FoundRHS))
      return true;
    break;
  case ICmpInst::ICMP_SLT:
  case ICmpInst::ICMP_SLE:
    if (isKnownViaSimpleReasoning(ICmpInst::ICMP_SLE, LHS, FoundLHS) &&
        isKnownViaSimpleReasoning(ICmpInst::ICMP_SGE, RHS, FoundRHS))
      return true;
    break;
  case ICmpInst::ICMP_SGT:
  case ICmpInst::ICMP_SGE:
    if (isKnownViaSimpleReasoning(ICmpInst::ICMP_SGE, LHS, FoundLHS) &&
        isKnownViaSimpleReasoning(ICmpInst::ICMP_SLE, RHS, FoundRHS))
      return true;
    break;
  case ICmpInst::ICMP_ULT:
  case ICmpInst::ICMP_ULE:
    if (isKnownViaSimpleReasoning(ICmpInst::ICMP_ULE, LHS, FoundLHS) &&
        isKnownViaSimpleReasoning(ICmpInst::ICMP_UGE, RHS, FoundRHS))
      return true;
    break;
  case ICmpInst::ICMP_UGT:
  case ICmpInst::ICMP_UGE:
    if (isKnownViaSimpleReasoning(ICmpInst::ICMP_UGE, LHS, FoundLHS) &&
        isKnownViaSimpleReasoning(ICmpInst::ICMP_ULE, RHS, FoundRHS))
      return true;
    break;
  }

  // Maybe it can be proved via operations?
  if (isImpliedViaOperations(Pred, LHS, RHS, FoundLHS, FoundRHS))
    return true;

  return false;
}

bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred,
                                                     const SCEV *LHS,
                                                     const SCEV *RHS,
                                                     const SCEV *FoundLHS,
                                                     const SCEV *FoundRHS) {
  if (!isa<SCEVConstant>(RHS) || !isa<SCEVConstant>(FoundRHS))
    // The restriction on `FoundRHS` can be lifted easily -- it exists only to
    // reduce the compile time impact of this optimization.
    return false;

  Optional<APInt> Addend = computeConstantDifference(LHS, FoundLHS);
  if (!Addend)
    return false;

  const APInt &ConstFoundRHS = cast<SCEVConstant>(FoundRHS)->getAPInt();

  // `FoundLHSRange` is the range we know `FoundLHS` to be in by virtue of the
  // antecedent "`FoundLHS` `Pred` `FoundRHS`".
  ConstantRange FoundLHSRange =
      ConstantRange::makeAllowedICmpRegion(Pred, ConstFoundRHS);

  // Since `LHS` is `FoundLHS` + `Addend`, we can compute a range for `LHS`:
  ConstantRange LHSRange = FoundLHSRange.add(ConstantRange(*Addend));

  // We can also compute the range of values for `LHS` that satisfy the
  // consequent, "`LHS` `Pred` `RHS`":
  const APInt &ConstRHS = cast<SCEVConstant>(RHS)->getAPInt();
  ConstantRange SatisfyingLHSRange =
      ConstantRange::makeSatisfyingICmpRegion(Pred, ConstRHS);

  // The antecedent implies the consequent if every value of `LHS` that
  // satisfies the antecedent also satisfies the consequent.
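  // A worked instance with illustrative i8 numbers: the antecedent
  // "FoundLHS u< 10" puts FoundLHS in [0, 10).  With Addend = 100, LHS lies
  // in [100, 110).  For the consequent "LHS u< 120" the satisfying region is
  // [0, 120), which contains [100, 110), so the implication holds and the
  // containment check below returns true for that query.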
return SatisfyingLHSRange.contains(LHSRange); } bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride, bool IsSigned, bool NoWrap) { assert(isKnownPositive(Stride) && "Positive stride expected!"); if (NoWrap) return false; unsigned BitWidth = getTypeSizeInBits(RHS->getType()); const SCEV *One = getOne(Stride->getType()); if (IsSigned) { APInt MaxRHS = getSignedRangeMax(RHS); APInt MaxValue = APInt::getSignedMaxValue(BitWidth); APInt MaxStrideMinusOne = getSignedRangeMax(getMinusSCEV(Stride, One)); // SMaxRHS + SMaxStrideMinusOne > SMaxValue => overflow! return (std::move(MaxValue) - MaxStrideMinusOne).slt(MaxRHS); } APInt MaxRHS = getUnsignedRangeMax(RHS); APInt MaxValue = APInt::getMaxValue(BitWidth); APInt MaxStrideMinusOne = getUnsignedRangeMax(getMinusSCEV(Stride, One)); // UMaxRHS + UMaxStrideMinusOne > UMaxValue => overflow! return (std::move(MaxValue) - MaxStrideMinusOne).ult(MaxRHS); } bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride, bool IsSigned, bool NoWrap) { if (NoWrap) return false; unsigned BitWidth = getTypeSizeInBits(RHS->getType()); const SCEV *One = getOne(Stride->getType()); if (IsSigned) { APInt MinRHS = getSignedRangeMin(RHS); APInt MinValue = APInt::getSignedMinValue(BitWidth); APInt MaxStrideMinusOne = getSignedRangeMax(getMinusSCEV(Stride, One)); // SMinRHS - SMaxStrideMinusOne < SMinValue => overflow! return (std::move(MinValue) + MaxStrideMinusOne).sgt(MinRHS); } APInt MinRHS = getUnsignedRangeMin(RHS); APInt MinValue = APInt::getMinValue(BitWidth); APInt MaxStrideMinusOne = getUnsignedRangeMax(getMinusSCEV(Stride, One)); // UMinRHS - UMaxStrideMinusOne < UMinValue => overflow! return (std::move(MinValue) + MaxStrideMinusOne).ugt(MinRHS); } const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step, bool Equality) { const SCEV *One = getOne(Step->getType()); Delta = Equality ? getAddExpr(Delta, Step) : getAddExpr(Delta, getMinusSCEV(Step, One)); return getUDivExpr(Delta, Step); } const SCEV *ScalarEvolution::computeMaxBECountForLT(const SCEV *Start, const SCEV *Stride, const SCEV *End, unsigned BitWidth, bool IsSigned) { assert(!isKnownNonPositive(Stride) && "Stride is expected strictly positive!"); // Calculate the maximum backedge count based on the range of values // permitted by Start, End, and Stride. const SCEV *MaxBECount; APInt MinStart = IsSigned ? getSignedRangeMin(Start) : getUnsignedRangeMin(Start); APInt StrideForMaxBECount = IsSigned ? getSignedRangeMin(Stride) : getUnsignedRangeMin(Stride); // We already know that the stride is positive, so we paper over conservatism // in our range computation by forcing StrideForMaxBECount to be at least one. // In theory this is unnecessary, but we expect MaxBECount to be a // SCEVConstant, and (udiv 0) is not constant folded by SCEV (there // is nothing to constant fold it to). APInt One(BitWidth, 1, IsSigned); StrideForMaxBECount = APIntOps::smax(One, StrideForMaxBECount); APInt MaxValue = IsSigned ? APInt::getSignedMaxValue(BitWidth) : APInt::getMaxValue(BitWidth); APInt Limit = MaxValue - (StrideForMaxBECount - 1); // Although End can be a MAX expression we estimate MaxEnd considering only // the case End = RHS of the loop termination condition. This is safe because // in the other case (End - Start) is zero, leading to a zero maximum backedge // taken count. APInt MaxEnd = IsSigned ? 
APIntOps::smin(getSignedRangeMax(End), Limit) : APIntOps::umin(getUnsignedRangeMax(End), Limit); MaxBECount = computeBECount(getConstant(MaxEnd - MinStart) /* Delta */, getConstant(StrideForMaxBECount) /* Step */, false /* Equality */); return MaxBECount; } ScalarEvolution::ExitLimit ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, bool IsSigned, bool ControlsExit, bool AllowPredicates) { SmallPtrSet Predicates; const SCEVAddRecExpr *IV = dyn_cast(LHS); bool PredicatedIV = false; if (!IV && AllowPredicates) { // Try to make this an AddRec using runtime tests, in the first X // iterations of this loop, where X is the SCEV expression found by the // algorithm below. IV = convertSCEVToAddRecWithPredicates(LHS, L, Predicates); PredicatedIV = true; } // Avoid weird loops if (!IV || IV->getLoop() != L || !IV->isAffine()) return getCouldNotCompute(); bool NoWrap = ControlsExit && IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW); const SCEV *Stride = IV->getStepRecurrence(*this); bool PositiveStride = isKnownPositive(Stride); // Avoid negative or zero stride values. if (!PositiveStride) { // We can compute the correct backedge taken count for loops with unknown // strides if we can prove that the loop is not an infinite loop with side // effects. Here's the loop structure we are trying to handle - // // i = start // do { // A[i] = i; // i += s; // } while (i < end); // // The backedge taken count for such loops is evaluated as - // (max(end, start + stride) - start - 1) /u stride // // The additional preconditions that we need to check to prove correctness // of the above formula is as follows - // // a) IV is either nuw or nsw depending upon signedness (indicated by the // NoWrap flag). // b) loop is single exit with no side effects. // // // Precondition a) implies that if the stride is negative, this is a single // trip loop. The backedge taken count formula reduces to zero in this case. // // Precondition b) implies that the unknown stride cannot be zero otherwise // we have UB. // // The positive stride case is the same as isKnownPositive(Stride) returning // true (original behavior of the function). // // We want to make sure that the stride is truly unknown as there are edge // cases where ScalarEvolution propagates no wrap flags to the // post-increment/decrement IV even though the increment/decrement operation // itself is wrapping. The computed backedge taken count may be wrong in // such cases. This is prevented by checking that the stride is not known to // be either positive or non-positive. For example, no wrap flags are // propagated to the post-increment IV of this loop with a trip count of 2 - // // unsigned char i; // for(i=127; i<128; i+=129) // A[i] = i; // if (PredicatedIV || !NoWrap || isKnownNonPositive(Stride) || !loopHasNoSideEffects(L)) return getCouldNotCompute(); } else if (!Stride->isOne() && doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap)) // Avoid proven overflow cases: this will ensure that the backedge taken // count will not generate any unsigned overflow. Relaxed no-overflow // conditions exploit NoWrapFlags, allowing to optimize in presence of // undefined behaviors like the case of C language. return getCouldNotCompute(); ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; const SCEV *Start = IV->getStart(); const SCEV *End = RHS; // When the RHS is not invariant, we do not know the end bound of the loop and // cannot calculate the ExactBECount needed by ExitLimit. 
However, we can // calculate the MaxBECount, given the start, stride and max value for the end // bound of the loop (RHS), and the fact that IV does not overflow (which is // checked above). if (!isLoopInvariant(RHS, L)) { const SCEV *MaxBECount = computeMaxBECountForLT( Start, Stride, RHS, getTypeSizeInBits(LHS->getType()), IsSigned); return ExitLimit(getCouldNotCompute() /* ExactNotTaken */, MaxBECount, false /*MaxOrZero*/, Predicates); } // If the backedge is taken at least once, then it will be taken // (End-Start)/Stride times (rounded up to a multiple of Stride), where Start // is the LHS value of the less-than comparison the first time it is evaluated // and End is the RHS. const SCEV *BECountIfBackedgeTaken = computeBECount(getMinusSCEV(End, Start), Stride, false); // If the loop entry is guarded by the result of the backedge test of the // first loop iteration, then we know the backedge will be taken at least // once and so the backedge taken count is as above. If not then we use the // expression (max(End,Start)-Start)/Stride to describe the backedge count, // as if the backedge is taken at least once max(End,Start) is End and so the // result is as above, and if not max(End,Start) is Start so we get a backedge // count of zero. const SCEV *BECount; if (isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS)) BECount = BECountIfBackedgeTaken; else { End = IsSigned ? getSMaxExpr(RHS, Start) : getUMaxExpr(RHS, Start); BECount = computeBECount(getMinusSCEV(End, Start), Stride, false); } const SCEV *MaxBECount; bool MaxOrZero = false; if (isa(BECount)) MaxBECount = BECount; else if (isa(BECountIfBackedgeTaken)) { // If we know exactly how many times the backedge will be taken if it's // taken at least once, then the backedge count will either be that or // zero. MaxBECount = BECountIfBackedgeTaken; MaxOrZero = true; } else { MaxBECount = computeMaxBECountForLT( Start, Stride, RHS, getTypeSizeInBits(LHS->getType()), IsSigned); } if (isa(MaxBECount) && !isa(BECount)) MaxBECount = getConstant(getUnsignedRangeMax(BECount)); return ExitLimit(BECount, MaxBECount, MaxOrZero, Predicates); } ScalarEvolution::ExitLimit ScalarEvolution::howManyGreaterThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, bool IsSigned, bool ControlsExit, bool AllowPredicates) { SmallPtrSet Predicates; // We handle only IV > Invariant if (!isLoopInvariant(RHS, L)) return getCouldNotCompute(); const SCEVAddRecExpr *IV = dyn_cast(LHS); if (!IV && AllowPredicates) // Try to make this an AddRec using runtime tests, in the first X // iterations of this loop, where X is the SCEV expression found by the // algorithm below. IV = convertSCEVToAddRecWithPredicates(LHS, L, Predicates); // Avoid weird loops if (!IV || IV->getLoop() != L || !IV->isAffine()) return getCouldNotCompute(); bool NoWrap = ControlsExit && IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW); const SCEV *Stride = getNegativeSCEV(IV->getStepRecurrence(*this)); // Avoid negative or zero stride values if (!isKnownPositive(Stride)) return getCouldNotCompute(); // Avoid proven overflow cases: this will ensure that the backedge taken count // will not generate any unsigned overflow. Relaxed no-overflow conditions // exploit NoWrapFlags, allowing to optimize in presence of undefined // behaviors like the case of C language. if (!Stride->isOne() && doesIVOverflowOnGT(RHS, Stride, IsSigned, NoWrap)) return getCouldNotCompute(); ICmpInst::Predicate Cond = IsSigned ? 
ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; const SCEV *Start = IV->getStart(); const SCEV *End = RHS; if (!isLoopEntryGuardedByCond(L, Cond, getAddExpr(Start, Stride), RHS)) End = IsSigned ? getSMinExpr(RHS, Start) : getUMinExpr(RHS, Start); const SCEV *BECount = computeBECount(getMinusSCEV(Start, End), Stride, false); APInt MaxStart = IsSigned ? getSignedRangeMax(Start) : getUnsignedRangeMax(Start); APInt MinStride = IsSigned ? getSignedRangeMin(Stride) : getUnsignedRangeMin(Stride); unsigned BitWidth = getTypeSizeInBits(LHS->getType()); APInt Limit = IsSigned ? APInt::getSignedMinValue(BitWidth) + (MinStride - 1) : APInt::getMinValue(BitWidth) + (MinStride - 1); // Although End can be a MIN expression we estimate MinEnd considering only // the case End = RHS. This is safe because in the other case (Start - End) // is zero, leading to a zero maximum backedge taken count. APInt MinEnd = IsSigned ? APIntOps::smax(getSignedRangeMin(RHS), Limit) : APIntOps::umax(getUnsignedRangeMin(RHS), Limit); const SCEV *MaxBECount = getCouldNotCompute(); if (isa(BECount)) MaxBECount = BECount; else MaxBECount = computeBECount(getConstant(MaxStart - MinEnd), getConstant(MinStride), false); if (isa(MaxBECount)) MaxBECount = BECount; return ExitLimit(BECount, MaxBECount, false, Predicates); } const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range, ScalarEvolution &SE) const { if (Range.isFullSet()) // Infinite loop. return SE.getCouldNotCompute(); // If the start is a non-zero constant, shift the range to simplify things. if (const SCEVConstant *SC = dyn_cast(getStart())) if (!SC->getValue()->isZero()) { SmallVector Operands(op_begin(), op_end()); Operands[0] = SE.getZero(SC->getType()); const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop(), getNoWrapFlags(FlagNW)); if (const auto *ShiftedAddRec = dyn_cast(Shifted)) return ShiftedAddRec->getNumIterationsInRange( Range.subtract(SC->getAPInt()), SE); // This is strange and shouldn't happen. return SE.getCouldNotCompute(); } // The only time we can solve this is when we have all constant indices. // Otherwise, we cannot determine the overflow conditions. if (any_of(operands(), [](const SCEV *Op) { return !isa(Op); })) return SE.getCouldNotCompute(); // Okay at this point we know that all elements of the chrec are constants and // that the start element is zero. // First check to see if the range contains zero. If not, the first // iteration exits. unsigned BitWidth = SE.getTypeSizeInBits(getType()); if (!Range.contains(APInt(BitWidth, 0))) return SE.getZero(getType()); if (isAffine()) { // If this is an affine expression then we have this situation: // Solve {0,+,A} in Range === Ax in Range // We know that zero is in the range. If A is positive then we know that // the upper value of the range must be the first possible exit value. // If A is negative then the lower of the range is the last possible loop // value. Also note that we already checked for a full range. APInt A = cast(getOperand(1))->getAPInt(); APInt End = A.sge(1) ? (Range.getUpper() - 1) : Range.getLower(); // The exit value should be (End+A)/A. APInt ExitVal = (End + A).udiv(A); ConstantInt *ExitValue = ConstantInt::get(SE.getContext(), ExitVal); // Evaluate at the exit value. If we really did fall out of the valid // range, then we computed our trip count, otherwise wrap around or other // things must have happened. 
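    // A worked instance of the affine solve above (illustrative): for the
    // chrec {0,+,4} and Range = [0, 10), A = 4 and End = 10 - 1 = 9, so
    // ExitVal = (9 + 4) /u 4 = 3.  The iteration values 0, 4 and 8 lie in
    // the range, and the third iteration produces 12, the first value to
    // fall outside it -- matching the computed exit count of 3.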
ConstantInt *Val = EvaluateConstantChrecAtConstant(this, ExitValue, SE); if (Range.contains(Val->getValue())) return SE.getCouldNotCompute(); // Something strange happened // Ensure that the previous value is in the range. This is a sanity check. assert(Range.contains( EvaluateConstantChrecAtConstant(this, ConstantInt::get(SE.getContext(), ExitVal - 1), SE)->getValue()) && "Linear scev computation is off in a bad way!"); return SE.getConstant(ExitValue); } else if (isQuadratic()) { // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of the // quadratic equation to solve it. To do this, we must frame our problem in // terms of figuring out when zero is crossed, instead of when // Range.getUpper() is crossed. SmallVector NewOps(op_begin(), op_end()); NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper())); const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop(), FlagAnyWrap); // Next, solve the constructed addrec if (auto Roots = SolveQuadraticEquation(cast(NewAddRec), SE)) { const SCEVConstant *R1 = Roots->first; const SCEVConstant *R2 = Roots->second; // Pick the smallest positive root value. if (ConstantInt *CB = dyn_cast(ConstantExpr::getICmp( ICmpInst::ICMP_ULT, R1->getValue(), R2->getValue()))) { if (!CB->getZExtValue()) std::swap(R1, R2); // R1 is the minimum root now. // Make sure the root is not off by one. The returned iteration should // not be in the range, but the previous one should be. When solving // for "X*X < 5", for example, we should not return a root of 2. ConstantInt *R1Val = EvaluateConstantChrecAtConstant(this, R1->getValue(), SE); if (Range.contains(R1Val->getValue())) { // The next iteration must be out of the range... ConstantInt *NextVal = ConstantInt::get(SE.getContext(), R1->getAPInt() + 1); R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE); if (!Range.contains(R1Val->getValue())) return SE.getConstant(NextVal); return SE.getCouldNotCompute(); // Something strange happened } // If R1 was not in the range, then it is a good return value. Make // sure that R1-1 WAS in the range though, just in case. ConstantInt *NextVal = ConstantInt::get(SE.getContext(), R1->getAPInt() - 1); R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE); if (Range.contains(R1Val->getValue())) return R1; return SE.getCouldNotCompute(); // Something strange happened } } } return SE.getCouldNotCompute(); } // Return true when S contains at least an undef value. static inline bool containsUndefs(const SCEV *S) { return SCEVExprContains(S, [](const SCEV *S) { if (const auto *SU = dyn_cast(S)) return isa(SU->getValue()); else if (const auto *SC = dyn_cast(S)) return isa(SC->getValue()); return false; }); } namespace { // Collect all steps of SCEV expressions. struct SCEVCollectStrides { ScalarEvolution &SE; SmallVectorImpl &Strides; SCEVCollectStrides(ScalarEvolution &SE, SmallVectorImpl &S) : SE(SE), Strides(S) {} bool follow(const SCEV *S) { if (const SCEVAddRecExpr *AR = dyn_cast(S)) Strides.push_back(AR->getStepRecurrence(SE)); return true; } bool isDone() const { return false; } }; // Collect all SCEVUnknown and SCEVMulExpr expressions. struct SCEVCollectTerms { SmallVectorImpl &Terms; SCEVCollectTerms(SmallVectorImpl &T) : Terms(T) {} bool follow(const SCEV *S) { if (isa(S) || isa(S) || isa(S)) { if (!containsUndefs(S)) Terms.push_back(S); // Stop recursion: once we collected a term, do not walk its operands. return false; } // Keep looking. return true; } bool isDone() const { return false; } }; // Check if a SCEV contains an AddRecExpr. 
struct SCEVHasAddRec { bool &ContainsAddRec; SCEVHasAddRec(bool &ContainsAddRec) : ContainsAddRec(ContainsAddRec) { ContainsAddRec = false; } bool follow(const SCEV *S) { if (isa(S)) { ContainsAddRec = true; // Stop recursion: once we collected a term, do not walk its operands. return false; } // Keep looking. return true; } bool isDone() const { return false; } }; // Find factors that are multiplied with an expression that (possibly as a // subexpression) contains an AddRecExpr. In the expression: // // 8 * (100 + %p * %q * (%a + {0, +, 1}_loop)) // // "%p * %q" are factors multiplied by the expression "(%a + {0, +, 1}_loop)" // that contains the AddRec {0, +, 1}_loop. %p * %q are likely to be array size // parameters as they form a product with an induction variable. // // This collector expects all array size parameters to be in the same MulExpr. // It might be necessary to later add support for collecting parameters that are // spread over different nested MulExpr. struct SCEVCollectAddRecMultiplies { SmallVectorImpl &Terms; ScalarEvolution &SE; SCEVCollectAddRecMultiplies(SmallVectorImpl &T, ScalarEvolution &SE) : Terms(T), SE(SE) {} bool follow(const SCEV *S) { if (auto *Mul = dyn_cast(S)) { bool HasAddRec = false; SmallVector Operands; for (auto Op : Mul->operands()) { const SCEVUnknown *Unknown = dyn_cast(Op); if (Unknown && !isa(Unknown->getValue())) { Operands.push_back(Op); } else if (Unknown) { HasAddRec = true; } else { bool ContainsAddRec; SCEVHasAddRec ContiansAddRec(ContainsAddRec); visitAll(Op, ContiansAddRec); HasAddRec |= ContainsAddRec; } } if (Operands.size() == 0) return true; if (!HasAddRec) return false; Terms.push_back(SE.getMulExpr(Operands)); // Stop recursion: once we collected a term, do not walk its operands. return false; } // Keep looking. return true; } bool isDone() const { return false; } }; } // end anonymous namespace /// Find parametric terms in this SCEVAddRecExpr. We first for parameters in /// two places: /// 1) The strides of AddRec expressions. /// 2) Unknowns that are multiplied with AddRec expressions. void ScalarEvolution::collectParametricTerms(const SCEV *Expr, SmallVectorImpl &Terms) { SmallVector Strides; SCEVCollectStrides StrideCollector(*this, Strides); visitAll(Expr, StrideCollector); DEBUG({ dbgs() << "Strides:\n"; for (const SCEV *S : Strides) dbgs() << *S << "\n"; }); for (const SCEV *S : Strides) { SCEVCollectTerms TermCollector(Terms); visitAll(S, TermCollector); } DEBUG({ dbgs() << "Terms:\n"; for (const SCEV *T : Terms) dbgs() << *T << "\n"; }); SCEVCollectAddRecMultiplies MulCollector(Terms, *this); visitAll(Expr, MulCollector); } static bool findArrayDimensionsRec(ScalarEvolution &SE, SmallVectorImpl &Terms, SmallVectorImpl &Sizes) { int Last = Terms.size() - 1; const SCEV *Step = Terms[Last]; // End of recursion. if (Last == 0) { if (const SCEVMulExpr *M = dyn_cast(Step)) { SmallVector Qs; for (const SCEV *Op : M->operands()) if (!isa(Op)) Qs.push_back(Op); Step = SE.getMulExpr(Qs); } Sizes.push_back(Step); return true; } for (const SCEV *&Term : Terms) { // Normalize the terms before the next call to findArrayDimensionsRec. const SCEV *Q, *R; SCEVDivision::divide(SE, Term, Step, &Q, &R); // Bail out when GCD does not evenly divide one of the terms. if (!R->isZero()) return false; Term = Q; } // Remove all SCEVConstants. 
Terms.erase( remove_if(Terms, [](const SCEV *E) { return isa(E); }), Terms.end()); if (Terms.size() > 0) if (!findArrayDimensionsRec(SE, Terms, Sizes)) return false; Sizes.push_back(Step); return true; } // Returns true when one of the SCEVs of Terms contains a SCEVUnknown parameter. static inline bool containsParameters(SmallVectorImpl &Terms) { for (const SCEV *T : Terms) if (SCEVExprContains(T, isa)) return true; return false; } // Return the number of product terms in S. static inline int numberOfTerms(const SCEV *S) { if (const SCEVMulExpr *Expr = dyn_cast(S)) return Expr->getNumOperands(); return 1; } static const SCEV *removeConstantFactors(ScalarEvolution &SE, const SCEV *T) { if (isa(T)) return nullptr; if (isa(T)) return T; if (const SCEVMulExpr *M = dyn_cast(T)) { SmallVector Factors; for (const SCEV *Op : M->operands()) if (!isa(Op)) Factors.push_back(Op); return SE.getMulExpr(Factors); } return T; } /// Return the size of an element read or written by Inst. const SCEV *ScalarEvolution::getElementSize(Instruction *Inst) { Type *Ty; if (StoreInst *Store = dyn_cast(Inst)) Ty = Store->getValueOperand()->getType(); else if (LoadInst *Load = dyn_cast(Inst)) Ty = Load->getType(); else return nullptr; Type *ETy = getEffectiveSCEVType(PointerType::getUnqual(Ty)); return getSizeOfExpr(ETy, Ty); } void ScalarEvolution::findArrayDimensions(SmallVectorImpl &Terms, SmallVectorImpl &Sizes, const SCEV *ElementSize) { if (Terms.size() < 1 || !ElementSize) return; // Early return when Terms do not contain parameters: we do not delinearize // non parametric SCEVs. if (!containsParameters(Terms)) return; DEBUG({ dbgs() << "Terms:\n"; for (const SCEV *T : Terms) dbgs() << *T << "\n"; }); // Remove duplicates. array_pod_sort(Terms.begin(), Terms.end()); Terms.erase(std::unique(Terms.begin(), Terms.end()), Terms.end()); // Put larger terms first. std::sort(Terms.begin(), Terms.end(), [](const SCEV *LHS, const SCEV *RHS) { return numberOfTerms(LHS) > numberOfTerms(RHS); }); // Try to divide all terms by the element size. If term is not divisible by // element size, proceed with the original term. for (const SCEV *&Term : Terms) { const SCEV *Q, *R; SCEVDivision::divide(*this, Term, ElementSize, &Q, &R); if (!Q->isZero()) Term = Q; } SmallVector NewTerms; // Remove constant factors. for (const SCEV *T : Terms) if (const SCEV *NewT = removeConstantFactors(*this, T)) NewTerms.push_back(NewT); DEBUG({ dbgs() << "Terms after sorting:\n"; for (const SCEV *T : NewTerms) dbgs() << *T << "\n"; }); if (NewTerms.empty() || !findArrayDimensionsRec(*this, NewTerms, Sizes)) { Sizes.clear(); return; } // The last element to be pushed into Sizes is the size of an element. Sizes.push_back(ElementSize); DEBUG({ dbgs() << "Sizes:\n"; for (const SCEV *S : Sizes) dbgs() << *S << "\n"; }); } void ScalarEvolution::computeAccessFunctions( const SCEV *Expr, SmallVectorImpl &Subscripts, SmallVectorImpl &Sizes) { // Early exit in case this SCEV is not an affine multivariate function. if (Sizes.empty()) return; if (auto *AR = dyn_cast(Expr)) if (!AR->isAffine()) return; const SCEV *Res = Expr; int Last = Sizes.size() - 1; for (int i = Last; i >= 0; i--) { const SCEV *Q, *R; SCEVDivision::divide(*this, Res, Sizes[i], &Q, &R); DEBUG({ dbgs() << "Res: " << *Res << "\n"; dbgs() << "Sizes[i]: " << *Sizes[i] << "\n"; dbgs() << "Res divided by Sizes[i]:\n"; dbgs() << "Quotient: " << *Q << "\n"; dbgs() << "Remainder: " << *R << "\n"; }); Res = Q; // Do not record the last subscript corresponding to the size of elements in // the array. 
if (i == Last) { // Bail out if the remainder is too complex. if (isa(R)) { Subscripts.clear(); Sizes.clear(); return; } continue; } // Record the access function for the current subscript. Subscripts.push_back(R); } // Also push in last position the remainder of the last division: it will be // the access function of the innermost dimension. Subscripts.push_back(Res); std::reverse(Subscripts.begin(), Subscripts.end()); DEBUG({ dbgs() << "Subscripts:\n"; for (const SCEV *S : Subscripts) dbgs() << *S << "\n"; }); } /// Splits the SCEV into two vectors of SCEVs representing the subscripts and /// sizes of an array access. Returns the remainder of the delinearization that /// is the offset start of the array. The SCEV->delinearize algorithm computes /// the multiples of SCEV coefficients: that is a pattern matching of sub /// expressions in the stride and base of a SCEV corresponding to the /// computation of a GCD (greatest common divisor) of base and stride. When /// SCEV->delinearize fails, it returns the SCEV unchanged. /// /// For example: when analyzing the memory access A[i][j][k] in this loop nest /// /// void foo(long n, long m, long o, double A[n][m][o]) { /// /// for (long i = 0; i < n; i++) /// for (long j = 0; j < m; j++) /// for (long k = 0; k < o; k++) /// A[i][j][k] = 1.0; /// } /// /// the delinearization input is the following AddRec SCEV: /// /// AddRec: {{{%A,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k> /// /// From this SCEV, we are able to say that the base offset of the access is %A /// because it appears as an offset that does not divide any of the strides in /// the loops: /// /// CHECK: Base offset: %A /// /// and then SCEV->delinearize determines the size of some of the dimensions of /// the array as these are the multiples by which the strides are happening: /// /// CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes. /// /// Note that the outermost dimension remains of UnknownSize because there are /// no strides that would help identifying the size of the last dimension: when /// the array has been statically allocated, one could compute the size of that /// dimension by dividing the overall size of the array by the size of the known /// dimensions: %m * %o * 8. /// /// Finally delinearize provides the access functions for the array reference /// that does correspond to A[i][j][k] of the above C testcase: /// /// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>] /// /// The testcases are checking the output of a function pass: /// DelinearizationPass that walks through all loads and stores of a function /// asking for the SCEV of the memory access with respect to all enclosing /// loops, calling SCEV->delinearize on that and printing the results. void ScalarEvolution::delinearize(const SCEV *Expr, SmallVectorImpl &Subscripts, SmallVectorImpl &Sizes, const SCEV *ElementSize) { // First step: collect parametric terms. SmallVector Terms; collectParametricTerms(Expr, Terms); if (Terms.empty()) return; // Second step: find subscript sizes. findArrayDimensions(Terms, Sizes, ElementSize); if (Sizes.empty()) return; // Third step: compute the access functions for each subscript. 
computeAccessFunctions(Expr, Subscripts, Sizes); if (Subscripts.empty()) return; DEBUG({ dbgs() << "succeeded to delinearize " << *Expr << "\n"; dbgs() << "ArrayDecl[UnknownSize]"; for (const SCEV *S : Sizes) dbgs() << "[" << *S << "]"; dbgs() << "\nArrayRef"; for (const SCEV *S : Subscripts) dbgs() << "[" << *S << "]"; dbgs() << "\n"; }); } //===----------------------------------------------------------------------===// // SCEVCallbackVH Class Implementation //===----------------------------------------------------------------------===// void ScalarEvolution::SCEVCallbackVH::deleted() { assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!"); if (PHINode *PN = dyn_cast(getValPtr())) SE->ConstantEvolutionLoopExitValue.erase(PN); SE->eraseValueFromMap(getValPtr()); // this now dangles! } void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) { assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!"); // Forget all the expressions associated with users of the old value, // so that future queries will recompute the expressions using the new // value. Value *Old = getValPtr(); SmallVector Worklist(Old->user_begin(), Old->user_end()); SmallPtrSet Visited; while (!Worklist.empty()) { User *U = Worklist.pop_back_val(); // Deleting the Old value will cause this to dangle. Postpone // that until everything else is done. if (U == Old) continue; if (!Visited.insert(U).second) continue; if (PHINode *PN = dyn_cast(U)) SE->ConstantEvolutionLoopExitValue.erase(PN); SE->eraseValueFromMap(U); Worklist.insert(Worklist.end(), U->user_begin(), U->user_end()); } // Delete the Old value. if (PHINode *PN = dyn_cast(Old)) SE->ConstantEvolutionLoopExitValue.erase(PN); SE->eraseValueFromMap(Old); // this now dangles! } ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se) : CallbackVH(V), SE(se) {} //===----------------------------------------------------------------------===// // ScalarEvolution Class Implementation //===----------------------------------------------------------------------===// ScalarEvolution::ScalarEvolution(Function &F, TargetLibraryInfo &TLI, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI) : F(F), TLI(TLI), AC(AC), DT(DT), LI(LI), CouldNotCompute(new SCEVCouldNotCompute()), ValuesAtScopes(64), LoopDispositions(64), BlockDispositions(64) { // To use guards for proving predicates, we need to scan every instruction in // relevant basic blocks, and not just terminators. Doing this is a waste of // time if the IR does not actually contain any calls to // @llvm.experimental.guard, so do a quick check and remember this beforehand. // // This pessimizes the case where a pass that preserves ScalarEvolution wants // to _add_ guards to the module when there weren't any before, and wants // ScalarEvolution to optimize based on those guards. For now we prefer to be // efficient in lieu of being smart in that rather obscure case. 
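  // For reference, the scan below is looking for call sites of the form
  // (illustrative IR):
  //
  //   call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
  //
  // HasGuards is set only when the module declares the intrinsic and that
  // declaration has at least one use, so the per-block instruction scans are
  // skipped entirely for guard-free modules.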
auto *GuardDecl = F.getParent()->getFunction( Intrinsic::getName(Intrinsic::experimental_guard)); HasGuards = GuardDecl && !GuardDecl->use_empty(); } ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg) : F(Arg.F), HasGuards(Arg.HasGuards), TLI(Arg.TLI), AC(Arg.AC), DT(Arg.DT), LI(Arg.LI), CouldNotCompute(std::move(Arg.CouldNotCompute)), ValueExprMap(std::move(Arg.ValueExprMap)), PendingLoopPredicates(std::move(Arg.PendingLoopPredicates)), MinTrailingZerosCache(std::move(Arg.MinTrailingZerosCache)), BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)), PredicatedBackedgeTakenCounts( std::move(Arg.PredicatedBackedgeTakenCounts)), ConstantEvolutionLoopExitValue( std::move(Arg.ConstantEvolutionLoopExitValue)), ValuesAtScopes(std::move(Arg.ValuesAtScopes)), LoopDispositions(std::move(Arg.LoopDispositions)), LoopPropertiesCache(std::move(Arg.LoopPropertiesCache)), BlockDispositions(std::move(Arg.BlockDispositions)), UnsignedRanges(std::move(Arg.UnsignedRanges)), SignedRanges(std::move(Arg.SignedRanges)), UniqueSCEVs(std::move(Arg.UniqueSCEVs)), UniquePreds(std::move(Arg.UniquePreds)), SCEVAllocator(std::move(Arg.SCEVAllocator)), LoopUsers(std::move(Arg.LoopUsers)), PredicatedSCEVRewrites(std::move(Arg.PredicatedSCEVRewrites)), FirstUnknown(Arg.FirstUnknown) { Arg.FirstUnknown = nullptr; } ScalarEvolution::~ScalarEvolution() { // Iterate through all the SCEVUnknown instances and call their // destructors, so that they release their references to their values. for (SCEVUnknown *U = FirstUnknown; U;) { SCEVUnknown *Tmp = U; U = U->Next; Tmp->~SCEVUnknown(); } FirstUnknown = nullptr; ExprValueMap.clear(); ValueExprMap.clear(); HasRecMap.clear(); // Free any extra memory created for ExitNotTakenInfo in the unlikely event // that a loop had multiple computable exits. for (auto &BTCI : BackedgeTakenCounts) BTCI.second.clear(); for (auto &BTCI : PredicatedBackedgeTakenCounts) BTCI.second.clear(); assert(PendingLoopPredicates.empty() && "isImpliedCond garbage"); assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!"); assert(!ProvingSplitPredicate && "ProvingSplitPredicate garbage!"); } bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) { return !isa(getBackedgeTakenCount(L)); } static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE, const Loop *L) { // Print all inner loops first for (Loop *I : *L) PrintLoopInfo(OS, SE, I); OS << "Loop "; L->getHeader()->printAsOperand(OS, /*PrintType=*/false); OS << ": "; SmallVector ExitBlocks; L->getExitBlocks(ExitBlocks); if (ExitBlocks.size() != 1) OS << " "; if (SE->hasLoopInvariantBackedgeTakenCount(L)) { OS << "backedge-taken count is " << *SE->getBackedgeTakenCount(L); } else { OS << "Unpredictable backedge-taken count. "; } OS << "\n" "Loop "; L->getHeader()->printAsOperand(OS, /*PrintType=*/false); OS << ": "; if (!isa(SE->getMaxBackedgeTakenCount(L))) { OS << "max backedge-taken count is " << *SE->getMaxBackedgeTakenCount(L); if (SE->isBackedgeTakenCountMaxOrZero(L)) OS << ", actual taken count either this or zero."; } else { OS << "Unpredictable max backedge-taken count. "; } OS << "\n" "Loop "; L->getHeader()->printAsOperand(OS, /*PrintType=*/false); OS << ": "; SCEVUnionPredicate Pred; auto PBT = SE->getPredicatedBackedgeTakenCount(L, Pred); if (!isa(PBT)) { OS << "Predicated backedge-taken count is " << *PBT << "\n"; OS << " Predicates:\n"; Pred.print(OS, 4); } else { OS << "Unpredictable predicated backedge-taken count. 
"; } OS << "\n"; if (SE->hasLoopInvariantBackedgeTakenCount(L)) { OS << "Loop "; L->getHeader()->printAsOperand(OS, /*PrintType=*/false); OS << ": "; OS << "Trip multiple is " << SE->getSmallConstantTripMultiple(L) << "\n"; } } static StringRef loopDispositionToStr(ScalarEvolution::LoopDisposition LD) { switch (LD) { case ScalarEvolution::LoopVariant: return "Variant"; case ScalarEvolution::LoopInvariant: return "Invariant"; case ScalarEvolution::LoopComputable: return "Computable"; } llvm_unreachable("Unknown ScalarEvolution::LoopDisposition kind!"); } void ScalarEvolution::print(raw_ostream &OS) const { // ScalarEvolution's implementation of the print method is to print // out SCEV values of all instructions that are interesting. Doing // this potentially causes it to create new SCEV objects though, // which technically conflicts with the const qualifier. This isn't // observable from outside the class though, so casting away the // const isn't dangerous. ScalarEvolution &SE = *const_cast(this); OS << "Classifying expressions for: "; F.printAsOperand(OS, /*PrintType=*/false); OS << "\n"; for (Instruction &I : instructions(F)) if (isSCEVable(I.getType()) && !isa(I)) { OS << I << '\n'; OS << " --> "; const SCEV *SV = SE.getSCEV(&I); SV->print(OS); if (!isa(SV)) { OS << " U: "; SE.getUnsignedRange(SV).print(OS); OS << " S: "; SE.getSignedRange(SV).print(OS); } const Loop *L = LI.getLoopFor(I.getParent()); const SCEV *AtUse = SE.getSCEVAtScope(SV, L); if (AtUse != SV) { OS << " --> "; AtUse->print(OS); if (!isa(AtUse)) { OS << " U: "; SE.getUnsignedRange(AtUse).print(OS); OS << " S: "; SE.getSignedRange(AtUse).print(OS); } } if (L) { OS << "\t\t" "Exits: "; const SCEV *ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop()); if (!SE.isLoopInvariant(ExitValue, L)) { OS << "<>"; } else { OS << *ExitValue; } bool First = true; for (auto *Iter = L; Iter; Iter = Iter->getParentLoop()) { if (First) { OS << "\t\t" "LoopDispositions: { "; First = false; } else { OS << ", "; } Iter->getHeader()->printAsOperand(OS, /*PrintType=*/false); OS << ": " << loopDispositionToStr(SE.getLoopDisposition(SV, Iter)); } for (auto *InnerL : depth_first(L)) { if (InnerL == L) continue; if (First) { OS << "\t\t" "LoopDispositions: { "; First = false; } else { OS << ", "; } InnerL->getHeader()->printAsOperand(OS, /*PrintType=*/false); OS << ": " << loopDispositionToStr(SE.getLoopDisposition(SV, InnerL)); } OS << " }"; } OS << "\n"; } OS << "Determining loop execution counts for: "; F.printAsOperand(OS, /*PrintType=*/false); OS << "\n"; for (Loop *I : LI) PrintLoopInfo(OS, &SE, I); } ScalarEvolution::LoopDisposition ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) { auto &Values = LoopDispositions[S]; for (auto &V : Values) { if (V.getPointer() == L) return V.getInt(); } Values.emplace_back(L, LoopVariant); LoopDisposition D = computeLoopDisposition(S, L); auto &Values2 = LoopDispositions[S]; for (auto &V : make_range(Values2.rbegin(), Values2.rend())) { if (V.getPointer() == L) { V.setInt(D); break; } } return D; } ScalarEvolution::LoopDisposition ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) { switch (static_cast(S->getSCEVType())) { case scConstant: return LoopInvariant; case scTruncate: case scZeroExtend: case scSignExtend: return getLoopDisposition(cast(S)->getOperand(), L); case scAddRecExpr: { const SCEVAddRecExpr *AR = cast(S); // If L is the addrec's loop, it's computable. 
if (AR->getLoop() == L) return LoopComputable; // Add recurrences are never invariant in the function-body (null loop). if (!L) return LoopVariant; // Everything that is not defined at loop entry is variant. if (DT.dominates(L->getHeader(), AR->getLoop()->getHeader())) return LoopVariant; assert(!L->contains(AR->getLoop()) && "Containing loop's header does not" " dominate the contained loop's header?"); // This recurrence is invariant w.r.t. L if AR's loop contains L. if (AR->getLoop()->contains(L)) return LoopInvariant; // This recurrence is variant w.r.t. L if any of its operands // are variant. for (auto *Op : AR->operands()) if (!isLoopInvariant(Op, L)) return LoopVariant; // Otherwise it's loop-invariant. return LoopInvariant; } case scAddExpr: case scMulExpr: case scUMaxExpr: case scSMaxExpr: { bool HasVarying = false; for (auto *Op : cast(S)->operands()) { LoopDisposition D = getLoopDisposition(Op, L); if (D == LoopVariant) return LoopVariant; if (D == LoopComputable) HasVarying = true; } return HasVarying ? LoopComputable : LoopInvariant; } case scUDivExpr: { const SCEVUDivExpr *UDiv = cast(S); LoopDisposition LD = getLoopDisposition(UDiv->getLHS(), L); if (LD == LoopVariant) return LoopVariant; LoopDisposition RD = getLoopDisposition(UDiv->getRHS(), L); if (RD == LoopVariant) return LoopVariant; return (LD == LoopInvariant && RD == LoopInvariant) ? LoopInvariant : LoopComputable; } case scUnknown: // All non-instruction values are loop invariant. All instructions are loop // invariant if they are not contained in the specified loop. // Instructions are never considered invariant in the function body // (null loop) because they are defined within the "loop". if (auto *I = dyn_cast(cast(S)->getValue())) return (L && !L->contains(I)) ? LoopInvariant : LoopVariant; return LoopInvariant; case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); } llvm_unreachable("Unknown SCEV kind!"); } bool ScalarEvolution::isLoopInvariant(const SCEV *S, const Loop *L) { return getLoopDisposition(S, L) == LoopInvariant; } bool ScalarEvolution::hasComputableLoopEvolution(const SCEV *S, const Loop *L) { return getLoopDisposition(S, L) == LoopComputable; } ScalarEvolution::BlockDisposition ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) { auto &Values = BlockDispositions[S]; for (auto &V : Values) { if (V.getPointer() == BB) return V.getInt(); } Values.emplace_back(BB, DoesNotDominateBlock); BlockDisposition D = computeBlockDisposition(S, BB); auto &Values2 = BlockDispositions[S]; for (auto &V : make_range(Values2.rbegin(), Values2.rend())) { if (V.getPointer() == BB) { V.setInt(D); break; } } return D; } ScalarEvolution::BlockDisposition ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) { switch (static_cast(S->getSCEVType())) { case scConstant: return ProperlyDominatesBlock; case scTruncate: case scZeroExtend: case scSignExtend: return getBlockDisposition(cast(S)->getOperand(), BB); case scAddRecExpr: { // This uses a "dominates" query instead of "properly dominates" query // to test for proper dominance too, because the instruction which // produces the addrec's value is a PHI, and a PHI effectively properly // dominates its entire containing block. const SCEVAddRecExpr *AR = cast(S); if (!DT.dominates(AR->getLoop()->getHeader(), BB)) return DoesNotDominateBlock; // Fall through into SCEVNAryExpr handling. 
LLVM_FALLTHROUGH; } case scAddExpr: case scMulExpr: case scUMaxExpr: case scSMaxExpr: { const SCEVNAryExpr *NAry = cast(S); bool Proper = true; for (const SCEV *NAryOp : NAry->operands()) { BlockDisposition D = getBlockDisposition(NAryOp, BB); if (D == DoesNotDominateBlock) return DoesNotDominateBlock; if (D == DominatesBlock) Proper = false; } return Proper ? ProperlyDominatesBlock : DominatesBlock; } case scUDivExpr: { const SCEVUDivExpr *UDiv = cast(S); const SCEV *LHS = UDiv->getLHS(), *RHS = UDiv->getRHS(); BlockDisposition LD = getBlockDisposition(LHS, BB); if (LD == DoesNotDominateBlock) return DoesNotDominateBlock; BlockDisposition RD = getBlockDisposition(RHS, BB); if (RD == DoesNotDominateBlock) return DoesNotDominateBlock; return (LD == ProperlyDominatesBlock && RD == ProperlyDominatesBlock) ? ProperlyDominatesBlock : DominatesBlock; } case scUnknown: if (Instruction *I = dyn_cast(cast(S)->getValue())) { if (I->getParent() == BB) return DominatesBlock; if (DT.properlyDominates(I->getParent(), BB)) return ProperlyDominatesBlock; return DoesNotDominateBlock; } return ProperlyDominatesBlock; case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); } llvm_unreachable("Unknown SCEV kind!"); } bool ScalarEvolution::dominates(const SCEV *S, const BasicBlock *BB) { return getBlockDisposition(S, BB) >= DominatesBlock; } bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) { return getBlockDisposition(S, BB) == ProperlyDominatesBlock; } bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const { return SCEVExprContains(S, [&](const SCEV *Expr) { return Expr == Op; }); } bool ScalarEvolution::ExitLimit::hasOperand(const SCEV *S) const { auto IsS = [&](const SCEV *X) { return S == X; }; auto ContainsS = [&](const SCEV *X) { return !isa(X) && SCEVExprContains(X, IsS); }; return ContainsS(ExactNotTaken) || ContainsS(MaxNotTaken); } void ScalarEvolution::forgetMemoizedResults(const SCEV *S) { ValuesAtScopes.erase(S); LoopDispositions.erase(S); BlockDispositions.erase(S); UnsignedRanges.erase(S); SignedRanges.erase(S); ExprValueMap.erase(S); HasRecMap.erase(S); MinTrailingZerosCache.erase(S); for (auto I = PredicatedSCEVRewrites.begin(); I != PredicatedSCEVRewrites.end();) { std::pair Entry = I->first; if (Entry.first == S) PredicatedSCEVRewrites.erase(I++); else ++I; } auto RemoveSCEVFromBackedgeMap = [S, this](DenseMap &Map) { for (auto I = Map.begin(), E = Map.end(); I != E;) { BackedgeTakenInfo &BEInfo = I->second; if (BEInfo.hasOperand(S, this)) { BEInfo.clear(); Map.erase(I++); } else ++I; } }; RemoveSCEVFromBackedgeMap(BackedgeTakenCounts); RemoveSCEVFromBackedgeMap(PredicatedBackedgeTakenCounts); } void ScalarEvolution::addToLoopUseLists(const SCEV *S) { struct FindUsedLoops { SmallPtrSet LoopsUsed; bool follow(const SCEV *S) { if (auto *AR = dyn_cast(S)) LoopsUsed.insert(AR->getLoop()); return true; } bool isDone() const { return false; } }; FindUsedLoops F; SCEVTraversal(F).visitAll(S); for (auto *L : F.LoopsUsed) LoopUsers[L].push_back(S); } void ScalarEvolution::verify() const { ScalarEvolution &SE = *const_cast(this); ScalarEvolution SE2(F, TLI, AC, DT, LI); SmallVector LoopStack(LI.begin(), LI.end()); // Map's SCEV expressions from one ScalarEvolution "universe" to another. 
  struct SCEVMapper : public SCEVRewriteVisitor<SCEVMapper> {
    SCEVMapper(ScalarEvolution &SE) : SCEVRewriteVisitor<SCEVMapper>(SE) {}

    const SCEV *visitConstant(const SCEVConstant *Constant) {
      return SE.getConstant(Constant->getAPInt());
    }

    const SCEV *visitUnknown(const SCEVUnknown *Expr) {
      return SE.getUnknown(Expr->getValue());
    }

    const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
      return SE.getCouldNotCompute();
    }
  };

  SCEVMapper SCM(SE2);

  while (!LoopStack.empty()) {
    auto *L = LoopStack.pop_back_val();
    LoopStack.insert(LoopStack.end(), L->begin(), L->end());

    auto *CurBECount = SCM.visit(
        const_cast<ScalarEvolution *>(this)->getBackedgeTakenCount(L));
    auto *NewBECount = SE2.getBackedgeTakenCount(L);

    if (CurBECount == SE2.getCouldNotCompute() ||
        NewBECount == SE2.getCouldNotCompute()) {
      // NB! This situation is legal, but is very suspicious -- whatever pass
      // changed the loop to make a trip count go from could-not-compute to
      // computable or vice-versa *should have* invalidated SCEV. However, we
      // choose not to assert here (for now) since we don't want false
      // positives.
      continue;
    }

    if (containsUndefs(CurBECount) || containsUndefs(NewBECount)) {
      // SCEV treats "undef" as an unknown but consistent value (i.e. it does
      // not propagate undef aggressively). This means we can (and do) fail
      // verification in cases where a transform makes the trip count of a
      // loop go from "undef" to "undef+1" (say). The transform is fine, since
      // in both cases the loop iterates "undef" times, but SCEV thinks we
      // increased the trip count of the loop by 1 incorrectly.
      continue;
    }

    if (SE.getTypeSizeInBits(CurBECount->getType()) >
        SE.getTypeSizeInBits(NewBECount->getType()))
      NewBECount = SE2.getZeroExtendExpr(NewBECount, CurBECount->getType());
    else if (SE.getTypeSizeInBits(CurBECount->getType()) <
             SE.getTypeSizeInBits(NewBECount->getType()))
      CurBECount = SE2.getZeroExtendExpr(CurBECount, NewBECount->getType());

    auto *ConstantDelta =
        dyn_cast<SCEVConstant>(SE2.getMinusSCEV(CurBECount, NewBECount));

    if (ConstantDelta && ConstantDelta->getAPInt() != 0) {
      dbgs() << "Trip Count Changed!\n";
      dbgs() << "Old: " << *CurBECount << "\n";
      dbgs() << "New: " << *NewBECount << "\n";
      dbgs() << "Delta: " << *ConstantDelta << "\n";
      std::abort();
    }
  }
}

bool ScalarEvolution::invalidate(
    Function &F, const PreservedAnalyses &PA,
    FunctionAnalysisManager::Invalidator &Inv) {
  // Invalidate the ScalarEvolution object whenever it isn't preserved or one
  // of its dependencies is invalidated.
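  // (ScalarEvolution caches pointers into the assumption cache, the dominator
  // tree, and the loop info, so it can only stay valid while all three
  // survive; the checks below mirror that dependency list exactly.)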
  auto PAC = PA.getChecker<ScalarEvolutionAnalysis>();
  return !(PAC.preserved() ||
           PAC.preservedSet<AllAnalysesOn<Function>>()) ||
         Inv.invalidate<AssumptionAnalysis>(F, PA) ||
         Inv.invalidate<DominatorTreeAnalysis>(F, PA) ||
         Inv.invalidate<LoopAnalysis>(F, PA);
}

AnalysisKey ScalarEvolutionAnalysis::Key;

ScalarEvolution ScalarEvolutionAnalysis::run(Function &F,
                                             FunctionAnalysisManager &AM) {
  return ScalarEvolution(F, AM.getResult<TargetLibraryAnalysis>(F),
                         AM.getResult<AssumptionAnalysis>(F),
                         AM.getResult<DominatorTreeAnalysis>(F),
                         AM.getResult<LoopAnalysis>(F));
}

PreservedAnalyses
ScalarEvolutionPrinterPass::run(Function &F, FunctionAnalysisManager &AM) {
  AM.getResult<ScalarEvolutionAnalysis>(F).print(OS);
  return PreservedAnalyses::all();
}

INITIALIZE_PASS_BEGIN(ScalarEvolutionWrapperPass, "scalar-evolution",
                      "Scalar Evolution Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(ScalarEvolutionWrapperPass, "scalar-evolution",
                    "Scalar Evolution Analysis", false, true)

char ScalarEvolutionWrapperPass::ID = 0;

ScalarEvolutionWrapperPass::ScalarEvolutionWrapperPass() : FunctionPass(ID) {
  initializeScalarEvolutionWrapperPassPass(*PassRegistry::getPassRegistry());
}

bool ScalarEvolutionWrapperPass::runOnFunction(Function &F) {
  SE.reset(new ScalarEvolution(
      F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
      getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
      getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
      getAnalysis<LoopInfoWrapperPass>().getLoopInfo()));
  return false;
}

void ScalarEvolutionWrapperPass::releaseMemory() { SE.reset(); }

void ScalarEvolutionWrapperPass::print(raw_ostream &OS, const Module *) const {
  SE->print(OS);
}

void ScalarEvolutionWrapperPass::verifyAnalysis() const {
  if (!VerifySCEV)
    return;

  SE->verify();
}

void ScalarEvolutionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  AU.addRequiredTransitive<AssumptionCacheTracker>();
  AU.addRequiredTransitive<LoopInfoWrapperPass>();
  AU.addRequiredTransitive<DominatorTreeWrapperPass>();
  AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
}

const SCEVPredicate *ScalarEvolution::getEqualPredicate(const SCEV *LHS,
                                                        const SCEV *RHS) {
  FoldingSetNodeID ID;
  assert(LHS->getType() == RHS->getType() &&
         "Type mismatch between LHS and RHS");
  // Unique this node based on the arguments
  ID.AddInteger(SCEVPredicate::P_Equal);
  ID.AddPointer(LHS);
  ID.AddPointer(RHS);
  void *IP = nullptr;
  if (const auto *S = UniquePreds.FindNodeOrInsertPos(ID, IP))
    return S;
  SCEVEqualPredicate *Eq = new (SCEVAllocator)
      SCEVEqualPredicate(ID.Intern(SCEVAllocator), LHS, RHS);
  UniquePreds.InsertNode(Eq, IP);
  return Eq;
}

const SCEVPredicate *ScalarEvolution::getWrapPredicate(
    const SCEVAddRecExpr *AR,
    SCEVWrapPredicate::IncrementWrapFlags AddedFlags) {
  FoldingSetNodeID ID;
  // Unique this node based on the arguments
  ID.AddInteger(SCEVPredicate::P_Wrap);
  ID.AddPointer(AR);
  ID.AddInteger(AddedFlags);
  void *IP = nullptr;
  if (const auto *S = UniquePreds.FindNodeOrInsertPos(ID, IP))
    return S;
  auto *OF = new (SCEVAllocator)
      SCEVWrapPredicate(ID.Intern(SCEVAllocator), AR, AddedFlags);
  UniquePreds.InsertNode(OF, IP);
  return OF;
}

namespace {

class SCEVPredicateRewriter
    : public SCEVRewriteVisitor<SCEVPredicateRewriter> {
public:
  /// Rewrites \p S in the context of a loop L and the SCEV predication
  /// infrastructure.
  ///
  /// If \p Pred is non-null, the SCEV expression is rewritten to respect the
  /// equivalences present in \p Pred.
  ///
  /// If \p NewPreds is non-null, rewrite is free to add further predicates to
  /// \p NewPreds such that the result will be an AddRecExpr.
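  ///
  /// The two entry points later in this file use these modes in complementary
  /// ways (a sketch of the call shapes, not additional API):
  ///   rewrite(S, L, SE, /*NewPreds=*/nullptr, &Preds);  // apply known preds
  ///   rewrite(S, L, SE, &NewPreds, /*Pred=*/nullptr);   // collect new preds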
  static const SCEV *rewrite(const SCEV *S, const Loop *L, ScalarEvolution &SE,
                             SmallPtrSetImpl<const SCEVPredicate *> *NewPreds,
                             SCEVUnionPredicate *Pred) {
    SCEVPredicateRewriter Rewriter(L, SE, NewPreds, Pred);
    return Rewriter.visit(S);
  }

  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
    if (Pred) {
      auto ExprPreds = Pred->getPredicatesForExpr(Expr);
      for (auto *Pred : ExprPreds)
        if (const auto *IPred = dyn_cast<SCEVEqualPredicate>(Pred))
          if (IPred->getLHS() == Expr)
            return IPred->getRHS();
    }
    return convertToAddRecWithPreds(Expr);
  }

  const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
    const SCEV *Operand = visit(Expr->getOperand());
    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Operand);
    if (AR && AR->getLoop() == L && AR->isAffine()) {
      // This couldn't be folded because the operand didn't have the nuw
      // flag. Add the nusw flag as an assumption that we could make.
      const SCEV *Step = AR->getStepRecurrence(SE);
      Type *Ty = Expr->getType();
      if (addOverflowAssumption(AR, SCEVWrapPredicate::IncrementNUSW))
        return SE.getAddRecExpr(SE.getZeroExtendExpr(AR->getStart(), Ty),
                                SE.getSignExtendExpr(Step, Ty), L,
                                AR->getNoWrapFlags());
    }
    return SE.getZeroExtendExpr(Operand, Expr->getType());
  }

  const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
    const SCEV *Operand = visit(Expr->getOperand());
    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Operand);
    if (AR && AR->getLoop() == L && AR->isAffine()) {
      // This couldn't be folded because the operand didn't have the nsw
      // flag. Add the nssw flag as an assumption that we could make.
      const SCEV *Step = AR->getStepRecurrence(SE);
      Type *Ty = Expr->getType();
      if (addOverflowAssumption(AR, SCEVWrapPredicate::IncrementNSSW))
        return SE.getAddRecExpr(SE.getSignExtendExpr(AR->getStart(), Ty),
                                SE.getSignExtendExpr(Step, Ty), L,
                                AR->getNoWrapFlags());
    }
    return SE.getSignExtendExpr(Operand, Expr->getType());
  }

private:
  explicit SCEVPredicateRewriter(
      const Loop *L, ScalarEvolution &SE,
      SmallPtrSetImpl<const SCEVPredicate *> *NewPreds,
      SCEVUnionPredicate *Pred)
      : SCEVRewriteVisitor(SE), NewPreds(NewPreds), Pred(Pred), L(L) {}

  bool addOverflowAssumption(const SCEVPredicate *P) {
    if (!NewPreds) {
      // Check if we've already made this assumption.
      return Pred && Pred->implies(P);
    }
    NewPreds->insert(P);
    return true;
  }

  bool addOverflowAssumption(const SCEVAddRecExpr *AR,
                             SCEVWrapPredicate::IncrementWrapFlags AddedFlags) {
    auto *A = SE.getWrapPredicate(AR, AddedFlags);
    return addOverflowAssumption(A);
  }

  // If \p Expr represents a PHINode, we try to see if it can be represented
  // as an AddRec, possibly under a predicate (PHISCEVPred). If it is possible
  // to add this predicate as a runtime overflow check, we return the AddRec.
  // If \p Expr does not meet these conditions (is not a PHI node, or we
  // couldn't create an AddRec for it, or couldn't add the predicate), we just
  // return \p Expr.
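  // For instance, a loop PHI whose increment might overflow can still come
  // back as {start,+,step} from here, with the corresponding wrap predicate
  // recorded through addOverflowAssumption() for a later runtime check.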
  const SCEV *convertToAddRecWithPreds(const SCEVUnknown *Expr) {
+    if (!VersionUnknown)
+      return Expr;
    if (!isa<PHINode>(Expr->getValue()))
      return Expr;
    Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
        PredicatedRewrite = SE.createAddRecFromPHIWithCasts(Expr);
    if (!PredicatedRewrite)
      return Expr;
    for (auto *P : PredicatedRewrite->second) {
      if (!addOverflowAssumption(P))
        return Expr;
    }
    return PredicatedRewrite->first;
  }

  SmallPtrSetImpl<const SCEVPredicate *> *NewPreds;
  SCEVUnionPredicate *Pred;
  const Loop *L;
};

} // end anonymous namespace

const SCEV *ScalarEvolution::rewriteUsingPredicate(const SCEV *S, const Loop *L,
                                                   SCEVUnionPredicate &Preds) {
  return SCEVPredicateRewriter::rewrite(S, L, *this, nullptr, &Preds);
}

const SCEVAddRecExpr *ScalarEvolution::convertSCEVToAddRecWithPredicates(
    const SCEV *S, const Loop *L,
    SmallPtrSetImpl<const SCEVPredicate *> &Preds) {
  SmallPtrSet<const SCEVPredicate *, 4> TransformPreds;
  S = SCEVPredicateRewriter::rewrite(S, L, *this, &TransformPreds, nullptr);
  auto *AddRec = dyn_cast<SCEVAddRecExpr>(S);
  if (!AddRec)
    return nullptr;

  // Since the transformation was successful, we can now transfer the SCEV
  // predicates.
  for (auto *P : TransformPreds)
    Preds.insert(P);

  return AddRec;
}

/// SCEV predicates
SCEVPredicate::SCEVPredicate(const FoldingSetNodeIDRef ID,
                             SCEVPredicateKind Kind)
    : FastID(ID), Kind(Kind) {}

SCEVEqualPredicate::SCEVEqualPredicate(const FoldingSetNodeIDRef ID,
                                       const SCEV *LHS, const SCEV *RHS)
    : SCEVPredicate(ID, P_Equal), LHS(LHS), RHS(RHS) {
  assert(LHS->getType() == RHS->getType() && "LHS and RHS types don't match");
  assert(LHS != RHS && "LHS and RHS are the same SCEV");
}

bool SCEVEqualPredicate::implies(const SCEVPredicate *N) const {
  const auto *Op = dyn_cast<SCEVEqualPredicate>(N);

  if (!Op)
    return false;

  return Op->LHS == LHS && Op->RHS == RHS;
}

bool SCEVEqualPredicate::isAlwaysTrue() const { return false; }

const SCEV *SCEVEqualPredicate::getExpr() const { return LHS; }

void SCEVEqualPredicate::print(raw_ostream &OS, unsigned Depth) const {
  OS.indent(Depth) << "Equal predicate: " << *LHS << " == " << *RHS << "\n";
}

SCEVWrapPredicate::SCEVWrapPredicate(const FoldingSetNodeIDRef ID,
                                     const SCEVAddRecExpr *AR,
                                     IncrementWrapFlags Flags)
    : SCEVPredicate(ID, P_Wrap), AR(AR), Flags(Flags) {}

const SCEV *SCEVWrapPredicate::getExpr() const { return AR; }

bool SCEVWrapPredicate::implies(const SCEVPredicate *N) const {
  const auto *Op = dyn_cast<SCEVWrapPredicate>(N);

  return Op && Op->AR == AR && setFlags(Flags, Op->Flags) == Flags;
}

bool SCEVWrapPredicate::isAlwaysTrue() const {
  SCEV::NoWrapFlags ScevFlags = AR->getNoWrapFlags();
  IncrementWrapFlags IFlags = Flags;

  if (ScalarEvolution::setFlags(ScevFlags, SCEV::FlagNSW) == ScevFlags)
    IFlags = clearFlags(IFlags, IncrementNSSW);

  return IFlags == IncrementAnyWrap;
}

void SCEVWrapPredicate::print(raw_ostream &OS, unsigned Depth) const {
  OS.indent(Depth) << *getExpr() << " Added Flags: ";
  if (SCEVWrapPredicate::IncrementNUSW & getFlags())
    OS << "<nusw>";
  if (SCEVWrapPredicate::IncrementNSSW & getFlags())
    OS << "<nssw>";
  OS << "\n";
}

SCEVWrapPredicate::IncrementWrapFlags
SCEVWrapPredicate::getImpliedFlags(const SCEVAddRecExpr *AR,
                                   ScalarEvolution &SE) {
  IncrementWrapFlags ImpliedFlags = IncrementAnyWrap;
  SCEV::NoWrapFlags StaticFlags = AR->getNoWrapFlags();

  // We can safely transfer the NSW flag as NSSW.
  if (ScalarEvolution::setFlags(StaticFlags, SCEV::FlagNSW) == StaticFlags)
    ImpliedFlags = IncrementNSSW;

  if (ScalarEvolution::setFlags(StaticFlags, SCEV::FlagNUW) == StaticFlags) {
    // If the increment is positive, the SCEV NUW flag will also imply the
    // WrapPredicate NUSW flag.
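    // (Roughly: with a non-negative step the signed and unsigned views of the
    // increment coincide, so "never wraps unsigned" carries over to NUSW.)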
if (const auto *Step = dyn_cast(AR->getStepRecurrence(SE))) if (Step->getValue()->getValue().isNonNegative()) ImpliedFlags = setFlags(ImpliedFlags, IncrementNUSW); } return ImpliedFlags; } /// Union predicates don't get cached so create a dummy set ID for it. SCEVUnionPredicate::SCEVUnionPredicate() : SCEVPredicate(FoldingSetNodeIDRef(nullptr, 0), P_Union) {} bool SCEVUnionPredicate::isAlwaysTrue() const { return all_of(Preds, [](const SCEVPredicate *I) { return I->isAlwaysTrue(); }); } ArrayRef SCEVUnionPredicate::getPredicatesForExpr(const SCEV *Expr) { auto I = SCEVToPreds.find(Expr); if (I == SCEVToPreds.end()) return ArrayRef(); return I->second; } bool SCEVUnionPredicate::implies(const SCEVPredicate *N) const { if (const auto *Set = dyn_cast(N)) return all_of(Set->Preds, [this](const SCEVPredicate *I) { return this->implies(I); }); auto ScevPredsIt = SCEVToPreds.find(N->getExpr()); if (ScevPredsIt == SCEVToPreds.end()) return false; auto &SCEVPreds = ScevPredsIt->second; return any_of(SCEVPreds, [N](const SCEVPredicate *I) { return I->implies(N); }); } const SCEV *SCEVUnionPredicate::getExpr() const { return nullptr; } void SCEVUnionPredicate::print(raw_ostream &OS, unsigned Depth) const { for (auto Pred : Preds) Pred->print(OS, Depth); } void SCEVUnionPredicate::add(const SCEVPredicate *N) { if (const auto *Set = dyn_cast(N)) { for (auto Pred : Set->Preds) add(Pred); return; } if (implies(N)) return; const SCEV *Key = N->getExpr(); assert(Key && "Only SCEVUnionPredicate doesn't have an " " associated expression!"); SCEVToPreds[Key].push_back(N); Preds.push_back(N); } PredicatedScalarEvolution::PredicatedScalarEvolution(ScalarEvolution &SE, Loop &L) : SE(SE), L(L) {} const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) { const SCEV *Expr = SE.getSCEV(V); RewriteEntry &Entry = RewriteMap[Expr]; // If we already have an entry and the version matches, return it. if (Entry.second && Generation == Entry.first) return Entry.second; // We found an entry but it's stale. Rewrite the stale entry // according to the current predicate. if (Entry.second) Expr = Entry.second; const SCEV *NewSCEV = SE.rewriteUsingPredicate(Expr, &L, Preds); Entry = {Generation, NewSCEV}; return NewSCEV; } const SCEV *PredicatedScalarEvolution::getBackedgeTakenCount() { if (!BackedgeCount) { SCEVUnionPredicate BackedgePred; BackedgeCount = SE.getPredicatedBackedgeTakenCount(&L, BackedgePred); addPredicate(BackedgePred); } return BackedgeCount; } void PredicatedScalarEvolution::addPredicate(const SCEVPredicate &Pred) { if (Preds.implies(&Pred)) return; Preds.add(&Pred); updateGeneration(); } const SCEVUnionPredicate &PredicatedScalarEvolution::getUnionPredicate() const { return Preds; } void PredicatedScalarEvolution::updateGeneration() { // If the generation number wrapped recompute everything. if (++Generation == 0) { for (auto &II : RewriteMap) { const SCEV *Rewritten = II.second.second; II.second = {Generation, SE.rewriteUsingPredicate(Rewritten, &L, Preds)}; } } } void PredicatedScalarEvolution::setNoOverflow( Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags) { const SCEV *Expr = getSCEV(V); const auto *AR = cast(Expr); auto ImpliedFlags = SCEVWrapPredicate::getImpliedFlags(AR, SE); // Clear the statically implied flags. 
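  // Only flags that are not already guaranteed by the addrec's static no-wrap
  // flags need to be enforced with a runtime predicate.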
Flags = SCEVWrapPredicate::clearFlags(Flags, ImpliedFlags); addPredicate(*SE.getWrapPredicate(AR, Flags)); auto II = FlagsMap.insert({V, Flags}); if (!II.second) II.first->second = SCEVWrapPredicate::setFlags(Flags, II.first->second); } bool PredicatedScalarEvolution::hasNoOverflow( Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags) { const SCEV *Expr = getSCEV(V); const auto *AR = cast(Expr); Flags = SCEVWrapPredicate::clearFlags( Flags, SCEVWrapPredicate::getImpliedFlags(AR, SE)); auto II = FlagsMap.find(V); if (II != FlagsMap.end()) Flags = SCEVWrapPredicate::clearFlags(Flags, II->second); return Flags == SCEVWrapPredicate::IncrementAnyWrap; } const SCEVAddRecExpr *PredicatedScalarEvolution::getAsAddRec(Value *V) { const SCEV *Expr = this->getSCEV(V); SmallPtrSet NewPreds; auto *New = SE.convertSCEVToAddRecWithPredicates(Expr, &L, NewPreds); if (!New) return nullptr; for (auto *P : NewPreds) Preds.add(P); updateGeneration(); RewriteMap[SE.getSCEV(V)] = {Generation, New}; return New; } PredicatedScalarEvolution::PredicatedScalarEvolution( const PredicatedScalarEvolution &Init) : RewriteMap(Init.RewriteMap), SE(Init.SE), L(Init.L), Preds(Init.Preds), Generation(Init.Generation), BackedgeCount(Init.BackedgeCount) { for (const auto &I : Init.FlagsMap) FlagsMap.insert(I); } void PredicatedScalarEvolution::print(raw_ostream &OS, unsigned Depth) const { // For each block. for (auto *BB : L.getBlocks()) for (auto &I : *BB) { if (!SE.isSCEVable(I.getType())) continue; auto *Expr = SE.getSCEV(&I); auto II = RewriteMap.find(Expr); if (II == RewriteMap.end()) continue; // Don't print things that are not interesting. if (II->second.second == Expr) continue; OS.indent(Depth) << "[PSE]" << I << ":\n"; OS.indent(Depth + 2) << *Expr << "\n"; OS.indent(Depth + 2) << "--> " << *II->second.second << "\n"; } } Index: head/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp =================================================================== --- head/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp (revision 329982) +++ head/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp (revision 329983) @@ -1,5833 +1,5835 @@ //===- BitcodeReader.cpp - Internal BitcodeReader implementation ----------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. 
// //===----------------------------------------------------------------------===// #include "llvm/Bitcode/BitcodeReader.h" #include "MetadataLoader.h" #include "ValueList.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/Bitcode/BitstreamReader.h" #include "llvm/Bitcode/LLVMBitCodes.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/AutoUpgrade.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Comdat.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GVMaterializer.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalIFunc.h" #include "llvm/IR/GlobalIndirectSymbol.h" #include "llvm/IR/GlobalObject.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/IR/Verifier.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" #include #include #include #include #include #include #include #include #include #include #include #include #include using namespace llvm; static cl::opt PrintSummaryGUIDs( "print-summary-global-ids", cl::init(false), cl::Hidden, cl::desc( "Print the global id for each value when reading the module summary")); namespace { enum { SWITCH_INST_MAGIC = 0x4B5 // May 2012 => 1205 => Hex }; } // end anonymous namespace static Error error(const Twine &Message) { return make_error( Message, make_error_code(BitcodeError::CorruptedBitcode)); } /// Helper to read the header common to all bitcode files. static bool hasValidBitcodeHeader(BitstreamCursor &Stream) { // Sniff for the signature. if (!Stream.canSkipToPos(4) || Stream.Read(8) != 'B' || Stream.Read(8) != 'C' || Stream.Read(4) != 0x0 || Stream.Read(4) != 0xC || Stream.Read(4) != 0xE || Stream.Read(4) != 0xD) return false; return true; } static Expected initStream(MemoryBufferRef Buffer) { const unsigned char *BufPtr = (const unsigned char *)Buffer.getBufferStart(); const unsigned char *BufEnd = BufPtr + Buffer.getBufferSize(); if (Buffer.getBufferSize() & 3) return error("Invalid bitcode signature"); // If we have a wrapper header, parse it and ignore the non-bc file contents. // The magic number is 0x0B17C0DE stored in little endian. 
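// (The wrapper header is five little-endian 32-bit fields -- magic, version,
// bitcode offset, bitcode size, CPU type -- and SkipBitcodeWrapperHeader
// repositions the buffer to the embedded bitcode they describe.)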
if (isBitcodeWrapper(BufPtr, BufEnd)) if (SkipBitcodeWrapperHeader(BufPtr, BufEnd, true)) return error("Invalid bitcode wrapper header"); BitstreamCursor Stream(ArrayRef(BufPtr, BufEnd)); if (!hasValidBitcodeHeader(Stream)) return error("Invalid bitcode signature"); return std::move(Stream); } /// Convert a string from a record into an std::string, return true on failure. template static bool convertToString(ArrayRef Record, unsigned Idx, StrTy &Result) { if (Idx > Record.size()) return true; for (unsigned i = Idx, e = Record.size(); i != e; ++i) Result += (char)Record[i]; return false; } // Strip all the TBAA attachment for the module. static void stripTBAA(Module *M) { for (auto &F : *M) { if (F.isMaterializable()) continue; for (auto &I : instructions(F)) I.setMetadata(LLVMContext::MD_tbaa, nullptr); } } /// Read the "IDENTIFICATION_BLOCK_ID" block, do some basic enforcement on the /// "epoch" encoded in the bitcode, and return the producer name if any. static Expected readIdentificationBlock(BitstreamCursor &Stream) { if (Stream.EnterSubBlock(bitc::IDENTIFICATION_BLOCK_ID)) return error("Invalid record"); // Read all the records. SmallVector Record; std::string ProducerIdentification; while (true) { BitstreamEntry Entry = Stream.advance(); switch (Entry.Kind) { default: case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: return ProducerIdentification; case BitstreamEntry::Record: // The interesting case. break; } // Read a record. Record.clear(); unsigned BitCode = Stream.readRecord(Entry.ID, Record); switch (BitCode) { default: // Default behavior: reject return error("Invalid value"); case bitc::IDENTIFICATION_CODE_STRING: // IDENTIFICATION: [strchr x N] convertToString(Record, 0, ProducerIdentification); break; case bitc::IDENTIFICATION_CODE_EPOCH: { // EPOCH: [epoch#] unsigned epoch = (unsigned)Record[0]; if (epoch != bitc::BITCODE_CURRENT_EPOCH) { return error( Twine("Incompatible epoch: Bitcode '") + Twine(epoch) + "' vs current: '" + Twine(bitc::BITCODE_CURRENT_EPOCH) + "'"); } } } } } static Expected readIdentificationCode(BitstreamCursor &Stream) { // We expect a number of well-defined blocks, though we don't necessarily // need to understand them all. while (true) { if (Stream.AtEndOfStream()) return ""; BitstreamEntry Entry = Stream.advance(); switch (Entry.Kind) { case BitstreamEntry::EndBlock: case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::SubBlock: if (Entry.ID == bitc::IDENTIFICATION_BLOCK_ID) return readIdentificationBlock(Stream); // Ignore other sub-blocks. if (Stream.SkipBlock()) return error("Malformed block"); continue; case BitstreamEntry::Record: Stream.skipRecord(Entry.ID); continue; } } } static Expected hasObjCCategoryInModule(BitstreamCursor &Stream) { if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) return error("Invalid record"); SmallVector Record; // Read all the records for this module. while (true) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: return false; case BitstreamEntry::Record: // The interesting case. break; } // Read a record. switch (Stream.readRecord(Entry.ID, Record)) { default: break; // Default behavior, ignore unknown content. 
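    // Only SECTIONNAME records matter for this query: the presence of an
    // Objective-C category is inferred purely from the data sections the
    // module writes to.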
case bitc::MODULE_CODE_SECTIONNAME: { // SECTIONNAME: [strchr x N] std::string S; if (convertToString(Record, 0, S)) return error("Invalid record"); // Check for the i386 and other (x86_64, ARM) conventions if (S.find("__DATA,__objc_catlist") != std::string::npos || S.find("__OBJC,__category") != std::string::npos) return true; break; } } Record.clear(); } llvm_unreachable("Exit infinite loop"); } static Expected hasObjCCategory(BitstreamCursor &Stream) { // We expect a number of well-defined blocks, though we don't necessarily // need to understand them all. while (true) { BitstreamEntry Entry = Stream.advance(); switch (Entry.Kind) { case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: return false; case BitstreamEntry::SubBlock: if (Entry.ID == bitc::MODULE_BLOCK_ID) return hasObjCCategoryInModule(Stream); // Ignore other sub-blocks. if (Stream.SkipBlock()) return error("Malformed block"); continue; case BitstreamEntry::Record: Stream.skipRecord(Entry.ID); continue; } } } static Expected readModuleTriple(BitstreamCursor &Stream) { if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) return error("Invalid record"); SmallVector Record; std::string Triple; // Read all the records for this module. while (true) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: return Triple; case BitstreamEntry::Record: // The interesting case. break; } // Read a record. switch (Stream.readRecord(Entry.ID, Record)) { default: break; // Default behavior, ignore unknown content. case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N] std::string S; if (convertToString(Record, 0, S)) return error("Invalid record"); Triple = S; break; } } Record.clear(); } llvm_unreachable("Exit infinite loop"); } static Expected readTriple(BitstreamCursor &Stream) { // We expect a number of well-defined blocks, though we don't necessarily // need to understand them all. while (true) { BitstreamEntry Entry = Stream.advance(); switch (Entry.Kind) { case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: return ""; case BitstreamEntry::SubBlock: if (Entry.ID == bitc::MODULE_BLOCK_ID) return readModuleTriple(Stream); // Ignore other sub-blocks. if (Stream.SkipBlock()) return error("Malformed block"); continue; case BitstreamEntry::Record: Stream.skipRecord(Entry.ID); continue; } } } namespace { class BitcodeReaderBase { protected: BitcodeReaderBase(BitstreamCursor Stream, StringRef Strtab) : Stream(std::move(Stream)), Strtab(Strtab) { this->Stream.setBlockInfo(&BlockInfo); } BitstreamBlockInfo BlockInfo; BitstreamCursor Stream; StringRef Strtab; /// In version 2 of the bitcode we store names of global values and comdats in /// a string table rather than in the VST. bool UseStrtab = false; Expected parseVersionRecord(ArrayRef Record); /// If this module uses a string table, pop the reference to the string table /// and return the referenced string and the rest of the record. Otherwise /// just return the record itself. 
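  /// In the v2 layout a record begins with [strtab offset, strtab size];
  /// those two fields select the name, and the remaining fields are handed
  /// back unchanged (see the Record[0]/Record[1]/slice(2) handling below).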
std::pair> readNameFromStrtab(ArrayRef Record); bool readBlockInfo(); // Contains an arbitrary and optional string identifying the bitcode producer std::string ProducerIdentification; Error error(const Twine &Message); }; } // end anonymous namespace Error BitcodeReaderBase::error(const Twine &Message) { std::string FullMsg = Message.str(); if (!ProducerIdentification.empty()) FullMsg += " (Producer: '" + ProducerIdentification + "' Reader: 'LLVM " + LLVM_VERSION_STRING "')"; return ::error(FullMsg); } Expected BitcodeReaderBase::parseVersionRecord(ArrayRef Record) { if (Record.empty()) return error("Invalid record"); unsigned ModuleVersion = Record[0]; if (ModuleVersion > 2) return error("Invalid value"); UseStrtab = ModuleVersion >= 2; return ModuleVersion; } std::pair> BitcodeReaderBase::readNameFromStrtab(ArrayRef Record) { if (!UseStrtab) return {"", Record}; // Invalid reference. Let the caller complain about the record being empty. if (Record[0] + Record[1] > Strtab.size()) return {"", {}}; return {StringRef(Strtab.data() + Record[0], Record[1]), Record.slice(2)}; } namespace { class BitcodeReader : public BitcodeReaderBase, public GVMaterializer { LLVMContext &Context; Module *TheModule = nullptr; // Next offset to start scanning for lazy parsing of function bodies. uint64_t NextUnreadBit = 0; // Last function offset found in the VST. uint64_t LastFunctionBlockBit = 0; bool SeenValueSymbolTable = false; uint64_t VSTOffset = 0; std::vector SectionTable; std::vector GCTable; std::vector TypeList; BitcodeReaderValueList ValueList; Optional MDLoader; std::vector ComdatList; SmallVector InstructionList; std::vector> GlobalInits; std::vector> IndirectSymbolInits; std::vector> FunctionPrefixes; std::vector> FunctionPrologues; std::vector> FunctionPersonalityFns; /// The set of attributes by index. Index zero in the file is for null, and /// is thus not represented here. As such all indices are off by one. std::vector MAttributes; /// The set of attribute groups. std::map MAttributeGroups; /// While parsing a function body, this is a list of the basic blocks for the /// function. std::vector FunctionBBs; // When reading the module header, this list is populated with functions that // have bodies later in the file. std::vector FunctionsWithBodies; // When intrinsic functions are encountered which require upgrading they are // stored here with their replacement function. using UpdatedIntrinsicMap = DenseMap; UpdatedIntrinsicMap UpgradedIntrinsics; // Intrinsics which were remangled because of types rename UpdatedIntrinsicMap RemangledIntrinsics; // Several operations happen after the module header has been read, but // before function bodies are processed. This keeps track of whether // we've done this yet. bool SeenFirstFunctionBody = false; /// When function bodies are initially scanned, this map contains info about /// where to find deferred function body in the stream. DenseMap DeferredFunctionInfo; /// When Metadata block is initially scanned when parsing the module, we may /// choose to defer parsing of the metadata. This vector contains info about /// which Metadata blocks are deferred. std::vector DeferredMetadataInfo; /// These are basic blocks forward-referenced by block addresses. They are /// inserted lazily into functions when they're loaded. The basic block ID is /// its index into the vector. 
DenseMap> BasicBlockFwdRefs; std::deque BasicBlockFwdRefQueue; /// Indicates that we are using a new encoding for instruction operands where /// most operands in the current FUNCTION_BLOCK are encoded relative to the /// instruction number, for a more compact encoding. Some instruction /// operands are not relative to the instruction ID: basic block numbers, and /// types. Once the old style function blocks have been phased out, we would /// not need this flag. bool UseRelativeIDs = false; /// True if all functions will be materialized, negating the need to process /// (e.g.) blockaddress forward references. bool WillMaterializeAllForwardRefs = false; bool StripDebugInfo = false; TBAAVerifier TBAAVerifyHelper; std::vector BundleTags; SmallVector SSIDs; public: BitcodeReader(BitstreamCursor Stream, StringRef Strtab, StringRef ProducerIdentification, LLVMContext &Context); Error materializeForwardReferencedFunctions(); Error materialize(GlobalValue *GV) override; Error materializeModule() override; std::vector getIdentifiedStructTypes() const override; /// \brief Main interface to parsing a bitcode buffer. /// \returns true if an error occurred. Error parseBitcodeInto(Module *M, bool ShouldLazyLoadMetadata = false, bool IsImporting = false); static uint64_t decodeSignRotatedValue(uint64_t V); /// Materialize any deferred Metadata block. Error materializeMetadata() override; void setStripDebugInfo() override; private: std::vector IdentifiedStructTypes; StructType *createIdentifiedStructType(LLVMContext &Context, StringRef Name); StructType *createIdentifiedStructType(LLVMContext &Context); Type *getTypeByID(unsigned ID); Value *getFnValueByID(unsigned ID, Type *Ty) { if (Ty && Ty->isMetadataTy()) return MetadataAsValue::get(Ty->getContext(), getFnMetadataByID(ID)); return ValueList.getValueFwdRef(ID, Ty); } Metadata *getFnMetadataByID(unsigned ID) { return MDLoader->getMetadataFwdRefOrLoad(ID); } BasicBlock *getBasicBlock(unsigned ID) const { if (ID >= FunctionBBs.size()) return nullptr; // Invalid ID return FunctionBBs[ID]; } AttributeList getAttributes(unsigned i) const { if (i-1 < MAttributes.size()) return MAttributes[i-1]; return AttributeList(); } /// Read a value/type pair out of the specified record from slot 'Slot'. /// Increment Slot past the number of slots used in the record. Return true on /// failure. bool getValueTypePair(SmallVectorImpl &Record, unsigned &Slot, unsigned InstNum, Value *&ResVal) { if (Slot == Record.size()) return true; unsigned ValNo = (unsigned)Record[Slot++]; // Adjust the ValNo, if it was encoded relative to the InstNum. if (UseRelativeIDs) ValNo = InstNum - ValNo; if (ValNo < InstNum) { // If this is not a forward reference, just return the value we already // have. ResVal = getFnValueByID(ValNo, nullptr); return ResVal == nullptr; } if (Slot == Record.size()) return true; unsigned TypeNo = (unsigned)Record[Slot++]; ResVal = getFnValueByID(ValNo, getTypeByID(TypeNo)); return ResVal == nullptr; } /// Read a value out of the specified record from slot 'Slot'. Increment Slot /// past the number of slots used by the value in the record. Return true if /// there is an error. bool popValue(SmallVectorImpl &Record, unsigned &Slot, unsigned InstNum, Type *Ty, Value *&ResVal) { if (getValue(Record, Slot, InstNum, Ty, ResVal)) return true; // All values currently take a single record slot. ++Slot; return false; } /// Like popValue, but does not increment the Slot number. 
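  /// (With UseRelativeIDs an operand is stored as a delta: instruction #10
  /// referring to value #7 encodes 3, and the readers recover it as
  /// InstNum - ValNo == 10 - 3 == 7.)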
bool getValue(SmallVectorImpl &Record, unsigned Slot, unsigned InstNum, Type *Ty, Value *&ResVal) { ResVal = getValue(Record, Slot, InstNum, Ty); return ResVal == nullptr; } /// Version of getValue that returns ResVal directly, or 0 if there is an /// error. Value *getValue(SmallVectorImpl &Record, unsigned Slot, unsigned InstNum, Type *Ty) { if (Slot == Record.size()) return nullptr; unsigned ValNo = (unsigned)Record[Slot]; // Adjust the ValNo, if it was encoded relative to the InstNum. if (UseRelativeIDs) ValNo = InstNum - ValNo; return getFnValueByID(ValNo, Ty); } /// Like getValue, but decodes signed VBRs. Value *getValueSigned(SmallVectorImpl &Record, unsigned Slot, unsigned InstNum, Type *Ty) { if (Slot == Record.size()) return nullptr; unsigned ValNo = (unsigned)decodeSignRotatedValue(Record[Slot]); // Adjust the ValNo, if it was encoded relative to the InstNum. if (UseRelativeIDs) ValNo = InstNum - ValNo; return getFnValueByID(ValNo, Ty); } /// Converts alignment exponent (i.e. power of two (or zero)) to the /// corresponding alignment to use. If alignment is too large, returns /// a corresponding error code. Error parseAlignmentValue(uint64_t Exponent, unsigned &Alignment); Error parseAttrKind(uint64_t Code, Attribute::AttrKind *Kind); Error parseModule(uint64_t ResumeBit, bool ShouldLazyLoadMetadata = false); Error parseComdatRecord(ArrayRef Record); Error parseGlobalVarRecord(ArrayRef Record); Error parseFunctionRecord(ArrayRef Record); Error parseGlobalIndirectSymbolRecord(unsigned BitCode, ArrayRef Record); Error parseAttributeBlock(); Error parseAttributeGroupBlock(); Error parseTypeTable(); Error parseTypeTableBody(); Error parseOperandBundleTags(); Error parseSyncScopeNames(); Expected recordValue(SmallVectorImpl &Record, unsigned NameIndex, Triple &TT); void setDeferredFunctionInfo(unsigned FuncBitcodeOffsetDelta, Function *F, ArrayRef Record); Error parseValueSymbolTable(uint64_t Offset = 0); Error parseGlobalValueSymbolTable(); Error parseConstants(); Error rememberAndSkipFunctionBodies(); Error rememberAndSkipFunctionBody(); /// Save the positions of the Metadata blocks and skip parsing the blocks. Error rememberAndSkipMetadata(); Error typeCheckLoadStoreInst(Type *ValType, Type *PtrType); Error parseFunctionBody(Function *F); Error globalCleanup(); Error resolveGlobalAndIndirectSymbolInits(); Error parseUseLists(); Error findFunctionInStream( Function *F, DenseMap::iterator DeferredFunctionInfoIterator); SyncScope::ID getDecodedSyncScopeID(unsigned Val); }; /// Class to manage reading and parsing function summary index bitcode /// files/sections. class ModuleSummaryIndexBitcodeReader : public BitcodeReaderBase { /// The module index built during parsing. ModuleSummaryIndex &TheIndex; /// Indicates whether we have encountered a global value summary section /// yet during parsing. bool SeenGlobalValSummary = false; /// Indicates whether we have already parsed the VST, used for error checking. bool SeenValueSymbolTable = false; /// Set to the offset of the VST recorded in the MODULE_CODE_VSTOFFSET record. /// Used to enable on-demand parsing of the VST. uint64_t VSTOffset = 0; // Map to save ValueId to ValueInfo association that was recorded in the // ValueSymbolTable. It is used after the VST is parsed to convert // call graph edges read from the function summary from referencing // callees by their ValueId to using the ValueInfo instead, which is how // they are recorded in the summary index being built. 
// We save a GUID which refers to the same global as the ValueInfo, but // ignoring the linkage, i.e. for values other than local linkage they are // identical. DenseMap> ValueIdToValueInfoMap; /// Map populated during module path string table parsing, from the /// module ID to a string reference owned by the index's module /// path string table, used to correlate with combined index /// summary records. DenseMap ModuleIdMap; /// Original source file name recorded in a bitcode record. std::string SourceFileName; /// The string identifier given to this module by the client, normally the /// path to the bitcode file. StringRef ModulePath; /// For per-module summary indexes, the unique numerical identifier given to /// this module by the client. unsigned ModuleId; public: ModuleSummaryIndexBitcodeReader(BitstreamCursor Stream, StringRef Strtab, ModuleSummaryIndex &TheIndex, StringRef ModulePath, unsigned ModuleId); Error parseModule(); private: void setValueGUID(uint64_t ValueID, StringRef ValueName, GlobalValue::LinkageTypes Linkage, StringRef SourceFileName); Error parseValueSymbolTable( uint64_t Offset, DenseMap &ValueIdToLinkageMap); std::vector makeRefList(ArrayRef Record); std::vector makeCallList(ArrayRef Record, bool IsOldProfileFormat, bool HasProfile); Error parseEntireSummary(unsigned ID); Error parseModuleStringTable(); std::pair getValueInfoFromValueId(unsigned ValueId); ModuleSummaryIndex::ModuleInfo *addThisModule(); }; } // end anonymous namespace std::error_code llvm::errorToErrorCodeAndEmitErrors(LLVMContext &Ctx, Error Err) { if (Err) { std::error_code EC; handleAllErrors(std::move(Err), [&](ErrorInfoBase &EIB) { EC = EIB.convertToErrorCode(); Ctx.emitError(EIB.message()); }); return EC; } return std::error_code(); } BitcodeReader::BitcodeReader(BitstreamCursor Stream, StringRef Strtab, StringRef ProducerIdentification, LLVMContext &Context) : BitcodeReaderBase(std::move(Stream), Strtab), Context(Context), ValueList(Context) { this->ProducerIdentification = ProducerIdentification; } Error BitcodeReader::materializeForwardReferencedFunctions() { if (WillMaterializeAllForwardRefs) return Error::success(); // Prevent recursion. WillMaterializeAllForwardRefs = true; while (!BasicBlockFwdRefQueue.empty()) { Function *F = BasicBlockFwdRefQueue.front(); BasicBlockFwdRefQueue.pop_front(); assert(F && "Expected valid function"); if (!BasicBlockFwdRefs.count(F)) // Already materialized. continue; // Check for a function that isn't materializable to prevent an infinite // loop. When parsing a blockaddress stored in a global variable, there // isn't a trivial way to check if a function will have a body without a // linear search through FunctionsWithBodies, so just check it here. if (!F->isMaterializable()) return error("Never resolved function from blockaddress"); // Try to materialize F. if (Error Err = materialize(F)) return Err; } assert(BasicBlockFwdRefs.empty() && "Function missing from queue"); // Reset state. WillMaterializeAllForwardRefs = false; return Error::success(); } //===----------------------------------------------------------------------===// // Helper functions to implement forward reference resolution, etc. 
//===----------------------------------------------------------------------===// static bool hasImplicitComdat(size_t Val) { switch (Val) { default: return false; case 1: // Old WeakAnyLinkage case 4: // Old LinkOnceAnyLinkage case 10: // Old WeakODRLinkage case 11: // Old LinkOnceODRLinkage return true; } } static GlobalValue::LinkageTypes getDecodedLinkage(unsigned Val) { switch (Val) { default: // Map unknown/new linkages to external case 0: return GlobalValue::ExternalLinkage; case 2: return GlobalValue::AppendingLinkage; case 3: return GlobalValue::InternalLinkage; case 5: return GlobalValue::ExternalLinkage; // Obsolete DLLImportLinkage case 6: return GlobalValue::ExternalLinkage; // Obsolete DLLExportLinkage case 7: return GlobalValue::ExternalWeakLinkage; case 8: return GlobalValue::CommonLinkage; case 9: return GlobalValue::PrivateLinkage; case 12: return GlobalValue::AvailableExternallyLinkage; case 13: return GlobalValue::PrivateLinkage; // Obsolete LinkerPrivateLinkage case 14: return GlobalValue::PrivateLinkage; // Obsolete LinkerPrivateWeakLinkage case 15: return GlobalValue::ExternalLinkage; // Obsolete LinkOnceODRAutoHideLinkage case 1: // Old value with implicit comdat. case 16: return GlobalValue::WeakAnyLinkage; case 10: // Old value with implicit comdat. case 17: return GlobalValue::WeakODRLinkage; case 4: // Old value with implicit comdat. case 18: return GlobalValue::LinkOnceAnyLinkage; case 11: // Old value with implicit comdat. case 19: return GlobalValue::LinkOnceODRLinkage; } } static FunctionSummary::FFlags getDecodedFFlags(uint64_t RawFlags) { FunctionSummary::FFlags Flags; Flags.ReadNone = RawFlags & 0x1; Flags.ReadOnly = (RawFlags >> 1) & 0x1; Flags.NoRecurse = (RawFlags >> 2) & 0x1; Flags.ReturnDoesNotAlias = (RawFlags >> 3) & 0x1; return Flags; } /// Decode the flags for GlobalValue in the summary. static GlobalValueSummary::GVFlags getDecodedGVSummaryFlags(uint64_t RawFlags, uint64_t Version) { // Summary were not emitted before LLVM 3.9, we don't need to upgrade Linkage // like getDecodedLinkage() above. Any future change to the linkage enum and // to getDecodedLinkage() will need to be taken into account here as above. auto Linkage = GlobalValue::LinkageTypes(RawFlags & 0xF); // 4 bits RawFlags = RawFlags >> 4; bool NotEligibleToImport = (RawFlags & 0x1) || Version < 3; // The Live flag wasn't introduced until version 3. For dead stripping // to work correctly on earlier versions, we must conservatively treat all // values as live. bool Live = (RawFlags & 0x2) || Version < 3; bool Local = (RawFlags & 0x4); return GlobalValueSummary::GVFlags(Linkage, NotEligibleToImport, Live, Local); } static GlobalValue::VisibilityTypes getDecodedVisibility(unsigned Val) { switch (Val) { default: // Map unknown visibilities to default. case 0: return GlobalValue::DefaultVisibility; case 1: return GlobalValue::HiddenVisibility; case 2: return GlobalValue::ProtectedVisibility; } } static GlobalValue::DLLStorageClassTypes getDecodedDLLStorageClass(unsigned Val) { switch (Val) { default: // Map unknown values to default. case 0: return GlobalValue::DefaultStorageClass; case 1: return GlobalValue::DLLImportStorageClass; case 2: return GlobalValue::DLLExportStorageClass; } } static bool getDecodedDSOLocal(unsigned Val) { switch(Val) { default: // Map unknown values to preemptable. 
case 0: return false; case 1: return true; } } static GlobalVariable::ThreadLocalMode getDecodedThreadLocalMode(unsigned Val) { switch (Val) { case 0: return GlobalVariable::NotThreadLocal; default: // Map unknown non-zero value to general dynamic. case 1: return GlobalVariable::GeneralDynamicTLSModel; case 2: return GlobalVariable::LocalDynamicTLSModel; case 3: return GlobalVariable::InitialExecTLSModel; case 4: return GlobalVariable::LocalExecTLSModel; } } static GlobalVariable::UnnamedAddr getDecodedUnnamedAddrType(unsigned Val) { switch (Val) { default: // Map unknown to UnnamedAddr::None. case 0: return GlobalVariable::UnnamedAddr::None; case 1: return GlobalVariable::UnnamedAddr::Global; case 2: return GlobalVariable::UnnamedAddr::Local; } } static int getDecodedCastOpcode(unsigned Val) { switch (Val) { default: return -1; case bitc::CAST_TRUNC : return Instruction::Trunc; case bitc::CAST_ZEXT : return Instruction::ZExt; case bitc::CAST_SEXT : return Instruction::SExt; case bitc::CAST_FPTOUI : return Instruction::FPToUI; case bitc::CAST_FPTOSI : return Instruction::FPToSI; case bitc::CAST_UITOFP : return Instruction::UIToFP; case bitc::CAST_SITOFP : return Instruction::SIToFP; case bitc::CAST_FPTRUNC : return Instruction::FPTrunc; case bitc::CAST_FPEXT : return Instruction::FPExt; case bitc::CAST_PTRTOINT: return Instruction::PtrToInt; case bitc::CAST_INTTOPTR: return Instruction::IntToPtr; case bitc::CAST_BITCAST : return Instruction::BitCast; case bitc::CAST_ADDRSPACECAST: return Instruction::AddrSpaceCast; } } static int getDecodedBinaryOpcode(unsigned Val, Type *Ty) { bool IsFP = Ty->isFPOrFPVectorTy(); // BinOps are only valid for int/fp or vector of int/fp types if (!IsFP && !Ty->isIntOrIntVectorTy()) return -1; switch (Val) { default: return -1; case bitc::BINOP_ADD: return IsFP ? Instruction::FAdd : Instruction::Add; case bitc::BINOP_SUB: return IsFP ? Instruction::FSub : Instruction::Sub; case bitc::BINOP_MUL: return IsFP ? Instruction::FMul : Instruction::Mul; case bitc::BINOP_UDIV: return IsFP ? -1 : Instruction::UDiv; case bitc::BINOP_SDIV: return IsFP ? Instruction::FDiv : Instruction::SDiv; case bitc::BINOP_UREM: return IsFP ? -1 : Instruction::URem; case bitc::BINOP_SREM: return IsFP ? Instruction::FRem : Instruction::SRem; case bitc::BINOP_SHL: return IsFP ? -1 : Instruction::Shl; case bitc::BINOP_LSHR: return IsFP ? -1 : Instruction::LShr; case bitc::BINOP_ASHR: return IsFP ? -1 : Instruction::AShr; case bitc::BINOP_AND: return IsFP ? -1 : Instruction::And; case bitc::BINOP_OR: return IsFP ? -1 : Instruction::Or; case bitc::BINOP_XOR: return IsFP ? 
                              -1 : Instruction::Xor;
  }
}

static AtomicRMWInst::BinOp getDecodedRMWOperation(unsigned Val) {
  switch (Val) {
  default: return AtomicRMWInst::BAD_BINOP;
  case bitc::RMW_XCHG: return AtomicRMWInst::Xchg;
  case bitc::RMW_ADD: return AtomicRMWInst::Add;
  case bitc::RMW_SUB: return AtomicRMWInst::Sub;
  case bitc::RMW_AND: return AtomicRMWInst::And;
  case bitc::RMW_NAND: return AtomicRMWInst::Nand;
  case bitc::RMW_OR: return AtomicRMWInst::Or;
  case bitc::RMW_XOR: return AtomicRMWInst::Xor;
  case bitc::RMW_MAX: return AtomicRMWInst::Max;
  case bitc::RMW_MIN: return AtomicRMWInst::Min;
  case bitc::RMW_UMAX: return AtomicRMWInst::UMax;
  case bitc::RMW_UMIN: return AtomicRMWInst::UMin;
  }
}

static AtomicOrdering getDecodedOrdering(unsigned Val) {
  switch (Val) {
  case bitc::ORDERING_NOTATOMIC: return AtomicOrdering::NotAtomic;
  case bitc::ORDERING_UNORDERED: return AtomicOrdering::Unordered;
  case bitc::ORDERING_MONOTONIC: return AtomicOrdering::Monotonic;
  case bitc::ORDERING_ACQUIRE: return AtomicOrdering::Acquire;
  case bitc::ORDERING_RELEASE: return AtomicOrdering::Release;
  case bitc::ORDERING_ACQREL: return AtomicOrdering::AcquireRelease;
  default: // Map unknown orderings to sequentially-consistent.
  case bitc::ORDERING_SEQCST: return AtomicOrdering::SequentiallyConsistent;
  }
}

static Comdat::SelectionKind getDecodedComdatSelectionKind(unsigned Val) {
  switch (Val) {
  default: // Map unknown selection kinds to any.
  case bitc::COMDAT_SELECTION_KIND_ANY: return Comdat::Any;
  case bitc::COMDAT_SELECTION_KIND_EXACT_MATCH: return Comdat::ExactMatch;
  case bitc::COMDAT_SELECTION_KIND_LARGEST: return Comdat::Largest;
  case bitc::COMDAT_SELECTION_KIND_NO_DUPLICATES: return Comdat::NoDuplicates;
  case bitc::COMDAT_SELECTION_KIND_SAME_SIZE: return Comdat::SameSize;
  }
}

static FastMathFlags getDecodedFastMathFlags(unsigned Val) {
  FastMathFlags FMF;
-  if (0 != (Val & FastMathFlags::AllowReassoc))
+  if (0 != (Val & bitc::UnsafeAlgebra))
+    FMF.setFast();
+  if (0 != (Val & bitc::AllowReassoc))
     FMF.setAllowReassoc();
-  if (0 != (Val & FastMathFlags::NoNaNs))
+  if (0 != (Val & bitc::NoNaNs))
     FMF.setNoNaNs();
-  if (0 != (Val & FastMathFlags::NoInfs))
+  if (0 != (Val & bitc::NoInfs))
     FMF.setNoInfs();
-  if (0 != (Val & FastMathFlags::NoSignedZeros))
+  if (0 != (Val & bitc::NoSignedZeros))
     FMF.setNoSignedZeros();
-  if (0 != (Val & FastMathFlags::AllowReciprocal))
+  if (0 != (Val & bitc::AllowReciprocal))
     FMF.setAllowReciprocal();
-  if (0 != (Val & FastMathFlags::AllowContract))
+  if (0 != (Val & bitc::AllowContract))
     FMF.setAllowContract(true);
-  if (0 != (Val & FastMathFlags::ApproxFunc))
+  if (0 != (Val & bitc::ApproxFunc))
     FMF.setApproxFunc();
  return FMF;
}

static void upgradeDLLImportExportLinkage(GlobalValue *GV, unsigned Val) {
  switch (Val) {
  case 5: GV->setDLLStorageClass(GlobalValue::DLLImportStorageClass); break;
  case 6: GV->setDLLStorageClass(GlobalValue::DLLExportStorageClass); break;
  }
}

Type *BitcodeReader::getTypeByID(unsigned ID) {
  // The type table size is always specified correctly.
  if (ID >= TypeList.size())
    return nullptr;

  if (Type *Ty = TypeList[ID])
    return Ty;

  // If we have a forward reference, the only possible case is when it is to a
  // named struct. Just create a placeholder for now.
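  // The placeholder is filled in (body and name) when the corresponding
  // TYPE_CODE_STRUCT_NAMED or OPAQUE record is eventually parsed in
  // parseTypeTableBody().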
return TypeList[ID] = createIdentifiedStructType(Context); } StructType *BitcodeReader::createIdentifiedStructType(LLVMContext &Context, StringRef Name) { auto *Ret = StructType::create(Context, Name); IdentifiedStructTypes.push_back(Ret); return Ret; } StructType *BitcodeReader::createIdentifiedStructType(LLVMContext &Context) { auto *Ret = StructType::create(Context); IdentifiedStructTypes.push_back(Ret); return Ret; } //===----------------------------------------------------------------------===// // Functions for parsing blocks from the bitcode file //===----------------------------------------------------------------------===// static uint64_t getRawAttributeMask(Attribute::AttrKind Val) { switch (Val) { case Attribute::EndAttrKinds: llvm_unreachable("Synthetic enumerators which should never get here"); case Attribute::None: return 0; case Attribute::ZExt: return 1 << 0; case Attribute::SExt: return 1 << 1; case Attribute::NoReturn: return 1 << 2; case Attribute::InReg: return 1 << 3; case Attribute::StructRet: return 1 << 4; case Attribute::NoUnwind: return 1 << 5; case Attribute::NoAlias: return 1 << 6; case Attribute::ByVal: return 1 << 7; case Attribute::Nest: return 1 << 8; case Attribute::ReadNone: return 1 << 9; case Attribute::ReadOnly: return 1 << 10; case Attribute::NoInline: return 1 << 11; case Attribute::AlwaysInline: return 1 << 12; case Attribute::OptimizeForSize: return 1 << 13; case Attribute::StackProtect: return 1 << 14; case Attribute::StackProtectReq: return 1 << 15; case Attribute::Alignment: return 31 << 16; case Attribute::NoCapture: return 1 << 21; case Attribute::NoRedZone: return 1 << 22; case Attribute::NoImplicitFloat: return 1 << 23; case Attribute::Naked: return 1 << 24; case Attribute::InlineHint: return 1 << 25; case Attribute::StackAlignment: return 7 << 26; case Attribute::ReturnsTwice: return 1 << 29; case Attribute::UWTable: return 1 << 30; case Attribute::NonLazyBind: return 1U << 31; case Attribute::SanitizeAddress: return 1ULL << 32; case Attribute::MinSize: return 1ULL << 33; case Attribute::NoDuplicate: return 1ULL << 34; case Attribute::StackProtectStrong: return 1ULL << 35; case Attribute::SanitizeThread: return 1ULL << 36; case Attribute::SanitizeMemory: return 1ULL << 37; case Attribute::NoBuiltin: return 1ULL << 38; case Attribute::Returned: return 1ULL << 39; case Attribute::Cold: return 1ULL << 40; case Attribute::Builtin: return 1ULL << 41; case Attribute::OptimizeNone: return 1ULL << 42; case Attribute::InAlloca: return 1ULL << 43; case Attribute::NonNull: return 1ULL << 44; case Attribute::JumpTable: return 1ULL << 45; case Attribute::Convergent: return 1ULL << 46; case Attribute::SafeStack: return 1ULL << 47; case Attribute::NoRecurse: return 1ULL << 48; case Attribute::InaccessibleMemOnly: return 1ULL << 49; case Attribute::InaccessibleMemOrArgMemOnly: return 1ULL << 50; case Attribute::SwiftSelf: return 1ULL << 51; case Attribute::SwiftError: return 1ULL << 52; case Attribute::WriteOnly: return 1ULL << 53; case Attribute::Speculatable: return 1ULL << 54; case Attribute::StrictFP: return 1ULL << 55; case Attribute::SanitizeHWAddress: return 1ULL << 56; case Attribute::Dereferenceable: llvm_unreachable("dereferenceable attribute not supported in raw format"); break; case Attribute::DereferenceableOrNull: llvm_unreachable("dereferenceable_or_null attribute not supported in raw " "format"); break; case Attribute::ArgMemOnly: llvm_unreachable("argmemonly attribute not supported in raw format"); break; case Attribute::AllocSize: 
llvm_unreachable("allocsize not supported in raw format"); break; } llvm_unreachable("Unsupported attribute type"); } static void addRawAttributeValue(AttrBuilder &B, uint64_t Val) { if (!Val) return; for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds; I = Attribute::AttrKind(I + 1)) { if (I == Attribute::Dereferenceable || I == Attribute::DereferenceableOrNull || I == Attribute::ArgMemOnly || I == Attribute::AllocSize) continue; if (uint64_t A = (Val & getRawAttributeMask(I))) { if (I == Attribute::Alignment) B.addAlignmentAttr(1ULL << ((A >> 16) - 1)); else if (I == Attribute::StackAlignment) B.addStackAlignmentAttr(1ULL << ((A >> 26)-1)); else B.addAttribute(I); } } } /// \brief This fills an AttrBuilder object with the LLVM attributes that have /// been decoded from the given integer. This function must stay in sync with /// 'encodeLLVMAttributesForBitcode'. static void decodeLLVMAttributesForBitcode(AttrBuilder &B, uint64_t EncodedAttrs) { // FIXME: Remove in 4.0. // The alignment is stored as a 16-bit raw value from bits 31--16. We shift // the bits above 31 down by 11 bits. unsigned Alignment = (EncodedAttrs & (0xffffULL << 16)) >> 16; assert((!Alignment || isPowerOf2_32(Alignment)) && "Alignment must be a power of two."); if (Alignment) B.addAlignmentAttr(Alignment); addRawAttributeValue(B, ((EncodedAttrs & (0xfffffULL << 32)) >> 11) | (EncodedAttrs & 0xffff)); } Error BitcodeReader::parseAttributeBlock() { if (Stream.EnterSubBlock(bitc::PARAMATTR_BLOCK_ID)) return error("Invalid record"); if (!MAttributes.empty()) return error("Invalid multiple blocks"); SmallVector Record; SmallVector Attrs; // Read all the records. while (true) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: return Error::success(); case BitstreamEntry::Record: // The interesting case. break; } // Read a record. Record.clear(); switch (Stream.readRecord(Entry.ID, Record)) { default: // Default behavior: ignore. break; case bitc::PARAMATTR_CODE_ENTRY_OLD: // ENTRY: [paramidx0, attr0, ...] // FIXME: Remove in 4.0. if (Record.size() & 1) return error("Invalid record"); for (unsigned i = 0, e = Record.size(); i != e; i += 2) { AttrBuilder B; decodeLLVMAttributesForBitcode(B, Record[i+1]); Attrs.push_back(AttributeList::get(Context, Record[i], B)); } MAttributes.push_back(AttributeList::get(Context, Attrs)); Attrs.clear(); break; case bitc::PARAMATTR_CODE_ENTRY: // ENTRY: [attrgrp0, attrgrp1, ...] for (unsigned i = 0, e = Record.size(); i != e; ++i) Attrs.push_back(MAttributeGroups[Record[i]]); MAttributes.push_back(AttributeList::get(Context, Attrs)); Attrs.clear(); break; } } } // Returns Attribute::None on unrecognized codes. 
static Attribute::AttrKind getAttrFromCode(uint64_t Code) { switch (Code) { default: return Attribute::None; case bitc::ATTR_KIND_ALIGNMENT: return Attribute::Alignment; case bitc::ATTR_KIND_ALWAYS_INLINE: return Attribute::AlwaysInline; case bitc::ATTR_KIND_ARGMEMONLY: return Attribute::ArgMemOnly; case bitc::ATTR_KIND_BUILTIN: return Attribute::Builtin; case bitc::ATTR_KIND_BY_VAL: return Attribute::ByVal; case bitc::ATTR_KIND_IN_ALLOCA: return Attribute::InAlloca; case bitc::ATTR_KIND_COLD: return Attribute::Cold; case bitc::ATTR_KIND_CONVERGENT: return Attribute::Convergent; case bitc::ATTR_KIND_INACCESSIBLEMEM_ONLY: return Attribute::InaccessibleMemOnly; case bitc::ATTR_KIND_INACCESSIBLEMEM_OR_ARGMEMONLY: return Attribute::InaccessibleMemOrArgMemOnly; case bitc::ATTR_KIND_INLINE_HINT: return Attribute::InlineHint; case bitc::ATTR_KIND_IN_REG: return Attribute::InReg; case bitc::ATTR_KIND_JUMP_TABLE: return Attribute::JumpTable; case bitc::ATTR_KIND_MIN_SIZE: return Attribute::MinSize; case bitc::ATTR_KIND_NAKED: return Attribute::Naked; case bitc::ATTR_KIND_NEST: return Attribute::Nest; case bitc::ATTR_KIND_NO_ALIAS: return Attribute::NoAlias; case bitc::ATTR_KIND_NO_BUILTIN: return Attribute::NoBuiltin; case bitc::ATTR_KIND_NO_CAPTURE: return Attribute::NoCapture; case bitc::ATTR_KIND_NO_DUPLICATE: return Attribute::NoDuplicate; case bitc::ATTR_KIND_NO_IMPLICIT_FLOAT: return Attribute::NoImplicitFloat; case bitc::ATTR_KIND_NO_INLINE: return Attribute::NoInline; case bitc::ATTR_KIND_NO_RECURSE: return Attribute::NoRecurse; case bitc::ATTR_KIND_NON_LAZY_BIND: return Attribute::NonLazyBind; case bitc::ATTR_KIND_NON_NULL: return Attribute::NonNull; case bitc::ATTR_KIND_DEREFERENCEABLE: return Attribute::Dereferenceable; case bitc::ATTR_KIND_DEREFERENCEABLE_OR_NULL: return Attribute::DereferenceableOrNull; case bitc::ATTR_KIND_ALLOC_SIZE: return Attribute::AllocSize; case bitc::ATTR_KIND_NO_RED_ZONE: return Attribute::NoRedZone; case bitc::ATTR_KIND_NO_RETURN: return Attribute::NoReturn; case bitc::ATTR_KIND_NO_UNWIND: return Attribute::NoUnwind; case bitc::ATTR_KIND_OPTIMIZE_FOR_SIZE: return Attribute::OptimizeForSize; case bitc::ATTR_KIND_OPTIMIZE_NONE: return Attribute::OptimizeNone; case bitc::ATTR_KIND_READ_NONE: return Attribute::ReadNone; case bitc::ATTR_KIND_READ_ONLY: return Attribute::ReadOnly; case bitc::ATTR_KIND_RETURNED: return Attribute::Returned; case bitc::ATTR_KIND_RETURNS_TWICE: return Attribute::ReturnsTwice; case bitc::ATTR_KIND_S_EXT: return Attribute::SExt; case bitc::ATTR_KIND_SPECULATABLE: return Attribute::Speculatable; case bitc::ATTR_KIND_STACK_ALIGNMENT: return Attribute::StackAlignment; case bitc::ATTR_KIND_STACK_PROTECT: return Attribute::StackProtect; case bitc::ATTR_KIND_STACK_PROTECT_REQ: return Attribute::StackProtectReq; case bitc::ATTR_KIND_STACK_PROTECT_STRONG: return Attribute::StackProtectStrong; case bitc::ATTR_KIND_SAFESTACK: return Attribute::SafeStack; case bitc::ATTR_KIND_STRICT_FP: return Attribute::StrictFP; case bitc::ATTR_KIND_STRUCT_RET: return Attribute::StructRet; case bitc::ATTR_KIND_SANITIZE_ADDRESS: return Attribute::SanitizeAddress; case bitc::ATTR_KIND_SANITIZE_HWADDRESS: return Attribute::SanitizeHWAddress; case bitc::ATTR_KIND_SANITIZE_THREAD: return Attribute::SanitizeThread; case bitc::ATTR_KIND_SANITIZE_MEMORY: return Attribute::SanitizeMemory; case bitc::ATTR_KIND_SWIFT_ERROR: return Attribute::SwiftError; case bitc::ATTR_KIND_SWIFT_SELF: return Attribute::SwiftSelf; case bitc::ATTR_KIND_UW_TABLE: return Attribute::UWTable; 
case bitc::ATTR_KIND_WRITEONLY: return Attribute::WriteOnly; case bitc::ATTR_KIND_Z_EXT: return Attribute::ZExt; } } Error BitcodeReader::parseAlignmentValue(uint64_t Exponent, unsigned &Alignment) { // Note: Alignment in bitcode files is incremented by 1, so that zero // can be used for default alignment. if (Exponent > Value::MaxAlignmentExponent + 1) return error("Invalid alignment value"); Alignment = (1 << static_cast(Exponent)) >> 1; return Error::success(); } Error BitcodeReader::parseAttrKind(uint64_t Code, Attribute::AttrKind *Kind) { *Kind = getAttrFromCode(Code); if (*Kind == Attribute::None) return error("Unknown attribute kind (" + Twine(Code) + ")"); return Error::success(); } Error BitcodeReader::parseAttributeGroupBlock() { if (Stream.EnterSubBlock(bitc::PARAMATTR_GROUP_BLOCK_ID)) return error("Invalid record"); if (!MAttributeGroups.empty()) return error("Invalid multiple blocks"); SmallVector Record; // Read all the records. while (true) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: return Error::success(); case BitstreamEntry::Record: // The interesting case. break; } // Read a record. Record.clear(); switch (Stream.readRecord(Entry.ID, Record)) { default: // Default behavior: ignore. break; case bitc::PARAMATTR_GRP_CODE_ENTRY: { // ENTRY: [grpid, idx, a0, a1, ...] if (Record.size() < 3) return error("Invalid record"); uint64_t GrpID = Record[0]; uint64_t Idx = Record[1]; // Index of the object this attribute refers to. AttrBuilder B; for (unsigned i = 2, e = Record.size(); i != e; ++i) { if (Record[i] == 0) { // Enum attribute Attribute::AttrKind Kind; if (Error Err = parseAttrKind(Record[++i], &Kind)) return Err; B.addAttribute(Kind); } else if (Record[i] == 1) { // Integer attribute Attribute::AttrKind Kind; if (Error Err = parseAttrKind(Record[++i], &Kind)) return Err; if (Kind == Attribute::Alignment) B.addAlignmentAttr(Record[++i]); else if (Kind == Attribute::StackAlignment) B.addStackAlignmentAttr(Record[++i]); else if (Kind == Attribute::Dereferenceable) B.addDereferenceableAttr(Record[++i]); else if (Kind == Attribute::DereferenceableOrNull) B.addDereferenceableOrNullAttr(Record[++i]); else if (Kind == Attribute::AllocSize) B.addAllocSizeAttrFromRawRepr(Record[++i]); } else { // String attribute assert((Record[i] == 3 || Record[i] == 4) && "Invalid attribute group entry"); bool HasValue = (Record[i++] == 4); SmallString<64> KindStr; SmallString<64> ValStr; while (Record[i] != 0 && i != e) KindStr += Record[i++]; assert(Record[i] == 0 && "Kind string not null terminated"); if (HasValue) { // Has a value associated with it. ++i; // Skip the '0' that terminates the "kind" string. while (Record[i] != 0 && i != e) ValStr += Record[i++]; assert(Record[i] == 0 && "Value string not null terminated"); } B.addAttribute(KindStr.str(), ValStr.str()); } } MAttributeGroups[GrpID] = AttributeList::get(Context, Idx, B); break; } } } } Error BitcodeReader::parseTypeTable() { if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID_NEW)) return error("Invalid record"); return parseTypeTableBody(); } Error BitcodeReader::parseTypeTableBody() { if (!TypeList.empty()) return error("Invalid multiple blocks"); SmallVector Record; unsigned NumRecords = 0; SmallString<64> TypeName; // Read all the records for this type table. 
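// Illustrative standalone sketch of the alignment encoding undone by
// parseAlignmentValue() above: alignments are stored as the exponent of the
// power of two, incremented by one so that zero can mean "default alignment".
// encodeAlignment/decodeAlignment are assumed names for the sketch.
#include <cstdint>

static uint64_t encodeAlignment(unsigned Alignment) { // 0 or a power of two
  if (Alignment == 0)
    return 0;                           // 0 encodes "use the default alignment"
  uint64_t Exponent = 0;
  while ((1ULL << Exponent) < Alignment)
    ++Exponent;
  return Exponent + 1;                  // stored value is exponent + 1
}

static unsigned decodeAlignment(uint64_t Encoded) {
  // Mirrors parseAlignmentValue(): (1 << E) >> 1 maps 0->0, 1->1, 2->2, 3->4, ...
  return (1u << unsigned(Encoded)) >> 1;
}
// End of sketch.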
while (true) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: if (NumRecords != TypeList.size()) return error("Malformed block"); return Error::success(); case BitstreamEntry::Record: // The interesting case. break; } // Read a record. Record.clear(); Type *ResultTy = nullptr; switch (Stream.readRecord(Entry.ID, Record)) { default: return error("Invalid value"); case bitc::TYPE_CODE_NUMENTRY: // TYPE_CODE_NUMENTRY: [numentries] // TYPE_CODE_NUMENTRY contains a count of the number of types in the // type list. This allows us to reserve space. if (Record.size() < 1) return error("Invalid record"); TypeList.resize(Record[0]); continue; case bitc::TYPE_CODE_VOID: // VOID ResultTy = Type::getVoidTy(Context); break; case bitc::TYPE_CODE_HALF: // HALF ResultTy = Type::getHalfTy(Context); break; case bitc::TYPE_CODE_FLOAT: // FLOAT ResultTy = Type::getFloatTy(Context); break; case bitc::TYPE_CODE_DOUBLE: // DOUBLE ResultTy = Type::getDoubleTy(Context); break; case bitc::TYPE_CODE_X86_FP80: // X86_FP80 ResultTy = Type::getX86_FP80Ty(Context); break; case bitc::TYPE_CODE_FP128: // FP128 ResultTy = Type::getFP128Ty(Context); break; case bitc::TYPE_CODE_PPC_FP128: // PPC_FP128 ResultTy = Type::getPPC_FP128Ty(Context); break; case bitc::TYPE_CODE_LABEL: // LABEL ResultTy = Type::getLabelTy(Context); break; case bitc::TYPE_CODE_METADATA: // METADATA ResultTy = Type::getMetadataTy(Context); break; case bitc::TYPE_CODE_X86_MMX: // X86_MMX ResultTy = Type::getX86_MMXTy(Context); break; case bitc::TYPE_CODE_TOKEN: // TOKEN ResultTy = Type::getTokenTy(Context); break; case bitc::TYPE_CODE_INTEGER: { // INTEGER: [width] if (Record.size() < 1) return error("Invalid record"); uint64_t NumBits = Record[0]; if (NumBits < IntegerType::MIN_INT_BITS || NumBits > IntegerType::MAX_INT_BITS) return error("Bitwidth for integer type out of range"); ResultTy = IntegerType::get(Context, NumBits); break; } case bitc::TYPE_CODE_POINTER: { // POINTER: [pointee type] or // [pointee type, address space] if (Record.size() < 1) return error("Invalid record"); unsigned AddressSpace = 0; if (Record.size() == 2) AddressSpace = Record[1]; ResultTy = getTypeByID(Record[0]); if (!ResultTy || !PointerType::isValidElementType(ResultTy)) return error("Invalid type"); ResultTy = PointerType::get(ResultTy, AddressSpace); break; } case bitc::TYPE_CODE_FUNCTION_OLD: { // FIXME: attrid is dead, remove it in LLVM 4.0 // FUNCTION: [vararg, attrid, retty, paramty x N] if (Record.size() < 3) return error("Invalid record"); SmallVector ArgTys; for (unsigned i = 3, e = Record.size(); i != e; ++i) { if (Type *T = getTypeByID(Record[i])) ArgTys.push_back(T); else break; } ResultTy = getTypeByID(Record[2]); if (!ResultTy || ArgTys.size() < Record.size()-3) return error("Invalid type"); ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]); break; } case bitc::TYPE_CODE_FUNCTION: { // FUNCTION: [vararg, retty, paramty x N] if (Record.size() < 2) return error("Invalid record"); SmallVector ArgTys; for (unsigned i = 2, e = Record.size(); i != e; ++i) { if (Type *T = getTypeByID(Record[i])) { if (!FunctionType::isValidArgumentType(T)) return error("Invalid function argument type"); ArgTys.push_back(T); } else break; } ResultTy = getTypeByID(Record[1]); if (!ResultTy || ArgTys.size() < Record.size()-2) return error("Invalid type"); ResultTy = FunctionType::get(ResultTy, 
ArgTys, Record[0]); break; } case bitc::TYPE_CODE_STRUCT_ANON: { // STRUCT: [ispacked, eltty x N] if (Record.size() < 1) return error("Invalid record"); SmallVector EltTys; for (unsigned i = 1, e = Record.size(); i != e; ++i) { if (Type *T = getTypeByID(Record[i])) EltTys.push_back(T); else break; } if (EltTys.size() != Record.size()-1) return error("Invalid type"); ResultTy = StructType::get(Context, EltTys, Record[0]); break; } case bitc::TYPE_CODE_STRUCT_NAME: // STRUCT_NAME: [strchr x N] if (convertToString(Record, 0, TypeName)) return error("Invalid record"); continue; case bitc::TYPE_CODE_STRUCT_NAMED: { // STRUCT: [ispacked, eltty x N] if (Record.size() < 1) return error("Invalid record"); if (NumRecords >= TypeList.size()) return error("Invalid TYPE table"); // Check to see if this was forward referenced, if so fill in the temp. StructType *Res = cast_or_null(TypeList[NumRecords]); if (Res) { Res->setName(TypeName); TypeList[NumRecords] = nullptr; } else // Otherwise, create a new struct. Res = createIdentifiedStructType(Context, TypeName); TypeName.clear(); SmallVector EltTys; for (unsigned i = 1, e = Record.size(); i != e; ++i) { if (Type *T = getTypeByID(Record[i])) EltTys.push_back(T); else break; } if (EltTys.size() != Record.size()-1) return error("Invalid record"); Res->setBody(EltTys, Record[0]); ResultTy = Res; break; } case bitc::TYPE_CODE_OPAQUE: { // OPAQUE: [] if (Record.size() != 1) return error("Invalid record"); if (NumRecords >= TypeList.size()) return error("Invalid TYPE table"); // Check to see if this was forward referenced, if so fill in the temp. StructType *Res = cast_or_null(TypeList[NumRecords]); if (Res) { Res->setName(TypeName); TypeList[NumRecords] = nullptr; } else // Otherwise, create a new struct with no body. Res = createIdentifiedStructType(Context, TypeName); TypeName.clear(); ResultTy = Res; break; } case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty] if (Record.size() < 2) return error("Invalid record"); ResultTy = getTypeByID(Record[1]); if (!ResultTy || !ArrayType::isValidElementType(ResultTy)) return error("Invalid type"); ResultTy = ArrayType::get(ResultTy, Record[0]); break; case bitc::TYPE_CODE_VECTOR: // VECTOR: [numelts, eltty] if (Record.size() < 2) return error("Invalid record"); if (Record[0] == 0) return error("Invalid vector length"); ResultTy = getTypeByID(Record[1]); if (!ResultTy || !StructType::isValidElementType(ResultTy)) return error("Invalid type"); ResultTy = VectorType::get(ResultTy, Record[0]); break; } if (NumRecords >= TypeList.size()) return error("Invalid TYPE table"); if (TypeList[NumRecords]) return error( "Invalid TYPE table: Only named structs can be forward referenced"); assert(ResultTy && "Didn't read a type?"); TypeList[NumRecords++] = ResultTy; } } Error BitcodeReader::parseOperandBundleTags() { if (Stream.EnterSubBlock(bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID)) return error("Invalid record"); if (!BundleTags.empty()) return error("Invalid multiple blocks"); SmallVector Record; while (true) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: return Error::success(); case BitstreamEntry::Record: // The interesting case. break; } // Tags are implicitly mapped to integers by their order. 
if (Stream.readRecord(Entry.ID, Record) != bitc::OPERAND_BUNDLE_TAG) return error("Invalid record"); // OPERAND_BUNDLE_TAG: [strchr x N] BundleTags.emplace_back(); if (convertToString(Record, 0, BundleTags.back())) return error("Invalid record"); Record.clear(); } } Error BitcodeReader::parseSyncScopeNames() { if (Stream.EnterSubBlock(bitc::SYNC_SCOPE_NAMES_BLOCK_ID)) return error("Invalid record"); if (!SSIDs.empty()) return error("Invalid multiple synchronization scope names blocks"); SmallVector Record; while (true) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: if (SSIDs.empty()) return error("Invalid empty synchronization scope names block"); return Error::success(); case BitstreamEntry::Record: // The interesting case. break; } // Synchronization scope names are implicitly mapped to synchronization // scope IDs by their order. if (Stream.readRecord(Entry.ID, Record) != bitc::SYNC_SCOPE_NAME) return error("Invalid record"); SmallString<16> SSN; if (convertToString(Record, 0, SSN)) return error("Invalid record"); SSIDs.push_back(Context.getOrInsertSyncScopeID(SSN)); Record.clear(); } } /// Associate a value with its name from the given index in the provided record. Expected BitcodeReader::recordValue(SmallVectorImpl &Record, unsigned NameIndex, Triple &TT) { SmallString<128> ValueName; if (convertToString(Record, NameIndex, ValueName)) return error("Invalid record"); unsigned ValueID = Record[0]; if (ValueID >= ValueList.size() || !ValueList[ValueID]) return error("Invalid record"); Value *V = ValueList[ValueID]; StringRef NameStr(ValueName.data(), ValueName.size()); if (NameStr.find_first_of(0) != StringRef::npos) return error("Invalid value name"); V->setName(NameStr); auto *GO = dyn_cast(V); if (GO) { if (GO->getComdat() == reinterpret_cast(1)) { if (TT.supportsCOMDAT()) GO->setComdat(TheModule->getOrInsertComdat(V->getName())); else GO->setComdat(nullptr); } } return V; } /// Helper to note and return the current location, and jump to the given /// offset. static uint64_t jumpToValueSymbolTable(uint64_t Offset, BitstreamCursor &Stream) { // Save the current parsing location so we can jump back at the end // of the VST read. uint64_t CurrentBit = Stream.GetCurrentBitNo(); Stream.JumpToBit(Offset * 32); #ifndef NDEBUG // Do some checking if we are in debug mode. BitstreamEntry Entry = Stream.advance(); assert(Entry.Kind == BitstreamEntry::SubBlock); assert(Entry.ID == bitc::VALUE_SYMTAB_BLOCK_ID); #else // In NDEBUG mode ignore the output so we don't get an unused variable // warning. Stream.advance(); #endif return CurrentBit; } void BitcodeReader::setDeferredFunctionInfo(unsigned FuncBitcodeOffsetDelta, Function *F, ArrayRef Record) { // Note that we subtract 1 here because the offset is relative to one word // before the start of the identification or module block, which was // historically always the start of the regular bitcode header. uint64_t FuncWordOffset = Record[1] - 1; uint64_t FuncBitOffset = FuncWordOffset * 32; DeferredFunctionInfo[F] = FuncBitOffset + FuncBitcodeOffsetDelta; // Set the LastFunctionBlockBit to point to the last function block. // Later when parsing is resumed after function materialization, // we can simply skip that last function block. if (FuncBitOffset > LastFunctionBlockBit) LastFunctionBlockBit = FuncBitOffset; } /// Read a new-style GlobalValue symbol table. 
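// Illustrative standalone sketch of the offset arithmetic used by
// jumpToValueSymbolTable() and setDeferredFunctionInfo() above: VST records
// store 32-bit-word offsets relative to one word before the start of the
// identification or module block, so the absolute bit position is
// (wordOffset - 1) * 32, and the lazy reader adds the abbrev-ID and block-ID
// widths to land just past the ENTER_SUBBLOCK header.  The function name is
// an assumption for the sketch.
#include <cstdint>

static uint64_t vstWordOffsetToBitOffset(uint64_t RecordWordOffset,
                                         unsigned AbbrevIDWidth,
                                         unsigned BlockIDWidth) {
  uint64_t FuncWordOffset = RecordWordOffset - 1; // drop the leading word
  uint64_t FuncBitOffset = FuncWordOffset * 32;   // words -> bits
  return FuncBitOffset + AbbrevIDWidth + BlockIDWidth;
}
// End of sketch.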
Error BitcodeReader::parseGlobalValueSymbolTable() { unsigned FuncBitcodeOffsetDelta = Stream.getAbbrevIDWidth() + bitc::BlockIDWidth; if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID)) return error("Invalid record"); SmallVector Record; while (true) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); switch (Entry.Kind) { case BitstreamEntry::SubBlock: case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: return Error::success(); case BitstreamEntry::Record: break; } Record.clear(); switch (Stream.readRecord(Entry.ID, Record)) { case bitc::VST_CODE_FNENTRY: // [valueid, offset] setDeferredFunctionInfo(FuncBitcodeOffsetDelta, cast(ValueList[Record[0]]), Record); break; } } } /// Parse the value symbol table at either the current parsing location or /// at the given bit offset if provided. Error BitcodeReader::parseValueSymbolTable(uint64_t Offset) { uint64_t CurrentBit; // Pass in the Offset to distinguish between calling for the module-level // VST (where we want to jump to the VST offset) and the function-level // VST (where we don't). if (Offset > 0) { CurrentBit = jumpToValueSymbolTable(Offset, Stream); // If this module uses a string table, read this as a module-level VST. if (UseStrtab) { if (Error Err = parseGlobalValueSymbolTable()) return Err; Stream.JumpToBit(CurrentBit); return Error::success(); } // Otherwise, the VST will be in a similar format to a function-level VST, // and will contain symbol names. } // Compute the delta between the bitcode indices in the VST (the word offset // to the word-aligned ENTER_SUBBLOCK for the function block, and that // expected by the lazy reader. The reader's EnterSubBlock expects to have // already read the ENTER_SUBBLOCK code (size getAbbrevIDWidth) and BlockID // (size BlockIDWidth). Note that we access the stream's AbbrevID width here // just before entering the VST subblock because: 1) the EnterSubBlock // changes the AbbrevID width; 2) the VST block is nested within the same // outer MODULE_BLOCK as the FUNCTION_BLOCKs and therefore have the same // AbbrevID width before calling EnterSubBlock; and 3) when we want to // jump to the FUNCTION_BLOCK using this offset later, we don't want // to rely on the stream's AbbrevID width being that of the MODULE_BLOCK. unsigned FuncBitcodeOffsetDelta = Stream.getAbbrevIDWidth() + bitc::BlockIDWidth; if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID)) return error("Invalid record"); SmallVector Record; Triple TT(TheModule->getTargetTriple()); // Read all the records for this value table. SmallString<128> ValueName; while (true) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: if (Offset > 0) Stream.JumpToBit(CurrentBit); return Error::success(); case BitstreamEntry::Record: // The interesting case. break; } // Read a record. Record.clear(); switch (Stream.readRecord(Entry.ID, Record)) { default: // Default behavior: unknown type. 
break; case bitc::VST_CODE_ENTRY: { // VST_CODE_ENTRY: [valueid, namechar x N] Expected ValOrErr = recordValue(Record, 1, TT); if (Error Err = ValOrErr.takeError()) return Err; ValOrErr.get(); break; } case bitc::VST_CODE_FNENTRY: { // VST_CODE_FNENTRY: [valueid, offset, namechar x N] Expected ValOrErr = recordValue(Record, 2, TT); if (Error Err = ValOrErr.takeError()) return Err; Value *V = ValOrErr.get(); // Ignore function offsets emitted for aliases of functions in older // versions of LLVM. if (auto *F = dyn_cast(V)) setDeferredFunctionInfo(FuncBitcodeOffsetDelta, F, Record); break; } case bitc::VST_CODE_BBENTRY: { if (convertToString(Record, 1, ValueName)) return error("Invalid record"); BasicBlock *BB = getBasicBlock(Record[0]); if (!BB) return error("Invalid record"); BB->setName(StringRef(ValueName.data(), ValueName.size())); ValueName.clear(); break; } } } } /// Decode a signed value stored with the sign bit in the LSB for dense VBR /// encoding. uint64_t BitcodeReader::decodeSignRotatedValue(uint64_t V) { if ((V & 1) == 0) return V >> 1; if (V != 1) return -(V >> 1); // There is no such thing as -0 with integers. "-0" really means MININT. return 1ULL << 63; } /// Resolve all of the initializers for global values and aliases that we can. Error BitcodeReader::resolveGlobalAndIndirectSymbolInits() { std::vector> GlobalInitWorklist; std::vector> IndirectSymbolInitWorklist; std::vector> FunctionPrefixWorklist; std::vector> FunctionPrologueWorklist; std::vector> FunctionPersonalityFnWorklist; GlobalInitWorklist.swap(GlobalInits); IndirectSymbolInitWorklist.swap(IndirectSymbolInits); FunctionPrefixWorklist.swap(FunctionPrefixes); FunctionPrologueWorklist.swap(FunctionPrologues); FunctionPersonalityFnWorklist.swap(FunctionPersonalityFns); while (!GlobalInitWorklist.empty()) { unsigned ValID = GlobalInitWorklist.back().second; if (ValID >= ValueList.size()) { // Not ready to resolve this yet, it requires something later in the file. 
GlobalInits.push_back(GlobalInitWorklist.back()); } else { if (Constant *C = dyn_cast_or_null(ValueList[ValID])) GlobalInitWorklist.back().first->setInitializer(C); else return error("Expected a constant"); } GlobalInitWorklist.pop_back(); } while (!IndirectSymbolInitWorklist.empty()) { unsigned ValID = IndirectSymbolInitWorklist.back().second; if (ValID >= ValueList.size()) { IndirectSymbolInits.push_back(IndirectSymbolInitWorklist.back()); } else { Constant *C = dyn_cast_or_null(ValueList[ValID]); if (!C) return error("Expected a constant"); GlobalIndirectSymbol *GIS = IndirectSymbolInitWorklist.back().first; if (isa(GIS) && C->getType() != GIS->getType()) return error("Alias and aliasee types don't match"); GIS->setIndirectSymbol(C); } IndirectSymbolInitWorklist.pop_back(); } while (!FunctionPrefixWorklist.empty()) { unsigned ValID = FunctionPrefixWorklist.back().second; if (ValID >= ValueList.size()) { FunctionPrefixes.push_back(FunctionPrefixWorklist.back()); } else { if (Constant *C = dyn_cast_or_null(ValueList[ValID])) FunctionPrefixWorklist.back().first->setPrefixData(C); else return error("Expected a constant"); } FunctionPrefixWorklist.pop_back(); } while (!FunctionPrologueWorklist.empty()) { unsigned ValID = FunctionPrologueWorklist.back().second; if (ValID >= ValueList.size()) { FunctionPrologues.push_back(FunctionPrologueWorklist.back()); } else { if (Constant *C = dyn_cast_or_null(ValueList[ValID])) FunctionPrologueWorklist.back().first->setPrologueData(C); else return error("Expected a constant"); } FunctionPrologueWorklist.pop_back(); } while (!FunctionPersonalityFnWorklist.empty()) { unsigned ValID = FunctionPersonalityFnWorklist.back().second; if (ValID >= ValueList.size()) { FunctionPersonalityFns.push_back(FunctionPersonalityFnWorklist.back()); } else { if (Constant *C = dyn_cast_or_null(ValueList[ValID])) FunctionPersonalityFnWorklist.back().first->setPersonalityFn(C); else return error("Expected a constant"); } FunctionPersonalityFnWorklist.pop_back(); } return Error::success(); } static APInt readWideAPInt(ArrayRef Vals, unsigned TypeBits) { SmallVector Words(Vals.size()); transform(Vals, Words.begin(), BitcodeReader::decodeSignRotatedValue); return APInt(TypeBits, Words); } Error BitcodeReader::parseConstants() { if (Stream.EnterSubBlock(bitc::CONSTANTS_BLOCK_ID)) return error("Invalid record"); SmallVector Record; // Read all the records for this value table. Type *CurTy = Type::getInt32Ty(Context); unsigned NextCstNo = ValueList.size(); while (true) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: if (NextCstNo != ValueList.size()) return error("Invalid constant reference"); // Once all the constants have been read, go through and resolve forward // references. ValueList.resolveConstantForwardRefs(); return Error::success(); case BitstreamEntry::Record: // The interesting case. break; } // Read a record. 
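// Illustrative standalone sketch of the sign-rotated integer encoding that
// decodeSignRotatedValue() and readWideAPInt() above undo: the sign bit is
// kept in the LSB so that small negative numbers stay small for VBR.  The
// encoder here is an assumption written to round-trip with that decoder; the
// function names are illustrative only.
#include <cstdint>

static uint64_t encodeSignRotated(int64_t V) {
  if (V >= 0)
    return uint64_t(V) << 1;          // LSB 0: magnitude in the upper bits
  return ((-uint64_t(V)) << 1) | 1;   // LSB 1: negative; INT64_MIN encodes as 1
}

static int64_t decodeSignRotated(uint64_t V) {
  if ((V & 1) == 0)
    return int64_t(V >> 1);
  if (V != 1)
    return -int64_t(V >> 1);
  return INT64_MIN;                   // there is no -0; "1" really means MININT
}
// End of sketch.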
Record.clear(); Type *VoidType = Type::getVoidTy(Context); Value *V = nullptr; unsigned BitCode = Stream.readRecord(Entry.ID, Record); switch (BitCode) { default: // Default behavior: unknown constant case bitc::CST_CODE_UNDEF: // UNDEF V = UndefValue::get(CurTy); break; case bitc::CST_CODE_SETTYPE: // SETTYPE: [typeid] if (Record.empty()) return error("Invalid record"); if (Record[0] >= TypeList.size() || !TypeList[Record[0]]) return error("Invalid record"); if (TypeList[Record[0]] == VoidType) return error("Invalid constant type"); CurTy = TypeList[Record[0]]; continue; // Skip the ValueList manipulation. case bitc::CST_CODE_NULL: // NULL V = Constant::getNullValue(CurTy); break; case bitc::CST_CODE_INTEGER: // INTEGER: [intval] if (!CurTy->isIntegerTy() || Record.empty()) return error("Invalid record"); V = ConstantInt::get(CurTy, decodeSignRotatedValue(Record[0])); break; case bitc::CST_CODE_WIDE_INTEGER: {// WIDE_INTEGER: [n x intval] if (!CurTy->isIntegerTy() || Record.empty()) return error("Invalid record"); APInt VInt = readWideAPInt(Record, cast(CurTy)->getBitWidth()); V = ConstantInt::get(Context, VInt); break; } case bitc::CST_CODE_FLOAT: { // FLOAT: [fpval] if (Record.empty()) return error("Invalid record"); if (CurTy->isHalfTy()) V = ConstantFP::get(Context, APFloat(APFloat::IEEEhalf(), APInt(16, (uint16_t)Record[0]))); else if (CurTy->isFloatTy()) V = ConstantFP::get(Context, APFloat(APFloat::IEEEsingle(), APInt(32, (uint32_t)Record[0]))); else if (CurTy->isDoubleTy()) V = ConstantFP::get(Context, APFloat(APFloat::IEEEdouble(), APInt(64, Record[0]))); else if (CurTy->isX86_FP80Ty()) { // Bits are not stored the same way as a normal i80 APInt, compensate. uint64_t Rearrange[2]; Rearrange[0] = (Record[1] & 0xffffLL) | (Record[0] << 16); Rearrange[1] = Record[0] >> 48; V = ConstantFP::get(Context, APFloat(APFloat::x87DoubleExtended(), APInt(80, Rearrange))); } else if (CurTy->isFP128Ty()) V = ConstantFP::get(Context, APFloat(APFloat::IEEEquad(), APInt(128, Record))); else if (CurTy->isPPC_FP128Ty()) V = ConstantFP::get(Context, APFloat(APFloat::PPCDoubleDouble(), APInt(128, Record))); else V = UndefValue::get(CurTy); break; } case bitc::CST_CODE_AGGREGATE: {// AGGREGATE: [n x value number] if (Record.empty()) return error("Invalid record"); unsigned Size = Record.size(); SmallVector Elts; if (StructType *STy = dyn_cast(CurTy)) { for (unsigned i = 0; i != Size; ++i) Elts.push_back(ValueList.getConstantFwdRef(Record[i], STy->getElementType(i))); V = ConstantStruct::get(STy, Elts); } else if (ArrayType *ATy = dyn_cast(CurTy)) { Type *EltTy = ATy->getElementType(); for (unsigned i = 0; i != Size; ++i) Elts.push_back(ValueList.getConstantFwdRef(Record[i], EltTy)); V = ConstantArray::get(ATy, Elts); } else if (VectorType *VTy = dyn_cast(CurTy)) { Type *EltTy = VTy->getElementType(); for (unsigned i = 0; i != Size; ++i) Elts.push_back(ValueList.getConstantFwdRef(Record[i], EltTy)); V = ConstantVector::get(Elts); } else { V = UndefValue::get(CurTy); } break; } case bitc::CST_CODE_STRING: // STRING: [values] case bitc::CST_CODE_CSTRING: { // CSTRING: [values] if (Record.empty()) return error("Invalid record"); SmallString<16> Elts(Record.begin(), Record.end()); V = ConstantDataArray::getString(Context, Elts, BitCode == bitc::CST_CODE_CSTRING); break; } case bitc::CST_CODE_DATA: {// DATA: [n x value] if (Record.empty()) return error("Invalid record"); Type *EltTy = cast(CurTy)->getElementType(); if (EltTy->isIntegerTy(8)) { SmallVector Elts(Record.begin(), Record.end()); if (isa(CurTy)) V 
= ConstantDataVector::get(Context, Elts); else V = ConstantDataArray::get(Context, Elts); } else if (EltTy->isIntegerTy(16)) { SmallVector Elts(Record.begin(), Record.end()); if (isa(CurTy)) V = ConstantDataVector::get(Context, Elts); else V = ConstantDataArray::get(Context, Elts); } else if (EltTy->isIntegerTy(32)) { SmallVector Elts(Record.begin(), Record.end()); if (isa(CurTy)) V = ConstantDataVector::get(Context, Elts); else V = ConstantDataArray::get(Context, Elts); } else if (EltTy->isIntegerTy(64)) { SmallVector Elts(Record.begin(), Record.end()); if (isa(CurTy)) V = ConstantDataVector::get(Context, Elts); else V = ConstantDataArray::get(Context, Elts); } else if (EltTy->isHalfTy()) { SmallVector Elts(Record.begin(), Record.end()); if (isa(CurTy)) V = ConstantDataVector::getFP(Context, Elts); else V = ConstantDataArray::getFP(Context, Elts); } else if (EltTy->isFloatTy()) { SmallVector Elts(Record.begin(), Record.end()); if (isa(CurTy)) V = ConstantDataVector::getFP(Context, Elts); else V = ConstantDataArray::getFP(Context, Elts); } else if (EltTy->isDoubleTy()) { SmallVector Elts(Record.begin(), Record.end()); if (isa(CurTy)) V = ConstantDataVector::getFP(Context, Elts); else V = ConstantDataArray::getFP(Context, Elts); } else { return error("Invalid type for value"); } break; } case bitc::CST_CODE_CE_BINOP: { // CE_BINOP: [opcode, opval, opval] if (Record.size() < 3) return error("Invalid record"); int Opc = getDecodedBinaryOpcode(Record[0], CurTy); if (Opc < 0) { V = UndefValue::get(CurTy); // Unknown binop. } else { Constant *LHS = ValueList.getConstantFwdRef(Record[1], CurTy); Constant *RHS = ValueList.getConstantFwdRef(Record[2], CurTy); unsigned Flags = 0; if (Record.size() >= 4) { if (Opc == Instruction::Add || Opc == Instruction::Sub || Opc == Instruction::Mul || Opc == Instruction::Shl) { if (Record[3] & (1 << bitc::OBO_NO_SIGNED_WRAP)) Flags |= OverflowingBinaryOperator::NoSignedWrap; if (Record[3] & (1 << bitc::OBO_NO_UNSIGNED_WRAP)) Flags |= OverflowingBinaryOperator::NoUnsignedWrap; } else if (Opc == Instruction::SDiv || Opc == Instruction::UDiv || Opc == Instruction::LShr || Opc == Instruction::AShr) { if (Record[3] & (1 << bitc::PEO_EXACT)) Flags |= SDivOperator::IsExact; } } V = ConstantExpr::get(Opc, LHS, RHS, Flags); } break; } case bitc::CST_CODE_CE_CAST: { // CE_CAST: [opcode, opty, opval] if (Record.size() < 3) return error("Invalid record"); int Opc = getDecodedCastOpcode(Record[0]); if (Opc < 0) { V = UndefValue::get(CurTy); // Unknown cast. 
} else { Type *OpTy = getTypeByID(Record[1]); if (!OpTy) return error("Invalid record"); Constant *Op = ValueList.getConstantFwdRef(Record[2], OpTy); V = UpgradeBitCastExpr(Opc, Op, CurTy); if (!V) V = ConstantExpr::getCast(Opc, Op, CurTy); } break; } case bitc::CST_CODE_CE_INBOUNDS_GEP: // [ty, n x operands] case bitc::CST_CODE_CE_GEP: // [ty, n x operands] case bitc::CST_CODE_CE_GEP_WITH_INRANGE_INDEX: { // [ty, flags, n x // operands] unsigned OpNum = 0; Type *PointeeType = nullptr; if (BitCode == bitc::CST_CODE_CE_GEP_WITH_INRANGE_INDEX || Record.size() % 2) PointeeType = getTypeByID(Record[OpNum++]); bool InBounds = false; Optional InRangeIndex; if (BitCode == bitc::CST_CODE_CE_GEP_WITH_INRANGE_INDEX) { uint64_t Op = Record[OpNum++]; InBounds = Op & 1; InRangeIndex = Op >> 1; } else if (BitCode == bitc::CST_CODE_CE_INBOUNDS_GEP) InBounds = true; SmallVector Elts; while (OpNum != Record.size()) { Type *ElTy = getTypeByID(Record[OpNum++]); if (!ElTy) return error("Invalid record"); Elts.push_back(ValueList.getConstantFwdRef(Record[OpNum++], ElTy)); } if (PointeeType && PointeeType != cast(Elts[0]->getType()->getScalarType()) ->getElementType()) return error("Explicit gep operator type does not match pointee type " "of pointer operand"); if (Elts.size() < 1) return error("Invalid gep with no operands"); ArrayRef Indices(Elts.begin() + 1, Elts.end()); V = ConstantExpr::getGetElementPtr(PointeeType, Elts[0], Indices, InBounds, InRangeIndex); break; } case bitc::CST_CODE_CE_SELECT: { // CE_SELECT: [opval#, opval#, opval#] if (Record.size() < 3) return error("Invalid record"); Type *SelectorTy = Type::getInt1Ty(Context); // The selector might be an i1 or an // Get the type from the ValueList before getting a forward ref. if (VectorType *VTy = dyn_cast(CurTy)) if (Value *V = ValueList[Record[0]]) if (SelectorTy != V->getType()) SelectorTy = VectorType::get(SelectorTy, VTy->getNumElements()); V = ConstantExpr::getSelect(ValueList.getConstantFwdRef(Record[0], SelectorTy), ValueList.getConstantFwdRef(Record[1],CurTy), ValueList.getConstantFwdRef(Record[2],CurTy)); break; } case bitc::CST_CODE_CE_EXTRACTELT : { // CE_EXTRACTELT: [opty, opval, opty, opval] if (Record.size() < 3) return error("Invalid record"); VectorType *OpTy = dyn_cast_or_null(getTypeByID(Record[0])); if (!OpTy) return error("Invalid record"); Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy); Constant *Op1 = nullptr; if (Record.size() == 4) { Type *IdxTy = getTypeByID(Record[2]); if (!IdxTy) return error("Invalid record"); Op1 = ValueList.getConstantFwdRef(Record[3], IdxTy); } else // TODO: Remove with llvm 4.0 Op1 = ValueList.getConstantFwdRef(Record[2], Type::getInt32Ty(Context)); if (!Op1) return error("Invalid record"); V = ConstantExpr::getExtractElement(Op0, Op1); break; } case bitc::CST_CODE_CE_INSERTELT : { // CE_INSERTELT: [opval, opval, opty, opval] VectorType *OpTy = dyn_cast(CurTy); if (Record.size() < 3 || !OpTy) return error("Invalid record"); Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy); Constant *Op1 = ValueList.getConstantFwdRef(Record[1], OpTy->getElementType()); Constant *Op2 = nullptr; if (Record.size() == 4) { Type *IdxTy = getTypeByID(Record[2]); if (!IdxTy) return error("Invalid record"); Op2 = ValueList.getConstantFwdRef(Record[3], IdxTy); } else // TODO: Remove with llvm 4.0 Op2 = ValueList.getConstantFwdRef(Record[2], Type::getInt32Ty(Context)); if (!Op2) return error("Invalid record"); V = ConstantExpr::getInsertElement(Op0, Op1, Op2); break; } case 
bitc::CST_CODE_CE_SHUFFLEVEC: { // CE_SHUFFLEVEC: [opval, opval, opval] VectorType *OpTy = dyn_cast(CurTy); if (Record.size() < 3 || !OpTy) return error("Invalid record"); Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy); Constant *Op1 = ValueList.getConstantFwdRef(Record[1], OpTy); Type *ShufTy = VectorType::get(Type::getInt32Ty(Context), OpTy->getNumElements()); Constant *Op2 = ValueList.getConstantFwdRef(Record[2], ShufTy); V = ConstantExpr::getShuffleVector(Op0, Op1, Op2); break; } case bitc::CST_CODE_CE_SHUFVEC_EX: { // [opty, opval, opval, opval] VectorType *RTy = dyn_cast(CurTy); VectorType *OpTy = dyn_cast_or_null(getTypeByID(Record[0])); if (Record.size() < 4 || !RTy || !OpTy) return error("Invalid record"); Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy); Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy); Type *ShufTy = VectorType::get(Type::getInt32Ty(Context), RTy->getNumElements()); Constant *Op2 = ValueList.getConstantFwdRef(Record[3], ShufTy); V = ConstantExpr::getShuffleVector(Op0, Op1, Op2); break; } case bitc::CST_CODE_CE_CMP: { // CE_CMP: [opty, opval, opval, pred] if (Record.size() < 4) return error("Invalid record"); Type *OpTy = getTypeByID(Record[0]); if (!OpTy) return error("Invalid record"); Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy); Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy); if (OpTy->isFPOrFPVectorTy()) V = ConstantExpr::getFCmp(Record[3], Op0, Op1); else V = ConstantExpr::getICmp(Record[3], Op0, Op1); break; } // This maintains backward compatibility, pre-asm dialect keywords. // FIXME: Remove with the 4.0 release. case bitc::CST_CODE_INLINEASM_OLD: { if (Record.size() < 2) return error("Invalid record"); std::string AsmStr, ConstrStr; bool HasSideEffects = Record[0] & 1; bool IsAlignStack = Record[0] >> 1; unsigned AsmStrSize = Record[1]; if (2+AsmStrSize >= Record.size()) return error("Invalid record"); unsigned ConstStrSize = Record[2+AsmStrSize]; if (3+AsmStrSize+ConstStrSize > Record.size()) return error("Invalid record"); for (unsigned i = 0; i != AsmStrSize; ++i) AsmStr += (char)Record[2+i]; for (unsigned i = 0; i != ConstStrSize; ++i) ConstrStr += (char)Record[3+AsmStrSize+i]; PointerType *PTy = cast(CurTy); V = InlineAsm::get(cast(PTy->getElementType()), AsmStr, ConstrStr, HasSideEffects, IsAlignStack); break; } // This version adds support for the asm dialect keywords (e.g., // inteldialect). 
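// Illustrative standalone sketch of the record layout shared by the
// CST_CODE_INLINEASM_OLD case above and the CST_CODE_INLINEASM case that
// follows: [flags, asmstrlen, asmstr chars..., constrstrlen, constr chars...].
// In the newer form the flags word packs HasSideEffects in bit 0,
// IsAlignStack in bit 1 and the asm dialect in the remaining bits.  The
// struct and function names are assumptions for the sketch.
#include <cstdint>
#include <string>
#include <vector>

struct InlineAsmRecord {
  bool HasSideEffects = false;
  bool IsAlignStack = false;
  unsigned AsmDialect = 0;
  std::string AsmStr, ConstrStr;
};

static bool decodeInlineAsmRecord(const std::vector<uint64_t> &Record,
                                  InlineAsmRecord &Out) {
  if (Record.size() < 2)
    return false;
  Out.HasSideEffects = Record[0] & 1;
  Out.IsAlignStack = (Record[0] >> 1) & 1;
  Out.AsmDialect = unsigned(Record[0] >> 2);
  size_t AsmStrSize = size_t(Record[1]);
  if (2 + AsmStrSize >= Record.size())
    return false;
  size_t ConstStrSize = size_t(Record[2 + AsmStrSize]);
  if (3 + AsmStrSize + ConstStrSize > Record.size())
    return false;
  for (size_t i = 0; i != AsmStrSize; ++i)
    Out.AsmStr += char(Record[2 + i]);
  for (size_t i = 0; i != ConstStrSize; ++i)
    Out.ConstrStr += char(Record[3 + AsmStrSize + i]);
  return true;
}
// End of sketch.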
case bitc::CST_CODE_INLINEASM: { if (Record.size() < 2) return error("Invalid record"); std::string AsmStr, ConstrStr; bool HasSideEffects = Record[0] & 1; bool IsAlignStack = (Record[0] >> 1) & 1; unsigned AsmDialect = Record[0] >> 2; unsigned AsmStrSize = Record[1]; if (2+AsmStrSize >= Record.size()) return error("Invalid record"); unsigned ConstStrSize = Record[2+AsmStrSize]; if (3+AsmStrSize+ConstStrSize > Record.size()) return error("Invalid record"); for (unsigned i = 0; i != AsmStrSize; ++i) AsmStr += (char)Record[2+i]; for (unsigned i = 0; i != ConstStrSize; ++i) ConstrStr += (char)Record[3+AsmStrSize+i]; PointerType *PTy = cast(CurTy); V = InlineAsm::get(cast(PTy->getElementType()), AsmStr, ConstrStr, HasSideEffects, IsAlignStack, InlineAsm::AsmDialect(AsmDialect)); break; } case bitc::CST_CODE_BLOCKADDRESS:{ if (Record.size() < 3) return error("Invalid record"); Type *FnTy = getTypeByID(Record[0]); if (!FnTy) return error("Invalid record"); Function *Fn = dyn_cast_or_null(ValueList.getConstantFwdRef(Record[1],FnTy)); if (!Fn) return error("Invalid record"); // If the function is already parsed we can insert the block address right // away. BasicBlock *BB; unsigned BBID = Record[2]; if (!BBID) // Invalid reference to entry block. return error("Invalid ID"); if (!Fn->empty()) { Function::iterator BBI = Fn->begin(), BBE = Fn->end(); for (size_t I = 0, E = BBID; I != E; ++I) { if (BBI == BBE) return error("Invalid ID"); ++BBI; } BB = &*BBI; } else { // Otherwise insert a placeholder and remember it so it can be inserted // when the function is parsed. auto &FwdBBs = BasicBlockFwdRefs[Fn]; if (FwdBBs.empty()) BasicBlockFwdRefQueue.push_back(Fn); if (FwdBBs.size() < BBID + 1) FwdBBs.resize(BBID + 1); if (!FwdBBs[BBID]) FwdBBs[BBID] = BasicBlock::Create(Context); BB = FwdBBs[BBID]; } V = BlockAddress::get(Fn, BB); break; } } ValueList.assignValue(V, NextCstNo); ++NextCstNo; } } Error BitcodeReader::parseUseLists() { if (Stream.EnterSubBlock(bitc::USELIST_BLOCK_ID)) return error("Invalid record"); // Read all the records. SmallVector Record; while (true) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: return Error::success(); case BitstreamEntry::Record: // The interesting case. break; } // Read a use list record. Record.clear(); bool IsBB = false; switch (Stream.readRecord(Entry.ID, Record)) { default: // Default behavior: unknown type. break; case bitc::USELIST_CODE_BB: IsBB = true; LLVM_FALLTHROUGH; case bitc::USELIST_CODE_DEFAULT: { unsigned RecordLength = Record.size(); if (RecordLength < 3) // Records should have at least an ID and two indexes. return error("Invalid record"); unsigned ID = Record.back(); Record.pop_back(); Value *V; if (IsBB) { assert(ID < FunctionBBs.size() && "Basic block not found"); V = FunctionBBs[ID]; } else V = ValueList[ID]; unsigned NumUses = 0; SmallDenseMap Order; for (const Use &U : V->materialized_uses()) { if (++NumUses > Record.size()) break; Order[&U] = Record[NumUses - 1]; } if (Order.size() != Record.size() || NumUses > Record.size()) // Mismatches can happen if the functions are being materialized lazily // (out-of-order), or a value has been upgraded. break; V->sortUseList([&](const Use &L, const Use &R) { return Order.lookup(&L) < Order.lookup(&R); }); break; } } } } /// When we see the block for metadata, remember where it is and then skip it. 
/// This lets us lazily deserialize the metadata. Error BitcodeReader::rememberAndSkipMetadata() { // Save the current stream state. uint64_t CurBit = Stream.GetCurrentBitNo(); DeferredMetadataInfo.push_back(CurBit); // Skip over the block for now. if (Stream.SkipBlock()) return error("Invalid record"); return Error::success(); } Error BitcodeReader::materializeMetadata() { for (uint64_t BitPos : DeferredMetadataInfo) { // Move the bit stream to the saved position. Stream.JumpToBit(BitPos); if (Error Err = MDLoader->parseModuleMetadata()) return Err; } // Upgrade "Linker Options" module flag to "llvm.linker.options" module-level // metadata. if (Metadata *Val = TheModule->getModuleFlag("Linker Options")) { NamedMDNode *LinkerOpts = TheModule->getOrInsertNamedMetadata("llvm.linker.options"); for (const MDOperand &MDOptions : cast(Val)->operands()) LinkerOpts->addOperand(cast(MDOptions)); } DeferredMetadataInfo.clear(); return Error::success(); } void BitcodeReader::setStripDebugInfo() { StripDebugInfo = true; } /// When we see the block for a function body, remember where it is and then /// skip it. This lets us lazily deserialize the functions. Error BitcodeReader::rememberAndSkipFunctionBody() { // Get the function we are talking about. if (FunctionsWithBodies.empty()) return error("Insufficient function protos"); Function *Fn = FunctionsWithBodies.back(); FunctionsWithBodies.pop_back(); // Save the current stream state. uint64_t CurBit = Stream.GetCurrentBitNo(); assert( (DeferredFunctionInfo[Fn] == 0 || DeferredFunctionInfo[Fn] == CurBit) && "Mismatch between VST and scanned function offsets"); DeferredFunctionInfo[Fn] = CurBit; // Skip over the function block for now. if (Stream.SkipBlock()) return error("Invalid record"); return Error::success(); } Error BitcodeReader::globalCleanup() { // Patch the initializers for globals and aliases up. if (Error Err = resolveGlobalAndIndirectSymbolInits()) return Err; if (!GlobalInits.empty() || !IndirectSymbolInits.empty()) return error("Malformed global initializer set"); // Look for intrinsic functions which need to be upgraded at some point for (Function &F : *TheModule) { MDLoader->upgradeDebugIntrinsics(F); Function *NewFn; if (UpgradeIntrinsicFunction(&F, NewFn)) UpgradedIntrinsics[&F] = NewFn; else if (auto Remangled = Intrinsic::remangleIntrinsicFunction(&F)) // Some types could be renamed during loading if several modules are // loaded in the same LLVMContext (LTO scenario). In this case we should // remangle intrinsics names as well. RemangledIntrinsics[&F] = Remangled.getValue(); } // Look for global variables which need to be renamed. for (GlobalVariable &GV : TheModule->globals()) UpgradeGlobalVariable(&GV); // Force deallocation of memory for these vectors to favor the client that // want lazy deserialization. std::vector>().swap(GlobalInits); std::vector>().swap( IndirectSymbolInits); return Error::success(); } /// Support for lazy parsing of function bodies. This is required if we /// either have an old bitcode file without a VST forward declaration record, /// or if we have an anonymous function being materialized, since anonymous /// functions do not have a name and are therefore not in the VST. 
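// Illustrative standalone sketch of the lazy-parsing bookkeeping used by
// rememberAndSkipFunctionBody()/rememberAndSkipMetadata() above: record the
// bit position of a block, skip it during the initial scan, and jump back to
// that position when the contents are materialized.  LazyBlockIndex and its
// members are assumed names for the sketch, not the reader's real data
// structures.
#include <cstdint>
#include <map>
#include <string>

struct LazyBlockIndex {
  std::map<std::string, uint64_t> DeferredBitPos; // block key -> saved bit offset

  // During the initial scan: remember where the block starts; the caller then
  // skips over it.
  void rememberBlock(const std::string &Key, uint64_t CurrentBitPos) {
    DeferredBitPos[Key] = CurrentBitPos;
  }

  // On materialization: the bit offset to jump back to, or ~0ULL if unknown.
  uint64_t lookupBlock(const std::string &Key) const {
    auto It = DeferredBitPos.find(Key);
    return It == DeferredBitPos.end() ? ~0ULL : It->second;
  }
};
// End of sketch.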
Error BitcodeReader::rememberAndSkipFunctionBodies() { Stream.JumpToBit(NextUnreadBit); if (Stream.AtEndOfStream()) return error("Could not find function in stream"); if (!SeenFirstFunctionBody) return error("Trying to materialize functions before seeing function blocks"); // An old bitcode file with the symbol table at the end would have // finished the parse greedily. assert(SeenValueSymbolTable); SmallVector Record; while (true) { BitstreamEntry Entry = Stream.advance(); switch (Entry.Kind) { default: return error("Expect SubBlock"); case BitstreamEntry::SubBlock: switch (Entry.ID) { default: return error("Expect function block"); case bitc::FUNCTION_BLOCK_ID: if (Error Err = rememberAndSkipFunctionBody()) return Err; NextUnreadBit = Stream.GetCurrentBitNo(); return Error::success(); } } } } bool BitcodeReaderBase::readBlockInfo() { Optional NewBlockInfo = Stream.ReadBlockInfoBlock(); if (!NewBlockInfo) return true; BlockInfo = std::move(*NewBlockInfo); return false; } Error BitcodeReader::parseComdatRecord(ArrayRef Record) { // v1: [selection_kind, name] // v2: [strtab_offset, strtab_size, selection_kind] StringRef Name; std::tie(Name, Record) = readNameFromStrtab(Record); if (Record.empty()) return error("Invalid record"); Comdat::SelectionKind SK = getDecodedComdatSelectionKind(Record[0]); std::string OldFormatName; if (!UseStrtab) { if (Record.size() < 2) return error("Invalid record"); unsigned ComdatNameSize = Record[1]; OldFormatName.reserve(ComdatNameSize); for (unsigned i = 0; i != ComdatNameSize; ++i) OldFormatName += (char)Record[2 + i]; Name = OldFormatName; } Comdat *C = TheModule->getOrInsertComdat(Name); C->setSelectionKind(SK); ComdatList.push_back(C); return Error::success(); } Error BitcodeReader::parseGlobalVarRecord(ArrayRef Record) { // v1: [pointer type, isconst, initid, linkage, alignment, section, // visibility, threadlocal, unnamed_addr, externally_initialized, // dllstorageclass, comdat, attributes, preemption specifier] (name in VST) // v2: [strtab_offset, strtab_size, v1] StringRef Name; std::tie(Name, Record) = readNameFromStrtab(Record); if (Record.size() < 6) return error("Invalid record"); Type *Ty = getTypeByID(Record[0]); if (!Ty) return error("Invalid record"); bool isConstant = Record[1] & 1; bool explicitType = Record[1] & 2; unsigned AddressSpace; if (explicitType) { AddressSpace = Record[1] >> 2; } else { if (!Ty->isPointerTy()) return error("Invalid type for value"); AddressSpace = cast(Ty)->getAddressSpace(); Ty = cast(Ty)->getElementType(); } uint64_t RawLinkage = Record[3]; GlobalValue::LinkageTypes Linkage = getDecodedLinkage(RawLinkage); unsigned Alignment; if (Error Err = parseAlignmentValue(Record[4], Alignment)) return Err; std::string Section; if (Record[5]) { if (Record[5] - 1 >= SectionTable.size()) return error("Invalid ID"); Section = SectionTable[Record[5] - 1]; } GlobalValue::VisibilityTypes Visibility = GlobalValue::DefaultVisibility; // Local linkage must have default visibility. if (Record.size() > 6 && !GlobalValue::isLocalLinkage(Linkage)) // FIXME: Change to an error if non-default in 4.0. 
Visibility = getDecodedVisibility(Record[6]); GlobalVariable::ThreadLocalMode TLM = GlobalVariable::NotThreadLocal; if (Record.size() > 7) TLM = getDecodedThreadLocalMode(Record[7]); GlobalValue::UnnamedAddr UnnamedAddr = GlobalValue::UnnamedAddr::None; if (Record.size() > 8) UnnamedAddr = getDecodedUnnamedAddrType(Record[8]); bool ExternallyInitialized = false; if (Record.size() > 9) ExternallyInitialized = Record[9]; GlobalVariable *NewGV = new GlobalVariable(*TheModule, Ty, isConstant, Linkage, nullptr, Name, nullptr, TLM, AddressSpace, ExternallyInitialized); NewGV->setAlignment(Alignment); if (!Section.empty()) NewGV->setSection(Section); NewGV->setVisibility(Visibility); NewGV->setUnnamedAddr(UnnamedAddr); if (Record.size() > 10) NewGV->setDLLStorageClass(getDecodedDLLStorageClass(Record[10])); else upgradeDLLImportExportLinkage(NewGV, RawLinkage); ValueList.push_back(NewGV); // Remember which value to use for the global initializer. if (unsigned InitID = Record[2]) GlobalInits.push_back(std::make_pair(NewGV, InitID - 1)); if (Record.size() > 11) { if (unsigned ComdatID = Record[11]) { if (ComdatID > ComdatList.size()) return error("Invalid global variable comdat ID"); NewGV->setComdat(ComdatList[ComdatID - 1]); } } else if (hasImplicitComdat(RawLinkage)) { NewGV->setComdat(reinterpret_cast(1)); } if (Record.size() > 12) { auto AS = getAttributes(Record[12]).getFnAttributes(); NewGV->setAttributes(AS); } if (Record.size() > 13) { NewGV->setDSOLocal(getDecodedDSOLocal(Record[13])); } return Error::success(); } Error BitcodeReader::parseFunctionRecord(ArrayRef Record) { // v1: [type, callingconv, isproto, linkage, paramattr, alignment, section, // visibility, gc, unnamed_addr, prologuedata, dllstorageclass, comdat, // prefixdata, personalityfn, preemption specifier] (name in VST) // v2: [strtab_offset, strtab_size, v1] StringRef Name; std::tie(Name, Record) = readNameFromStrtab(Record); if (Record.size() < 8) return error("Invalid record"); Type *Ty = getTypeByID(Record[0]); if (!Ty) return error("Invalid record"); if (auto *PTy = dyn_cast(Ty)) Ty = PTy->getElementType(); auto *FTy = dyn_cast(Ty); if (!FTy) return error("Invalid type for value"); auto CC = static_cast(Record[1]); if (CC & ~CallingConv::MaxID) return error("Invalid calling convention ID"); Function *Func = Function::Create(FTy, GlobalValue::ExternalLinkage, Name, TheModule); Func->setCallingConv(CC); bool isProto = Record[2]; uint64_t RawLinkage = Record[3]; Func->setLinkage(getDecodedLinkage(RawLinkage)); Func->setAttributes(getAttributes(Record[4])); unsigned Alignment; if (Error Err = parseAlignmentValue(Record[5], Alignment)) return Err; Func->setAlignment(Alignment); if (Record[6]) { if (Record[6] - 1 >= SectionTable.size()) return error("Invalid ID"); Func->setSection(SectionTable[Record[6] - 1]); } // Local linkage must have default visibility. if (!Func->hasLocalLinkage()) // FIXME: Change to an error if non-default in 4.0. 
Func->setVisibility(getDecodedVisibility(Record[7])); if (Record.size() > 8 && Record[8]) { if (Record[8] - 1 >= GCTable.size()) return error("Invalid ID"); Func->setGC(GCTable[Record[8] - 1]); } GlobalValue::UnnamedAddr UnnamedAddr = GlobalValue::UnnamedAddr::None; if (Record.size() > 9) UnnamedAddr = getDecodedUnnamedAddrType(Record[9]); Func->setUnnamedAddr(UnnamedAddr); if (Record.size() > 10 && Record[10] != 0) FunctionPrologues.push_back(std::make_pair(Func, Record[10] - 1)); if (Record.size() > 11) Func->setDLLStorageClass(getDecodedDLLStorageClass(Record[11])); else upgradeDLLImportExportLinkage(Func, RawLinkage); if (Record.size() > 12) { if (unsigned ComdatID = Record[12]) { if (ComdatID > ComdatList.size()) return error("Invalid function comdat ID"); Func->setComdat(ComdatList[ComdatID - 1]); } } else if (hasImplicitComdat(RawLinkage)) { Func->setComdat(reinterpret_cast(1)); } if (Record.size() > 13 && Record[13] != 0) FunctionPrefixes.push_back(std::make_pair(Func, Record[13] - 1)); if (Record.size() > 14 && Record[14] != 0) FunctionPersonalityFns.push_back(std::make_pair(Func, Record[14] - 1)); if (Record.size() > 15) { Func->setDSOLocal(getDecodedDSOLocal(Record[15])); } ValueList.push_back(Func); // If this is a function with a body, remember the prototype we are // creating now, so that we can match up the body with them later. if (!isProto) { Func->setIsMaterializable(true); FunctionsWithBodies.push_back(Func); DeferredFunctionInfo[Func] = 0; } return Error::success(); } Error BitcodeReader::parseGlobalIndirectSymbolRecord( unsigned BitCode, ArrayRef Record) { // v1 ALIAS_OLD: [alias type, aliasee val#, linkage] (name in VST) // v1 ALIAS: [alias type, addrspace, aliasee val#, linkage, visibility, // dllstorageclass, threadlocal, unnamed_addr, // preemption specifier] (name in VST) // v1 IFUNC: [alias type, addrspace, aliasee val#, linkage, // visibility, dllstorageclass, threadlocal, unnamed_addr, // preemption specifier] (name in VST) // v2: [strtab_offset, strtab_size, v1] StringRef Name; std::tie(Name, Record) = readNameFromStrtab(Record); bool NewRecord = BitCode != bitc::MODULE_CODE_ALIAS_OLD; if (Record.size() < (3 + (unsigned)NewRecord)) return error("Invalid record"); unsigned OpNum = 0; Type *Ty = getTypeByID(Record[OpNum++]); if (!Ty) return error("Invalid record"); unsigned AddrSpace; if (!NewRecord) { auto *PTy = dyn_cast(Ty); if (!PTy) return error("Invalid type for value"); Ty = PTy->getElementType(); AddrSpace = PTy->getAddressSpace(); } else { AddrSpace = Record[OpNum++]; } auto Val = Record[OpNum++]; auto Linkage = Record[OpNum++]; GlobalIndirectSymbol *NewGA; if (BitCode == bitc::MODULE_CODE_ALIAS || BitCode == bitc::MODULE_CODE_ALIAS_OLD) NewGA = GlobalAlias::create(Ty, AddrSpace, getDecodedLinkage(Linkage), Name, TheModule); else NewGA = GlobalIFunc::create(Ty, AddrSpace, getDecodedLinkage(Linkage), Name, nullptr, TheModule); // Old bitcode files didn't have visibility field. // Local linkage must have default visibility. if (OpNum != Record.size()) { auto VisInd = OpNum++; if (!NewGA->hasLocalLinkage()) // FIXME: Change to an error if non-default in 4.0. 
NewGA->setVisibility(getDecodedVisibility(Record[VisInd])); } if (OpNum != Record.size()) NewGA->setDLLStorageClass(getDecodedDLLStorageClass(Record[OpNum++])); else upgradeDLLImportExportLinkage(NewGA, Linkage); if (OpNum != Record.size()) NewGA->setThreadLocalMode(getDecodedThreadLocalMode(Record[OpNum++])); if (OpNum != Record.size()) NewGA->setUnnamedAddr(getDecodedUnnamedAddrType(Record[OpNum++])); if (OpNum != Record.size()) NewGA->setDSOLocal(getDecodedDSOLocal(Record[OpNum++])); ValueList.push_back(NewGA); IndirectSymbolInits.push_back(std::make_pair(NewGA, Val)); return Error::success(); } Error BitcodeReader::parseModule(uint64_t ResumeBit, bool ShouldLazyLoadMetadata) { if (ResumeBit) Stream.JumpToBit(ResumeBit); else if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) return error("Invalid record"); SmallVector Record; // Read all the records for this module. while (true) { BitstreamEntry Entry = Stream.advance(); switch (Entry.Kind) { case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: return globalCleanup(); case BitstreamEntry::SubBlock: switch (Entry.ID) { default: // Skip unknown content. if (Stream.SkipBlock()) return error("Invalid record"); break; case bitc::BLOCKINFO_BLOCK_ID: if (readBlockInfo()) return error("Malformed block"); break; case bitc::PARAMATTR_BLOCK_ID: if (Error Err = parseAttributeBlock()) return Err; break; case bitc::PARAMATTR_GROUP_BLOCK_ID: if (Error Err = parseAttributeGroupBlock()) return Err; break; case bitc::TYPE_BLOCK_ID_NEW: if (Error Err = parseTypeTable()) return Err; break; case bitc::VALUE_SYMTAB_BLOCK_ID: if (!SeenValueSymbolTable) { // Either this is an old form VST without function index and an // associated VST forward declaration record (which would have caused // the VST to be jumped to and parsed before it was encountered // normally in the stream), or there were no function blocks to // trigger an earlier parsing of the VST. assert(VSTOffset == 0 || FunctionsWithBodies.empty()); if (Error Err = parseValueSymbolTable()) return Err; SeenValueSymbolTable = true; } else { // We must have had a VST forward declaration record, which caused // the parser to jump to and parse the VST earlier. assert(VSTOffset > 0); if (Stream.SkipBlock()) return error("Invalid record"); } break; case bitc::CONSTANTS_BLOCK_ID: if (Error Err = parseConstants()) return Err; if (Error Err = resolveGlobalAndIndirectSymbolInits()) return Err; break; case bitc::METADATA_BLOCK_ID: if (ShouldLazyLoadMetadata) { if (Error Err = rememberAndSkipMetadata()) return Err; break; } assert(DeferredMetadataInfo.empty() && "Unexpected deferred metadata"); if (Error Err = MDLoader->parseModuleMetadata()) return Err; break; case bitc::METADATA_KIND_BLOCK_ID: if (Error Err = MDLoader->parseMetadataKinds()) return Err; break; case bitc::FUNCTION_BLOCK_ID: // If this is the first function body we've seen, reverse the // FunctionsWithBodies list. if (!SeenFirstFunctionBody) { std::reverse(FunctionsWithBodies.begin(), FunctionsWithBodies.end()); if (Error Err = globalCleanup()) return Err; SeenFirstFunctionBody = true; } if (VSTOffset > 0) { // If we have a VST forward declaration record, make sure we // parse the VST now if we haven't already. It is needed to // set up the DeferredFunctionInfo vector for lazy reading. if (!SeenValueSymbolTable) { if (Error Err = BitcodeReader::parseValueSymbolTable(VSTOffset)) return Err; SeenValueSymbolTable = true; // Fall through so that we record the NextUnreadBit below. 
// This is necessary in case we have an anonymous function that // is later materialized. Since it will not have a VST entry we // need to fall back to the lazy parse to find its offset. } else { // If we have a VST forward declaration record, but have already // parsed the VST (just above, when the first function body was // encountered here), then we are resuming the parse after // materializing functions. The ResumeBit points to the // start of the last function block recorded in the // DeferredFunctionInfo map. Skip it. if (Stream.SkipBlock()) return error("Invalid record"); continue; } } // Support older bitcode files that did not have the function // index in the VST, nor a VST forward declaration record, as // well as anonymous functions that do not have VST entries. // Build the DeferredFunctionInfo vector on the fly. if (Error Err = rememberAndSkipFunctionBody()) return Err; // Suspend parsing when we reach the function bodies. Subsequent // materialization calls will resume it when necessary. If the bitcode // file is old, the symbol table will be at the end instead and will not // have been seen yet. In this case, just finish the parse now. if (SeenValueSymbolTable) { NextUnreadBit = Stream.GetCurrentBitNo(); // After the VST has been parsed, we need to make sure intrinsic name // are auto-upgraded. return globalCleanup(); } break; case bitc::USELIST_BLOCK_ID: if (Error Err = parseUseLists()) return Err; break; case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID: if (Error Err = parseOperandBundleTags()) return Err; break; case bitc::SYNC_SCOPE_NAMES_BLOCK_ID: if (Error Err = parseSyncScopeNames()) return Err; break; } continue; case BitstreamEntry::Record: // The interesting case. break; } // Read a record. auto BitCode = Stream.readRecord(Entry.ID, Record); switch (BitCode) { default: break; // Default behavior, ignore unknown content. case bitc::MODULE_CODE_VERSION: { Expected VersionOrErr = parseVersionRecord(Record); if (!VersionOrErr) return VersionOrErr.takeError(); UseRelativeIDs = *VersionOrErr >= 1; break; } case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N] std::string S; if (convertToString(Record, 0, S)) return error("Invalid record"); TheModule->setTargetTriple(S); break; } case bitc::MODULE_CODE_DATALAYOUT: { // DATALAYOUT: [strchr x N] std::string S; if (convertToString(Record, 0, S)) return error("Invalid record"); TheModule->setDataLayout(S); break; } case bitc::MODULE_CODE_ASM: { // ASM: [strchr x N] std::string S; if (convertToString(Record, 0, S)) return error("Invalid record"); TheModule->setModuleInlineAsm(S); break; } case bitc::MODULE_CODE_DEPLIB: { // DEPLIB: [strchr x N] // FIXME: Remove in 4.0. std::string S; if (convertToString(Record, 0, S)) return error("Invalid record"); // Ignore value. 
break; } case bitc::MODULE_CODE_SECTIONNAME: { // SECTIONNAME: [strchr x N] std::string S; if (convertToString(Record, 0, S)) return error("Invalid record"); SectionTable.push_back(S); break; } case bitc::MODULE_CODE_GCNAME: { // SECTIONNAME: [strchr x N] std::string S; if (convertToString(Record, 0, S)) return error("Invalid record"); GCTable.push_back(S); break; } case bitc::MODULE_CODE_COMDAT: if (Error Err = parseComdatRecord(Record)) return Err; break; case bitc::MODULE_CODE_GLOBALVAR: if (Error Err = parseGlobalVarRecord(Record)) return Err; break; case bitc::MODULE_CODE_FUNCTION: if (Error Err = parseFunctionRecord(Record)) return Err; break; case bitc::MODULE_CODE_IFUNC: case bitc::MODULE_CODE_ALIAS: case bitc::MODULE_CODE_ALIAS_OLD: if (Error Err = parseGlobalIndirectSymbolRecord(BitCode, Record)) return Err; break; /// MODULE_CODE_VSTOFFSET: [offset] case bitc::MODULE_CODE_VSTOFFSET: if (Record.size() < 1) return error("Invalid record"); // Note that we subtract 1 here because the offset is relative to one word // before the start of the identification or module block, which was // historically always the start of the regular bitcode header. VSTOffset = Record[0] - 1; break; /// MODULE_CODE_SOURCE_FILENAME: [namechar x N] case bitc::MODULE_CODE_SOURCE_FILENAME: SmallString<128> ValueName; if (convertToString(Record, 0, ValueName)) return error("Invalid record"); TheModule->setSourceFileName(ValueName); break; } Record.clear(); } } Error BitcodeReader::parseBitcodeInto(Module *M, bool ShouldLazyLoadMetadata, bool IsImporting) { TheModule = M; MDLoader = MetadataLoader(Stream, *M, ValueList, IsImporting, [&](unsigned ID) { return getTypeByID(ID); }); return parseModule(0, ShouldLazyLoadMetadata); } Error BitcodeReader::typeCheckLoadStoreInst(Type *ValType, Type *PtrType) { if (!isa(PtrType)) return error("Load/Store operand is not a pointer type"); Type *ElemType = cast(PtrType)->getElementType(); if (ValType && ValType != ElemType) return error("Explicit load/store type does not match pointee " "type of pointer operand"); if (!PointerType::isLoadableOrStorableType(ElemType)) return error("Cannot load/store from pointer"); return Error::success(); } /// Lazily parse the specified function body block. Error BitcodeReader::parseFunctionBody(Function *F) { if (Stream.EnterSubBlock(bitc::FUNCTION_BLOCK_ID)) return error("Invalid record"); // Unexpected unresolved metadata when parsing function. if (MDLoader->hasFwdRefs()) return error("Invalid function metadata: incoming forward references"); InstructionList.clear(); unsigned ModuleValueListSize = ValueList.size(); unsigned ModuleMDLoaderSize = MDLoader->size(); // Add all the function arguments to the value table. for (Argument &I : F->args()) ValueList.push_back(&I); unsigned NextValueNo = ValueList.size(); BasicBlock *CurBB = nullptr; unsigned CurBBNo = 0; DebugLoc LastLoc; auto getLastInstruction = [&]() -> Instruction * { if (CurBB && !CurBB->empty()) return &CurBB->back(); else if (CurBBNo && FunctionBBs[CurBBNo - 1] && !FunctionBBs[CurBBNo - 1]->empty()) return &FunctionBBs[CurBBNo - 1]->back(); return nullptr; }; std::vector OperandBundles; // Read all the records. SmallVector Record; while (true) { BitstreamEntry Entry = Stream.advance(); switch (Entry.Kind) { case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: goto OutOfRecordLoop; case BitstreamEntry::SubBlock: switch (Entry.ID) { default: // Skip unknown content. 
if (Stream.SkipBlock()) return error("Invalid record"); break; case bitc::CONSTANTS_BLOCK_ID: if (Error Err = parseConstants()) return Err; NextValueNo = ValueList.size(); break; case bitc::VALUE_SYMTAB_BLOCK_ID: if (Error Err = parseValueSymbolTable()) return Err; break; case bitc::METADATA_ATTACHMENT_ID: if (Error Err = MDLoader->parseMetadataAttachment(*F, InstructionList)) return Err; break; case bitc::METADATA_BLOCK_ID: assert(DeferredMetadataInfo.empty() && "Must read all module-level metadata before function-level"); if (Error Err = MDLoader->parseFunctionMetadata()) return Err; break; case bitc::USELIST_BLOCK_ID: if (Error Err = parseUseLists()) return Err; break; } continue; case BitstreamEntry::Record: // The interesting case. break; } // Read a record. Record.clear(); Instruction *I = nullptr; unsigned BitCode = Stream.readRecord(Entry.ID, Record); switch (BitCode) { default: // Default behavior: reject return error("Invalid value"); case bitc::FUNC_CODE_DECLAREBLOCKS: { // DECLAREBLOCKS: [nblocks] if (Record.size() < 1 || Record[0] == 0) return error("Invalid record"); // Create all the basic blocks for the function. FunctionBBs.resize(Record[0]); // See if anything took the address of blocks in this function. auto BBFRI = BasicBlockFwdRefs.find(F); if (BBFRI == BasicBlockFwdRefs.end()) { for (unsigned i = 0, e = FunctionBBs.size(); i != e; ++i) FunctionBBs[i] = BasicBlock::Create(Context, "", F); } else { auto &BBRefs = BBFRI->second; // Check for invalid basic block references. if (BBRefs.size() > FunctionBBs.size()) return error("Invalid ID"); assert(!BBRefs.empty() && "Unexpected empty array"); assert(!BBRefs.front() && "Invalid reference to entry block"); for (unsigned I = 0, E = FunctionBBs.size(), RE = BBRefs.size(); I != E; ++I) if (I < RE && BBRefs[I]) { BBRefs[I]->insertInto(F); FunctionBBs[I] = BBRefs[I]; } else { FunctionBBs[I] = BasicBlock::Create(Context, "", F); } // Erase from the table. BasicBlockFwdRefs.erase(BBFRI); } CurBB = FunctionBBs[0]; continue; } case bitc::FUNC_CODE_DEBUG_LOC_AGAIN: // DEBUG_LOC_AGAIN // This record indicates that the last instruction is at the same // location as the previous instruction with a location. 
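      // Encoding this as a separate zero-operand record keeps runs of
      // instructions sharing one source location cheap: only the first needs
      // a full DEBUG_LOC record with [line, col, scope, ia].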
I = getLastInstruction(); if (!I) return error("Invalid record"); I->setDebugLoc(LastLoc); I = nullptr; continue; case bitc::FUNC_CODE_DEBUG_LOC: { // DEBUG_LOC: [line, col, scope, ia] I = getLastInstruction(); if (!I || Record.size() < 4) return error("Invalid record"); unsigned Line = Record[0], Col = Record[1]; unsigned ScopeID = Record[2], IAID = Record[3]; MDNode *Scope = nullptr, *IA = nullptr; if (ScopeID) { Scope = MDLoader->getMDNodeFwdRefOrNull(ScopeID - 1); if (!Scope) return error("Invalid record"); } if (IAID) { IA = MDLoader->getMDNodeFwdRefOrNull(IAID - 1); if (!IA) return error("Invalid record"); } LastLoc = DebugLoc::get(Line, Col, Scope, IA); I->setDebugLoc(LastLoc); I = nullptr; continue; } case bitc::FUNC_CODE_INST_BINOP: { // BINOP: [opval, ty, opval, opcode] unsigned OpNum = 0; Value *LHS, *RHS; if (getValueTypePair(Record, OpNum, NextValueNo, LHS) || popValue(Record, OpNum, NextValueNo, LHS->getType(), RHS) || OpNum+1 > Record.size()) return error("Invalid record"); int Opc = getDecodedBinaryOpcode(Record[OpNum++], LHS->getType()); if (Opc == -1) return error("Invalid record"); I = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS); InstructionList.push_back(I); if (OpNum < Record.size()) { if (Opc == Instruction::Add || Opc == Instruction::Sub || Opc == Instruction::Mul || Opc == Instruction::Shl) { if (Record[OpNum] & (1 << bitc::OBO_NO_SIGNED_WRAP)) cast(I)->setHasNoSignedWrap(true); if (Record[OpNum] & (1 << bitc::OBO_NO_UNSIGNED_WRAP)) cast(I)->setHasNoUnsignedWrap(true); } else if (Opc == Instruction::SDiv || Opc == Instruction::UDiv || Opc == Instruction::LShr || Opc == Instruction::AShr) { if (Record[OpNum] & (1 << bitc::PEO_EXACT)) cast(I)->setIsExact(true); } else if (isa(I)) { FastMathFlags FMF = getDecodedFastMathFlags(Record[OpNum]); if (FMF.any()) I->setFastMathFlags(FMF); } } break; } case bitc::FUNC_CODE_INST_CAST: { // CAST: [opval, opty, destty, castopc] unsigned OpNum = 0; Value *Op; if (getValueTypePair(Record, OpNum, NextValueNo, Op) || OpNum+2 != Record.size()) return error("Invalid record"); Type *ResTy = getTypeByID(Record[OpNum]); int Opc = getDecodedCastOpcode(Record[OpNum + 1]); if (Opc == -1 || !ResTy) return error("Invalid record"); Instruction *Temp = nullptr; if ((I = UpgradeBitCastInst(Opc, Op, ResTy, Temp))) { if (Temp) { InstructionList.push_back(Temp); CurBB->getInstList().push_back(Temp); } } else { auto CastOp = (Instruction::CastOps)Opc; if (!CastInst::castIsValid(CastOp, Op, ResTy)) return error("Invalid cast"); I = CastInst::Create(CastOp, Op, ResTy); } InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_INBOUNDS_GEP_OLD: case bitc::FUNC_CODE_INST_GEP_OLD: case bitc::FUNC_CODE_INST_GEP: { // GEP: type, [n x operands] unsigned OpNum = 0; Type *Ty; bool InBounds; if (BitCode == bitc::FUNC_CODE_INST_GEP) { InBounds = Record[OpNum++]; Ty = getTypeByID(Record[OpNum++]); } else { InBounds = BitCode == bitc::FUNC_CODE_INST_INBOUNDS_GEP_OLD; Ty = nullptr; } Value *BasePtr; if (getValueTypePair(Record, OpNum, NextValueNo, BasePtr)) return error("Invalid record"); if (!Ty) Ty = cast(BasePtr->getType()->getScalarType()) ->getElementType(); else if (Ty != cast(BasePtr->getType()->getScalarType()) ->getElementType()) return error( "Explicit gep type does not match pointee type of pointer operand"); SmallVector GEPIdx; while (OpNum != Record.size()) { Value *Op; if (getValueTypePair(Record, OpNum, NextValueNo, Op)) return error("Invalid record"); GEPIdx.push_back(Op); } I = GetElementPtrInst::Create(Ty, BasePtr, 
GEPIdx); InstructionList.push_back(I); if (InBounds) cast(I)->setIsInBounds(true); break; } case bitc::FUNC_CODE_INST_EXTRACTVAL: { // EXTRACTVAL: [opty, opval, n x indices] unsigned OpNum = 0; Value *Agg; if (getValueTypePair(Record, OpNum, NextValueNo, Agg)) return error("Invalid record"); unsigned RecSize = Record.size(); if (OpNum == RecSize) return error("EXTRACTVAL: Invalid instruction with 0 indices"); SmallVector EXTRACTVALIdx; Type *CurTy = Agg->getType(); for (; OpNum != RecSize; ++OpNum) { bool IsArray = CurTy->isArrayTy(); bool IsStruct = CurTy->isStructTy(); uint64_t Index = Record[OpNum]; if (!IsStruct && !IsArray) return error("EXTRACTVAL: Invalid type"); if ((unsigned)Index != Index) return error("Invalid value"); if (IsStruct && Index >= CurTy->subtypes().size()) return error("EXTRACTVAL: Invalid struct index"); if (IsArray && Index >= CurTy->getArrayNumElements()) return error("EXTRACTVAL: Invalid array index"); EXTRACTVALIdx.push_back((unsigned)Index); if (IsStruct) CurTy = CurTy->subtypes()[Index]; else CurTy = CurTy->subtypes()[0]; } I = ExtractValueInst::Create(Agg, EXTRACTVALIdx); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_INSERTVAL: { // INSERTVAL: [opty, opval, opty, opval, n x indices] unsigned OpNum = 0; Value *Agg; if (getValueTypePair(Record, OpNum, NextValueNo, Agg)) return error("Invalid record"); Value *Val; if (getValueTypePair(Record, OpNum, NextValueNo, Val)) return error("Invalid record"); unsigned RecSize = Record.size(); if (OpNum == RecSize) return error("INSERTVAL: Invalid instruction with 0 indices"); SmallVector INSERTVALIdx; Type *CurTy = Agg->getType(); for (; OpNum != RecSize; ++OpNum) { bool IsArray = CurTy->isArrayTy(); bool IsStruct = CurTy->isStructTy(); uint64_t Index = Record[OpNum]; if (!IsStruct && !IsArray) return error("INSERTVAL: Invalid type"); if ((unsigned)Index != Index) return error("Invalid value"); if (IsStruct && Index >= CurTy->subtypes().size()) return error("INSERTVAL: Invalid struct index"); if (IsArray && Index >= CurTy->getArrayNumElements()) return error("INSERTVAL: Invalid array index"); INSERTVALIdx.push_back((unsigned)Index); if (IsStruct) CurTy = CurTy->subtypes()[Index]; else CurTy = CurTy->subtypes()[0]; } if (CurTy != Val->getType()) return error("Inserted value type doesn't match aggregate type"); I = InsertValueInst::Create(Agg, Val, INSERTVALIdx); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_SELECT: { // SELECT: [opval, ty, opval, opval] // obsolete form of select // handles select i1 ... 
in old bitcode unsigned OpNum = 0; Value *TrueVal, *FalseVal, *Cond; if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) || popValue(Record, OpNum, NextValueNo, TrueVal->getType(), FalseVal) || popValue(Record, OpNum, NextValueNo, Type::getInt1Ty(Context), Cond)) return error("Invalid record"); I = SelectInst::Create(Cond, TrueVal, FalseVal); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_VSELECT: {// VSELECT: [ty,opval,opval,predty,pred] // new form of select // handles select i1 or select [N x i1] unsigned OpNum = 0; Value *TrueVal, *FalseVal, *Cond; if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) || popValue(Record, OpNum, NextValueNo, TrueVal->getType(), FalseVal) || getValueTypePair(Record, OpNum, NextValueNo, Cond)) return error("Invalid record"); // select condition can be either i1 or [N x i1] if (VectorType* vector_type = dyn_cast(Cond->getType())) { // expect if (vector_type->getElementType() != Type::getInt1Ty(Context)) return error("Invalid type for value"); } else { // expect i1 if (Cond->getType() != Type::getInt1Ty(Context)) return error("Invalid type for value"); } I = SelectInst::Create(Cond, TrueVal, FalseVal); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_EXTRACTELT: { // EXTRACTELT: [opty, opval, opval] unsigned OpNum = 0; Value *Vec, *Idx; if (getValueTypePair(Record, OpNum, NextValueNo, Vec) || getValueTypePair(Record, OpNum, NextValueNo, Idx)) return error("Invalid record"); if (!Vec->getType()->isVectorTy()) return error("Invalid type for value"); I = ExtractElementInst::Create(Vec, Idx); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_INSERTELT: { // INSERTELT: [ty, opval,opval,opval] unsigned OpNum = 0; Value *Vec, *Elt, *Idx; if (getValueTypePair(Record, OpNum, NextValueNo, Vec)) return error("Invalid record"); if (!Vec->getType()->isVectorTy()) return error("Invalid type for value"); if (popValue(Record, OpNum, NextValueNo, cast(Vec->getType())->getElementType(), Elt) || getValueTypePair(Record, OpNum, NextValueNo, Idx)) return error("Invalid record"); I = InsertElementInst::Create(Vec, Elt, Idx); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_SHUFFLEVEC: {// SHUFFLEVEC: [opval,ty,opval,opval] unsigned OpNum = 0; Value *Vec1, *Vec2, *Mask; if (getValueTypePair(Record, OpNum, NextValueNo, Vec1) || popValue(Record, OpNum, NextValueNo, Vec1->getType(), Vec2)) return error("Invalid record"); if (getValueTypePair(Record, OpNum, NextValueNo, Mask)) return error("Invalid record"); if (!Vec1->getType()->isVectorTy() || !Vec2->getType()->isVectorTy()) return error("Invalid type for value"); I = new ShuffleVectorInst(Vec1, Vec2, Mask); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_CMP: // CMP: [opty, opval, opval, pred] // Old form of ICmp/FCmp returning bool // Existed to differentiate between icmp/fcmp and vicmp/vfcmp which were // both legal on vectors but had different behaviour. 
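    // Both CMP and CMP2 are decoded by the shared logic below; whether an
    // ICmp or an FCmp is created depends on the operand type, not on which
    // record code was used.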
case bitc::FUNC_CODE_INST_CMP2: { // CMP2: [opty, opval, opval, pred] // FCmp/ICmp returning bool or vector of bool unsigned OpNum = 0; Value *LHS, *RHS; if (getValueTypePair(Record, OpNum, NextValueNo, LHS) || popValue(Record, OpNum, NextValueNo, LHS->getType(), RHS)) return error("Invalid record"); unsigned PredVal = Record[OpNum]; bool IsFP = LHS->getType()->isFPOrFPVectorTy(); FastMathFlags FMF; if (IsFP && Record.size() > OpNum+1) FMF = getDecodedFastMathFlags(Record[++OpNum]); if (OpNum+1 != Record.size()) return error("Invalid record"); if (LHS->getType()->isFPOrFPVectorTy()) I = new FCmpInst((FCmpInst::Predicate)PredVal, LHS, RHS); else I = new ICmpInst((ICmpInst::Predicate)PredVal, LHS, RHS); if (FMF.any()) I->setFastMathFlags(FMF); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_RET: // RET: [opty,opval] { unsigned Size = Record.size(); if (Size == 0) { I = ReturnInst::Create(Context); InstructionList.push_back(I); break; } unsigned OpNum = 0; Value *Op = nullptr; if (getValueTypePair(Record, OpNum, NextValueNo, Op)) return error("Invalid record"); if (OpNum != Record.size()) return error("Invalid record"); I = ReturnInst::Create(Context, Op); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_BR: { // BR: [bb#, bb#, opval] or [bb#] if (Record.size() != 1 && Record.size() != 3) return error("Invalid record"); BasicBlock *TrueDest = getBasicBlock(Record[0]); if (!TrueDest) return error("Invalid record"); if (Record.size() == 1) { I = BranchInst::Create(TrueDest); InstructionList.push_back(I); } else { BasicBlock *FalseDest = getBasicBlock(Record[1]); Value *Cond = getValue(Record, 2, NextValueNo, Type::getInt1Ty(Context)); if (!FalseDest || !Cond) return error("Invalid record"); I = BranchInst::Create(TrueDest, FalseDest, Cond); InstructionList.push_back(I); } break; } case bitc::FUNC_CODE_INST_CLEANUPRET: { // CLEANUPRET: [val] or [val,bb#] if (Record.size() != 1 && Record.size() != 2) return error("Invalid record"); unsigned Idx = 0; Value *CleanupPad = getValue(Record, Idx++, NextValueNo, Type::getTokenTy(Context)); if (!CleanupPad) return error("Invalid record"); BasicBlock *UnwindDest = nullptr; if (Record.size() == 2) { UnwindDest = getBasicBlock(Record[Idx++]); if (!UnwindDest) return error("Invalid record"); } I = CleanupReturnInst::Create(CleanupPad, UnwindDest); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_CATCHRET: { // CATCHRET: [val,bb#] if (Record.size() != 2) return error("Invalid record"); unsigned Idx = 0; Value *CatchPad = getValue(Record, Idx++, NextValueNo, Type::getTokenTy(Context)); if (!CatchPad) return error("Invalid record"); BasicBlock *BB = getBasicBlock(Record[Idx++]); if (!BB) return error("Invalid record"); I = CatchReturnInst::Create(CatchPad, BB); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_CATCHSWITCH: { // CATCHSWITCH: [tok,num,(bb)*,bb?] // We must have, at minimum, the outer scope and the number of arguments. 
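      // Illustrative layout: a catchswitch with two handlers and an unwind
      // destination would be encoded as [parent-pad token, 2, handler-bb#,
      // handler-bb#, unwind-bb#]; the optional trailing block is recognized
      // below by checking Idx + 1 == Record.size().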
if (Record.size() < 2) return error("Invalid record"); unsigned Idx = 0; Value *ParentPad = getValue(Record, Idx++, NextValueNo, Type::getTokenTy(Context)); unsigned NumHandlers = Record[Idx++]; SmallVector Handlers; for (unsigned Op = 0; Op != NumHandlers; ++Op) { BasicBlock *BB = getBasicBlock(Record[Idx++]); if (!BB) return error("Invalid record"); Handlers.push_back(BB); } BasicBlock *UnwindDest = nullptr; if (Idx + 1 == Record.size()) { UnwindDest = getBasicBlock(Record[Idx++]); if (!UnwindDest) return error("Invalid record"); } if (Record.size() != Idx) return error("Invalid record"); auto *CatchSwitch = CatchSwitchInst::Create(ParentPad, UnwindDest, NumHandlers); for (BasicBlock *Handler : Handlers) CatchSwitch->addHandler(Handler); I = CatchSwitch; InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_CATCHPAD: case bitc::FUNC_CODE_INST_CLEANUPPAD: { // [tok,num,(ty,val)*] // We must have, at minimum, the outer scope and the number of arguments. if (Record.size() < 2) return error("Invalid record"); unsigned Idx = 0; Value *ParentPad = getValue(Record, Idx++, NextValueNo, Type::getTokenTy(Context)); unsigned NumArgOperands = Record[Idx++]; SmallVector Args; for (unsigned Op = 0; Op != NumArgOperands; ++Op) { Value *Val; if (getValueTypePair(Record, Idx, NextValueNo, Val)) return error("Invalid record"); Args.push_back(Val); } if (Record.size() != Idx) return error("Invalid record"); if (BitCode == bitc::FUNC_CODE_INST_CLEANUPPAD) I = CleanupPadInst::Create(ParentPad, Args); else I = CatchPadInst::Create(ParentPad, Args); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_SWITCH: { // SWITCH: [opty, op0, op1, ...] // Check magic if ((Record[0] >> 16) == SWITCH_INST_MAGIC) { // "New" SwitchInst format with case ranges. The changes to write this // format were reverted but we still recognize bitcode that uses it. // Hopefully someday we will have support for case ranges and can use // this format again. Type *OpTy = getTypeByID(Record[1]); unsigned ValueBitWidth = cast(OpTy)->getBitWidth(); Value *Cond = getValue(Record, 2, NextValueNo, OpTy); BasicBlock *Default = getBasicBlock(Record[3]); if (!OpTy || !Cond || !Default) return error("Invalid record"); unsigned NumCases = Record[4]; SwitchInst *SI = SwitchInst::Create(Cond, Default, NumCases); InstructionList.push_back(SI); unsigned CurIdx = 5; for (unsigned i = 0; i != NumCases; ++i) { SmallVector CaseVals; unsigned NumItems = Record[CurIdx++]; for (unsigned ci = 0; ci != NumItems; ++ci) { bool isSingleNumber = Record[CurIdx++]; APInt Low; unsigned ActiveWords = 1; if (ValueBitWidth > 64) ActiveWords = Record[CurIdx++]; Low = readWideAPInt(makeArrayRef(&Record[CurIdx], ActiveWords), ValueBitWidth); CurIdx += ActiveWords; if (!isSingleNumber) { ActiveWords = 1; if (ValueBitWidth > 64) ActiveWords = Record[CurIdx++]; APInt High = readWideAPInt( makeArrayRef(&Record[CurIdx], ActiveWords), ValueBitWidth); CurIdx += ActiveWords; // FIXME: It is not clear whether values in the range should be // compared as signed or unsigned values. The partially // implemented changes that used this format in the past used // unsigned comparisons. for ( ; Low.ule(High); ++Low) CaseVals.push_back(ConstantInt::get(Context, Low)); } else CaseVals.push_back(ConstantInt::get(Context, Low)); } BasicBlock *DestBB = getBasicBlock(Record[CurIdx++]); for (SmallVector::iterator cvi = CaseVals.begin(), cve = CaseVals.end(); cvi != cve; ++cvi) SI->addCase(*cvi, DestBB); } I = SI; break; } // Old SwitchInst format without case ranges. 
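      // Layout: [opty, cond, default-bb#, (caseval, case-bb#)*], hence the
      // (Record.size() - 3) / 2 case count computed below.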
if (Record.size() < 3 || (Record.size() & 1) == 0) return error("Invalid record"); Type *OpTy = getTypeByID(Record[0]); Value *Cond = getValue(Record, 1, NextValueNo, OpTy); BasicBlock *Default = getBasicBlock(Record[2]); if (!OpTy || !Cond || !Default) return error("Invalid record"); unsigned NumCases = (Record.size()-3)/2; SwitchInst *SI = SwitchInst::Create(Cond, Default, NumCases); InstructionList.push_back(SI); for (unsigned i = 0, e = NumCases; i != e; ++i) { ConstantInt *CaseVal = dyn_cast_or_null(getFnValueByID(Record[3+i*2], OpTy)); BasicBlock *DestBB = getBasicBlock(Record[1+3+i*2]); if (!CaseVal || !DestBB) { delete SI; return error("Invalid record"); } SI->addCase(CaseVal, DestBB); } I = SI; break; } case bitc::FUNC_CODE_INST_INDIRECTBR: { // INDIRECTBR: [opty, op0, op1, ...] if (Record.size() < 2) return error("Invalid record"); Type *OpTy = getTypeByID(Record[0]); Value *Address = getValue(Record, 1, NextValueNo, OpTy); if (!OpTy || !Address) return error("Invalid record"); unsigned NumDests = Record.size()-2; IndirectBrInst *IBI = IndirectBrInst::Create(Address, NumDests); InstructionList.push_back(IBI); for (unsigned i = 0, e = NumDests; i != e; ++i) { if (BasicBlock *DestBB = getBasicBlock(Record[2+i])) { IBI->addDestination(DestBB); } else { delete IBI; return error("Invalid record"); } } I = IBI; break; } case bitc::FUNC_CODE_INST_INVOKE: { // INVOKE: [attrs, cc, normBB, unwindBB, fnty, op0,op1,op2, ...] if (Record.size() < 4) return error("Invalid record"); unsigned OpNum = 0; AttributeList PAL = getAttributes(Record[OpNum++]); unsigned CCInfo = Record[OpNum++]; BasicBlock *NormalBB = getBasicBlock(Record[OpNum++]); BasicBlock *UnwindBB = getBasicBlock(Record[OpNum++]); FunctionType *FTy = nullptr; if (CCInfo >> 13 & 1 && !(FTy = dyn_cast(getTypeByID(Record[OpNum++])))) return error("Explicit invoke type is not a function type"); Value *Callee; if (getValueTypePair(Record, OpNum, NextValueNo, Callee)) return error("Invalid record"); PointerType *CalleeTy = dyn_cast(Callee->getType()); if (!CalleeTy) return error("Callee is not a pointer"); if (!FTy) { FTy = dyn_cast(CalleeTy->getElementType()); if (!FTy) return error("Callee is not of pointer to function type"); } else if (CalleeTy->getElementType() != FTy) return error("Explicit invoke type does not match pointee type of " "callee operand"); if (Record.size() < FTy->getNumParams() + OpNum) return error("Insufficient operands to call"); SmallVector Ops; for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) { Ops.push_back(getValue(Record, OpNum, NextValueNo, FTy->getParamType(i))); if (!Ops.back()) return error("Invalid record"); } if (!FTy->isVarArg()) { if (Record.size() != OpNum) return error("Invalid record"); } else { // Read type/value pairs for varargs params. 
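        // Only the fixed parameters are described by FTy, so each trailing
        // vararg operand must carry its own explicit type.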
while (OpNum != Record.size()) { Value *Op; if (getValueTypePair(Record, OpNum, NextValueNo, Op)) return error("Invalid record"); Ops.push_back(Op); } } I = InvokeInst::Create(Callee, NormalBB, UnwindBB, Ops, OperandBundles); OperandBundles.clear(); InstructionList.push_back(I); cast(I)->setCallingConv( static_cast(CallingConv::MaxID & CCInfo)); cast(I)->setAttributes(PAL); break; } case bitc::FUNC_CODE_INST_RESUME: { // RESUME: [opval] unsigned Idx = 0; Value *Val = nullptr; if (getValueTypePair(Record, Idx, NextValueNo, Val)) return error("Invalid record"); I = ResumeInst::Create(Val); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_UNREACHABLE: // UNREACHABLE I = new UnreachableInst(Context); InstructionList.push_back(I); break; case bitc::FUNC_CODE_INST_PHI: { // PHI: [ty, val0,bb0, ...] if (Record.size() < 1 || ((Record.size()-1)&1)) return error("Invalid record"); Type *Ty = getTypeByID(Record[0]); if (!Ty) return error("Invalid record"); PHINode *PN = PHINode::Create(Ty, (Record.size()-1)/2); InstructionList.push_back(PN); for (unsigned i = 0, e = Record.size()-1; i != e; i += 2) { Value *V; // With the new function encoding, it is possible that operands have // negative IDs (for forward references). Use a signed VBR // representation to keep the encoding small. if (UseRelativeIDs) V = getValueSigned(Record, 1+i, NextValueNo, Ty); else V = getValue(Record, 1+i, NextValueNo, Ty); BasicBlock *BB = getBasicBlock(Record[2+i]); if (!V || !BB) return error("Invalid record"); PN->addIncoming(V, BB); } I = PN; break; } case bitc::FUNC_CODE_INST_LANDINGPAD: case bitc::FUNC_CODE_INST_LANDINGPAD_OLD: { // LANDINGPAD: [ty, val, val, num, (id0,val0 ...)?] unsigned Idx = 0; if (BitCode == bitc::FUNC_CODE_INST_LANDINGPAD) { if (Record.size() < 3) return error("Invalid record"); } else { assert(BitCode == bitc::FUNC_CODE_INST_LANDINGPAD_OLD); if (Record.size() < 4) return error("Invalid record"); } Type *Ty = getTypeByID(Record[Idx++]); if (!Ty) return error("Invalid record"); if (BitCode == bitc::FUNC_CODE_INST_LANDINGPAD_OLD) { Value *PersFn = nullptr; if (getValueTypePair(Record, Idx, NextValueNo, PersFn)) return error("Invalid record"); if (!F->hasPersonalityFn()) F->setPersonalityFn(cast(PersFn)); else if (F->getPersonalityFn() != cast(PersFn)) return error("Personality function mismatch"); } bool IsCleanup = !!Record[Idx++]; unsigned NumClauses = Record[Idx++]; LandingPadInst *LP = LandingPadInst::Create(Ty, NumClauses); LP->setCleanup(IsCleanup); for (unsigned J = 0; J != NumClauses; ++J) { LandingPadInst::ClauseType CT = LandingPadInst::ClauseType(Record[Idx++]); (void)CT; Value *Val; if (getValueTypePair(Record, Idx, NextValueNo, Val)) { delete LP; return error("Invalid record"); } assert((CT != LandingPadInst::Catch || !isa(Val->getType())) && "Catch clause has a invalid type!"); assert((CT != LandingPadInst::Filter || isa(Val->getType())) && "Filter clause has invalid type!"); LP->addClause(cast(Val)); } I = LP; InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_ALLOCA: { // ALLOCA: [instty, opty, op, align] if (Record.size() != 4) return error("Invalid record"); uint64_t AlignRecord = Record[3]; const uint64_t InAllocaMask = uint64_t(1) << 5; const uint64_t ExplicitTypeMask = uint64_t(1) << 6; const uint64_t SwiftErrorMask = uint64_t(1) << 7; const uint64_t FlagMask = InAllocaMask | ExplicitTypeMask | SwiftErrorMask; bool InAlloca = AlignRecord & InAllocaMask; bool SwiftError = AlignRecord & SwiftErrorMask; Type *Ty = getTypeByID(Record[0]); if 
((AlignRecord & ExplicitTypeMask) == 0) { auto *PTy = dyn_cast_or_null(Ty); if (!PTy) return error("Old-style alloca with a non-pointer type"); Ty = PTy->getElementType(); } Type *OpTy = getTypeByID(Record[1]); Value *Size = getFnValueByID(Record[2], OpTy); unsigned Align; if (Error Err = parseAlignmentValue(AlignRecord & ~FlagMask, Align)) { return Err; } if (!Ty || !Size) return error("Invalid record"); // FIXME: Make this an optional field. const DataLayout &DL = TheModule->getDataLayout(); unsigned AS = DL.getAllocaAddrSpace(); AllocaInst *AI = new AllocaInst(Ty, AS, Size, Align); AI->setUsedWithInAlloca(InAlloca); AI->setSwiftError(SwiftError); I = AI; InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_LOAD: { // LOAD: [opty, op, align, vol] unsigned OpNum = 0; Value *Op; if (getValueTypePair(Record, OpNum, NextValueNo, Op) || (OpNum + 2 != Record.size() && OpNum + 3 != Record.size())) return error("Invalid record"); Type *Ty = nullptr; if (OpNum + 3 == Record.size()) Ty = getTypeByID(Record[OpNum++]); if (Error Err = typeCheckLoadStoreInst(Ty, Op->getType())) return Err; if (!Ty) Ty = cast(Op->getType())->getElementType(); unsigned Align; if (Error Err = parseAlignmentValue(Record[OpNum], Align)) return Err; I = new LoadInst(Ty, Op, "", Record[OpNum + 1], Align); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_LOADATOMIC: { // LOADATOMIC: [opty, op, align, vol, ordering, ssid] unsigned OpNum = 0; Value *Op; if (getValueTypePair(Record, OpNum, NextValueNo, Op) || (OpNum + 4 != Record.size() && OpNum + 5 != Record.size())) return error("Invalid record"); Type *Ty = nullptr; if (OpNum + 5 == Record.size()) Ty = getTypeByID(Record[OpNum++]); if (Error Err = typeCheckLoadStoreInst(Ty, Op->getType())) return Err; if (!Ty) Ty = cast(Op->getType())->getElementType(); AtomicOrdering Ordering = getDecodedOrdering(Record[OpNum + 2]); if (Ordering == AtomicOrdering::NotAtomic || Ordering == AtomicOrdering::Release || Ordering == AtomicOrdering::AcquireRelease) return error("Invalid record"); if (Ordering != AtomicOrdering::NotAtomic && Record[OpNum] == 0) return error("Invalid record"); SyncScope::ID SSID = getDecodedSyncScopeID(Record[OpNum + 3]); unsigned Align; if (Error Err = parseAlignmentValue(Record[OpNum], Align)) return Err; I = new LoadInst(Op, "", Record[OpNum+1], Align, Ordering, SSID); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_STORE: case bitc::FUNC_CODE_INST_STORE_OLD: { // STORE2:[ptrty, ptr, val, align, vol] unsigned OpNum = 0; Value *Val, *Ptr; if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) || (BitCode == bitc::FUNC_CODE_INST_STORE ? getValueTypePair(Record, OpNum, NextValueNo, Val) : popValue(Record, OpNum, NextValueNo, cast(Ptr->getType())->getElementType(), Val)) || OpNum + 2 != Record.size()) return error("Invalid record"); if (Error Err = typeCheckLoadStoreInst(Val->getType(), Ptr->getType())) return Err; unsigned Align; if (Error Err = parseAlignmentValue(Record[OpNum], Align)) return Err; I = new StoreInst(Val, Ptr, Record[OpNum+1], Align); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_STOREATOMIC: case bitc::FUNC_CODE_INST_STOREATOMIC_OLD: { // STOREATOMIC: [ptrty, ptr, val, align, vol, ordering, ssid] unsigned OpNum = 0; Value *Val, *Ptr; if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) || !isa(Ptr->getType()) || (BitCode == bitc::FUNC_CODE_INST_STOREATOMIC ? 
getValueTypePair(Record, OpNum, NextValueNo, Val) : popValue(Record, OpNum, NextValueNo, cast(Ptr->getType())->getElementType(), Val)) || OpNum + 4 != Record.size()) return error("Invalid record"); if (Error Err = typeCheckLoadStoreInst(Val->getType(), Ptr->getType())) return Err; AtomicOrdering Ordering = getDecodedOrdering(Record[OpNum + 2]); if (Ordering == AtomicOrdering::NotAtomic || Ordering == AtomicOrdering::Acquire || Ordering == AtomicOrdering::AcquireRelease) return error("Invalid record"); SyncScope::ID SSID = getDecodedSyncScopeID(Record[OpNum + 3]); if (Ordering != AtomicOrdering::NotAtomic && Record[OpNum] == 0) return error("Invalid record"); unsigned Align; if (Error Err = parseAlignmentValue(Record[OpNum], Align)) return Err; I = new StoreInst(Val, Ptr, Record[OpNum+1], Align, Ordering, SSID); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_CMPXCHG_OLD: case bitc::FUNC_CODE_INST_CMPXCHG: { // CMPXCHG:[ptrty, ptr, cmp, new, vol, successordering, ssid, // failureordering?, isweak?] unsigned OpNum = 0; Value *Ptr, *Cmp, *New; if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) || (BitCode == bitc::FUNC_CODE_INST_CMPXCHG ? getValueTypePair(Record, OpNum, NextValueNo, Cmp) : popValue(Record, OpNum, NextValueNo, cast(Ptr->getType())->getElementType(), Cmp)) || popValue(Record, OpNum, NextValueNo, Cmp->getType(), New) || Record.size() < OpNum + 3 || Record.size() > OpNum + 5) return error("Invalid record"); AtomicOrdering SuccessOrdering = getDecodedOrdering(Record[OpNum + 1]); if (SuccessOrdering == AtomicOrdering::NotAtomic || SuccessOrdering == AtomicOrdering::Unordered) return error("Invalid record"); SyncScope::ID SSID = getDecodedSyncScopeID(Record[OpNum + 2]); if (Error Err = typeCheckLoadStoreInst(Cmp->getType(), Ptr->getType())) return Err; AtomicOrdering FailureOrdering; if (Record.size() < 7) FailureOrdering = AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrdering); else FailureOrdering = getDecodedOrdering(Record[OpNum + 3]); I = new AtomicCmpXchgInst(Ptr, Cmp, New, SuccessOrdering, FailureOrdering, SSID); cast(I)->setVolatile(Record[OpNum]); if (Record.size() < 8) { // Before weak cmpxchgs existed, the instruction simply returned the // value loaded from memory, so bitcode files from that era will be // expecting the first component of a modern cmpxchg. 
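        // Illustrative upgrade, in IR terms:
        //   %pair = cmpxchg i32* %p, i32 %cmp, i32 %new seq_cst seq_cst
        //   %old  = extractvalue { i32, i1 } %pair, 0
        // Old code that consumed the scalar result now reads %old.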
CurBB->getInstList().push_back(I); I = ExtractValueInst::Create(I, 0); } else { cast(I)->setWeak(Record[OpNum+4]); } InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_ATOMICRMW: { // ATOMICRMW:[ptrty, ptr, val, op, vol, ordering, ssid] unsigned OpNum = 0; Value *Ptr, *Val; if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) || !isa(Ptr->getType()) || popValue(Record, OpNum, NextValueNo, cast(Ptr->getType())->getElementType(), Val) || OpNum+4 != Record.size()) return error("Invalid record"); AtomicRMWInst::BinOp Operation = getDecodedRMWOperation(Record[OpNum]); if (Operation < AtomicRMWInst::FIRST_BINOP || Operation > AtomicRMWInst::LAST_BINOP) return error("Invalid record"); AtomicOrdering Ordering = getDecodedOrdering(Record[OpNum + 2]); if (Ordering == AtomicOrdering::NotAtomic || Ordering == AtomicOrdering::Unordered) return error("Invalid record"); SyncScope::ID SSID = getDecodedSyncScopeID(Record[OpNum + 3]); I = new AtomicRMWInst(Operation, Ptr, Val, Ordering, SSID); cast(I)->setVolatile(Record[OpNum+1]); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_FENCE: { // FENCE:[ordering, ssid] if (2 != Record.size()) return error("Invalid record"); AtomicOrdering Ordering = getDecodedOrdering(Record[0]); if (Ordering == AtomicOrdering::NotAtomic || Ordering == AtomicOrdering::Unordered || Ordering == AtomicOrdering::Monotonic) return error("Invalid record"); SyncScope::ID SSID = getDecodedSyncScopeID(Record[1]); I = new FenceInst(Context, Ordering, SSID); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_CALL: { // CALL: [paramattrs, cc, fmf, fnty, fnid, arg0, arg1...] if (Record.size() < 3) return error("Invalid record"); unsigned OpNum = 0; AttributeList PAL = getAttributes(Record[OpNum++]); unsigned CCInfo = Record[OpNum++]; FastMathFlags FMF; if ((CCInfo >> bitc::CALL_FMF) & 1) { FMF = getDecodedFastMathFlags(Record[OpNum++]); if (!FMF.any()) return error("Fast math flags indicator set for call with no FMF"); } FunctionType *FTy = nullptr; if (CCInfo >> bitc::CALL_EXPLICIT_TYPE & 1 && !(FTy = dyn_cast(getTypeByID(Record[OpNum++])))) return error("Explicit call type is not a function type"); Value *Callee; if (getValueTypePair(Record, OpNum, NextValueNo, Callee)) return error("Invalid record"); PointerType *OpTy = dyn_cast(Callee->getType()); if (!OpTy) return error("Callee is not a pointer type"); if (!FTy) { FTy = dyn_cast(OpTy->getElementType()); if (!FTy) return error("Callee is not of pointer to function type"); } else if (OpTy->getElementType() != FTy) return error("Explicit call type does not match pointee type of " "callee operand"); if (Record.size() < FTy->getNumParams() + OpNum) return error("Insufficient operands to call"); SmallVector Args; // Read the fixed params. for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) { if (FTy->getParamType(i)->isLabelTy()) Args.push_back(getBasicBlock(Record[OpNum])); else Args.push_back(getValue(Record, OpNum, NextValueNo, FTy->getParamType(i))); if (!Args.back()) return error("Invalid record"); } // Read type/value pairs for varargs params. 
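      // As with invoke above: FTy describes only the fixed parameters, so any
      // remaining operands are read as explicit (type, value) pairs.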
if (!FTy->isVarArg()) { if (OpNum != Record.size()) return error("Invalid record"); } else { while (OpNum != Record.size()) { Value *Op; if (getValueTypePair(Record, OpNum, NextValueNo, Op)) return error("Invalid record"); Args.push_back(Op); } } I = CallInst::Create(FTy, Callee, Args, OperandBundles); OperandBundles.clear(); InstructionList.push_back(I); cast(I)->setCallingConv( static_cast((0x7ff & CCInfo) >> bitc::CALL_CCONV)); CallInst::TailCallKind TCK = CallInst::TCK_None; if (CCInfo & 1 << bitc::CALL_TAIL) TCK = CallInst::TCK_Tail; if (CCInfo & (1 << bitc::CALL_MUSTTAIL)) TCK = CallInst::TCK_MustTail; if (CCInfo & (1 << bitc::CALL_NOTAIL)) TCK = CallInst::TCK_NoTail; cast(I)->setTailCallKind(TCK); cast(I)->setAttributes(PAL); if (FMF.any()) { if (!isa(I)) return error("Fast-math-flags specified for call without " "floating-point scalar or vector return type"); I->setFastMathFlags(FMF); } break; } case bitc::FUNC_CODE_INST_VAARG: { // VAARG: [valistty, valist, instty] if (Record.size() < 3) return error("Invalid record"); Type *OpTy = getTypeByID(Record[0]); Value *Op = getValue(Record, 1, NextValueNo, OpTy); Type *ResTy = getTypeByID(Record[2]); if (!OpTy || !Op || !ResTy) return error("Invalid record"); I = new VAArgInst(Op, ResTy); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_OPERAND_BUNDLE: { // A call or an invoke can be optionally prefixed with some variable // number of operand bundle blocks. These blocks are read into // OperandBundles and consumed at the next call or invoke instruction. if (Record.size() < 1 || Record[0] >= BundleTags.size()) return error("Invalid record"); std::vector Inputs; unsigned OpNum = 1; while (OpNum != Record.size()) { Value *Op; if (getValueTypePair(Record, OpNum, NextValueNo, Op)) return error("Invalid record"); Inputs.push_back(Op); } OperandBundles.emplace_back(BundleTags[Record[0]], std::move(Inputs)); continue; } } // Add instruction to end of current BB. If there is no current BB, reject // this file. if (!CurBB) { I->deleteValue(); return error("Invalid instruction with no BB"); } if (!OperandBundles.empty()) { I->deleteValue(); return error("Operand bundles found with no consumer"); } CurBB->getInstList().push_back(I); // If this was a terminator instruction, move to the next block. if (isa(I)) { ++CurBBNo; CurBB = CurBBNo < FunctionBBs.size() ? FunctionBBs[CurBBNo] : nullptr; } // Non-void values get registered in the value table for future use. if (I && !I->getType()->isVoidTy()) ValueList.assignValue(I, NextValueNo++); } OutOfRecordLoop: if (!OperandBundles.empty()) return error("Operand bundles found with no consumer"); // Check the function list for unresolved values. if (Argument *A = dyn_cast(ValueList.back())) { if (!A->getParent()) { // We found at least one unresolved value. Nuke them all to avoid leaks. for (unsigned i = ModuleValueListSize, e = ValueList.size(); i != e; ++i){ if ((A = dyn_cast_or_null(ValueList[i])) && !A->getParent()) { A->replaceAllUsesWith(UndefValue::get(A->getType())); delete A; } } return error("Never resolved value found in function"); } } // Unexpected unresolved metadata about to be dropped. if (MDLoader->hasFwdRefs()) return error("Invalid function metadata: outgoing forward refs"); // Trim the value list down to the size it was before we parsed this function. 
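  // Function-local values were appended after the module-level ones, so
  // shrinking restores the value numbering expected by the next function body.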
  ValueList.shrinkTo(ModuleValueListSize);
  MDLoader->shrinkTo(ModuleMDLoaderSize);
  std::vector<BasicBlock *>().swap(FunctionBBs);
  return Error::success();
}

/// Find the function body in the bitcode stream
Error BitcodeReader::findFunctionInStream(
    Function *F,
    DenseMap<Function *, uint64_t>::iterator DeferredFunctionInfoIterator) {
  while (DeferredFunctionInfoIterator->second == 0) {
    // This is the fallback handling for the old format bitcode that
    // didn't contain the function index in the VST, or when we have
    // an anonymous function which would not have a VST entry.
    // Assert that we have one of those two cases.
    assert(VSTOffset == 0 || !F->hasName());
    // Parse the next body in the stream and set its position in the
    // DeferredFunctionInfo map.
    if (Error Err = rememberAndSkipFunctionBodies())
      return Err;
  }
  return Error::success();
}

SyncScope::ID BitcodeReader::getDecodedSyncScopeID(unsigned Val) {
  if (Val == SyncScope::SingleThread || Val == SyncScope::System)
    return SyncScope::ID(Val);
  if (Val >= SSIDs.size())
    return SyncScope::System; // Map unknown synchronization scopes to system.
  return SSIDs[Val];
}

//===----------------------------------------------------------------------===//
// GVMaterializer implementation
//===----------------------------------------------------------------------===//

Error BitcodeReader::materialize(GlobalValue *GV) {
  Function *F = dyn_cast<Function>(GV);
  // If it's not a function or is already material, ignore the request.
  if (!F || !F->isMaterializable())
    return Error::success();

  DenseMap<Function *, uint64_t>::iterator DFII = DeferredFunctionInfo.find(F);
  assert(DFII != DeferredFunctionInfo.end() && "Deferred function not found!");
  // If its position is recorded as 0, its body is somewhere in the stream
  // but we haven't seen it yet.
  if (DFII->second == 0)
    if (Error Err = findFunctionInStream(F, DFII))
      return Err;

  // Materialize metadata before parsing any function bodies.
  if (Error Err = materializeMetadata())
    return Err;

  // Move the bit stream to the saved position of the deferred function body.
  Stream.JumpToBit(DFII->second);

  if (Error Err = parseFunctionBody(F))
    return Err;
  F->setIsMaterializable(false);

  if (StripDebugInfo)
    stripDebugInfo(*F);

  // Upgrade any old intrinsic calls in the function.
  for (auto &I : UpgradedIntrinsics) {
    for (auto UI = I.first->materialized_user_begin(), UE = I.first->user_end();
         UI != UE;) {
      User *U = *UI;
      ++UI;
      if (CallInst *CI = dyn_cast<CallInst>(U))
        UpgradeIntrinsicCall(CI, I.second);
    }
  }

  // Update calls to the remangled intrinsics
  for (auto &I : RemangledIntrinsics)
    for (auto UI = I.first->materialized_user_begin(), UE = I.first->user_end();
         UI != UE;)
      // Don't expect any other users than call sites
      CallSite(*UI++).setCalledFunction(I.second);

  // Finish fn->subprogram upgrade for materialized functions.
  if (DISubprogram *SP = MDLoader->lookupSubprogramForFunction(F))
    F->setSubprogram(SP);

  // Check if the TBAA Metadata are valid, otherwise we will need to strip them.
  if (!MDLoader->isStrippingTBAA()) {
    for (auto &I : instructions(F)) {
      MDNode *TBAA = I.getMetadata(LLVMContext::MD_tbaa);
      if (!TBAA || TBAAVerifyHelper.visitTBAAMetadata(I, TBAA))
        continue;
      MDLoader->setStripTBAA(true);
      stripTBAA(F->getParent());
    }
  }

  // Bring in any functions that this function forward-referenced via
  // blockaddresses.
  return materializeForwardReferencedFunctions();
}

Error BitcodeReader::materializeModule() {
  if (Error Err = materializeMetadata())
    return Err;

  // Promise to materialize all forward references.
  WillMaterializeAllForwardRefs = true;

  // Iterate over the module, deserializing any functions that are still on
  // disk.
  for (Function &F : *TheModule) {
    if (Error Err = materialize(&F))
      return Err;
  }
  // At this point, if there are any function bodies, parse the rest of
  // the bits in the module past the last function block we have recorded
  // through either lazy scanning or the VST.
  if (LastFunctionBlockBit || NextUnreadBit)
    if (Error Err = parseModule(LastFunctionBlockBit > NextUnreadBit
                                    ? LastFunctionBlockBit
                                    : NextUnreadBit))
      return Err;

  // Check that all block address forward references got resolved (as we
  // promised above).
  if (!BasicBlockFwdRefs.empty())
    return error("Never resolved function from blockaddress");

  // Upgrade any intrinsic calls that slipped through (should not happen!) and
  // delete the old functions to clean up. We can't do this unless the entire
  // module is materialized because there could always be another function body
  // with calls to the old function.
  for (auto &I : UpgradedIntrinsics) {
    for (auto *U : I.first->users()) {
      if (CallInst *CI = dyn_cast<CallInst>(U))
        UpgradeIntrinsicCall(CI, I.second);
    }
    if (!I.first->use_empty())
      I.first->replaceAllUsesWith(I.second);
    I.first->eraseFromParent();
  }
  UpgradedIntrinsics.clear();
  // Do the same for remangled intrinsics
  for (auto &I : RemangledIntrinsics) {
    I.first->replaceAllUsesWith(I.second);
    I.first->eraseFromParent();
  }
  RemangledIntrinsics.clear();

  UpgradeDebugInfo(*TheModule);

  UpgradeModuleFlags(*TheModule);
  return Error::success();
}

std::vector<StructType *> BitcodeReader::getIdentifiedStructTypes() const {
  return IdentifiedStructTypes;
}

ModuleSummaryIndexBitcodeReader::ModuleSummaryIndexBitcodeReader(
    BitstreamCursor Cursor, StringRef Strtab, ModuleSummaryIndex &TheIndex,
    StringRef ModulePath, unsigned ModuleId)
    : BitcodeReaderBase(std::move(Cursor), Strtab), TheIndex(TheIndex),
      ModulePath(ModulePath), ModuleId(ModuleId) {}

ModuleSummaryIndex::ModuleInfo *
ModuleSummaryIndexBitcodeReader::addThisModule() {
  return TheIndex.addModule(ModulePath, ModuleId);
}

std::pair<ValueInfo, GlobalValue::GUID>
ModuleSummaryIndexBitcodeReader::getValueInfoFromValueId(unsigned ValueId) {
  auto VGI = ValueIdToValueInfoMap[ValueId];
  assert(VGI.first);
  return VGI;
}

void ModuleSummaryIndexBitcodeReader::setValueGUID(
    uint64_t ValueID, StringRef ValueName, GlobalValue::LinkageTypes Linkage,
    StringRef SourceFileName) {
  std::string GlobalId =
      GlobalValue::getGlobalIdentifier(ValueName, Linkage, SourceFileName);
  auto ValueGUID = GlobalValue::getGUID(GlobalId);
  auto OriginalNameID = ValueGUID;
  if (GlobalValue::isLocalLinkage(Linkage))
    OriginalNameID = GlobalValue::getGUID(ValueName);
  if (PrintSummaryGUIDs)
    dbgs() << "GUID " << ValueGUID << "(" << OriginalNameID << ") is "
           << ValueName << "\n";
  ValueIdToValueInfoMap[ValueID] =
      std::make_pair(TheIndex.getOrInsertValueInfo(ValueGUID), OriginalNameID);
}

// Specialized value symbol table parser used when reading module index
// blocks where we don't actually create global values. The parsed information
// is saved in the bitcode reader for use when later parsing summaries.
Error ModuleSummaryIndexBitcodeReader::parseValueSymbolTable(
    uint64_t Offset,
    DenseMap<unsigned, GlobalValue::LinkageTypes> &ValueIdToLinkageMap) {
  // With a strtab the VST is not required to parse the summary.
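  // (Names are instead read directly out of the STRTAB block, via the strtab
  // offset/size fields carried by the v2 global value records.)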
if (UseStrtab) return Error::success(); assert(Offset > 0 && "Expected non-zero VST offset"); uint64_t CurrentBit = jumpToValueSymbolTable(Offset, Stream); if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID)) return error("Invalid record"); SmallVector Record; // Read all the records for this value table. SmallString<128> ValueName; while (true) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: // Done parsing VST, jump back to wherever we came from. Stream.JumpToBit(CurrentBit); return Error::success(); case BitstreamEntry::Record: // The interesting case. break; } // Read a record. Record.clear(); switch (Stream.readRecord(Entry.ID, Record)) { default: // Default behavior: ignore (e.g. VST_CODE_BBENTRY records). break; case bitc::VST_CODE_ENTRY: { // VST_CODE_ENTRY: [valueid, namechar x N] if (convertToString(Record, 1, ValueName)) return error("Invalid record"); unsigned ValueID = Record[0]; assert(!SourceFileName.empty()); auto VLI = ValueIdToLinkageMap.find(ValueID); assert(VLI != ValueIdToLinkageMap.end() && "No linkage found for VST entry?"); auto Linkage = VLI->second; setValueGUID(ValueID, ValueName, Linkage, SourceFileName); ValueName.clear(); break; } case bitc::VST_CODE_FNENTRY: { // VST_CODE_FNENTRY: [valueid, offset, namechar x N] if (convertToString(Record, 2, ValueName)) return error("Invalid record"); unsigned ValueID = Record[0]; assert(!SourceFileName.empty()); auto VLI = ValueIdToLinkageMap.find(ValueID); assert(VLI != ValueIdToLinkageMap.end() && "No linkage found for VST entry?"); auto Linkage = VLI->second; setValueGUID(ValueID, ValueName, Linkage, SourceFileName); ValueName.clear(); break; } case bitc::VST_CODE_COMBINED_ENTRY: { // VST_CODE_COMBINED_ENTRY: [valueid, refguid] unsigned ValueID = Record[0]; GlobalValue::GUID RefGUID = Record[1]; // The "original name", which is the second value of the pair will be // overriden later by a FS_COMBINED_ORIGINAL_NAME in the combined index. ValueIdToValueInfoMap[ValueID] = std::make_pair(TheIndex.getOrInsertValueInfo(RefGUID), RefGUID); break; } } } } // Parse just the blocks needed for building the index out of the module. // At the end of this routine the module Index is populated with a map // from global value id to GlobalValueSummary objects. Error ModuleSummaryIndexBitcodeReader::parseModule() { if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) return error("Invalid record"); SmallVector Record; DenseMap ValueIdToLinkageMap; unsigned ValueId = 0; // Read the index for this module. while (true) { BitstreamEntry Entry = Stream.advance(); switch (Entry.Kind) { case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: return Error::success(); case BitstreamEntry::SubBlock: switch (Entry.ID) { default: // Skip unknown content. if (Stream.SkipBlock()) return error("Invalid record"); break; case bitc::BLOCKINFO_BLOCK_ID: // Need to parse these to get abbrev ids (e.g. for VST) if (readBlockInfo()) return error("Malformed block"); break; case bitc::VALUE_SYMTAB_BLOCK_ID: // Should have been parsed earlier via VSTOffset, unless there // is no summary section. 
assert(((SeenValueSymbolTable && VSTOffset > 0) || !SeenGlobalValSummary) && "Expected early VST parse via VSTOffset record"); if (Stream.SkipBlock()) return error("Invalid record"); break; case bitc::GLOBALVAL_SUMMARY_BLOCK_ID: case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID: assert(!SeenValueSymbolTable && "Already read VST when parsing summary block?"); // We might not have a VST if there were no values in the // summary. An empty summary block generated when we are // performing ThinLTO compiles so we don't later invoke // the regular LTO process on them. if (VSTOffset > 0) { if (Error Err = parseValueSymbolTable(VSTOffset, ValueIdToLinkageMap)) return Err; SeenValueSymbolTable = true; } SeenGlobalValSummary = true; if (Error Err = parseEntireSummary(Entry.ID)) return Err; break; case bitc::MODULE_STRTAB_BLOCK_ID: if (Error Err = parseModuleStringTable()) return Err; break; } continue; case BitstreamEntry::Record: { Record.clear(); auto BitCode = Stream.readRecord(Entry.ID, Record); switch (BitCode) { default: break; // Default behavior, ignore unknown content. case bitc::MODULE_CODE_VERSION: { if (Error Err = parseVersionRecord(Record).takeError()) return Err; break; } /// MODULE_CODE_SOURCE_FILENAME: [namechar x N] case bitc::MODULE_CODE_SOURCE_FILENAME: { SmallString<128> ValueName; if (convertToString(Record, 0, ValueName)) return error("Invalid record"); SourceFileName = ValueName.c_str(); break; } /// MODULE_CODE_HASH: [5*i32] case bitc::MODULE_CODE_HASH: { if (Record.size() != 5) return error("Invalid hash length " + Twine(Record.size()).str()); auto &Hash = addThisModule()->second.second; int Pos = 0; for (auto &Val : Record) { assert(!(Val >> 32) && "Unexpected high bits set"); Hash[Pos++] = Val; } break; } /// MODULE_CODE_VSTOFFSET: [offset] case bitc::MODULE_CODE_VSTOFFSET: if (Record.size() < 1) return error("Invalid record"); // Note that we subtract 1 here because the offset is relative to one // word before the start of the identification or module block, which // was historically always the start of the regular bitcode header. VSTOffset = Record[0] - 1; break; // v1 GLOBALVAR: [pointer type, isconst, initid, linkage, ...] // v1 FUNCTION: [type, callingconv, isproto, linkage, ...] // v1 ALIAS: [alias type, addrspace, aliasee val#, linkage, ...] 
      // v2: [strtab offset, strtab size, v1]
      case bitc::MODULE_CODE_GLOBALVAR:
      case bitc::MODULE_CODE_FUNCTION:
      case bitc::MODULE_CODE_ALIAS: {
        StringRef Name;
        ArrayRef<uint64_t> GVRecord;
        std::tie(Name, GVRecord) = readNameFromStrtab(Record);
        if (GVRecord.size() <= 3)
          return error("Invalid record");
        uint64_t RawLinkage = GVRecord[3];
        GlobalValue::LinkageTypes Linkage = getDecodedLinkage(RawLinkage);
        if (!UseStrtab) {
          ValueIdToLinkageMap[ValueId++] = Linkage;
          break;
        }

        setValueGUID(ValueId++, Name, Linkage, SourceFileName);
        break;
      }
      } // end switch (BitCode)
      } // end case BitstreamEntry::Record block
      continue;
    }   // end switch (Entry.Kind)
  }
}

std::vector<ValueInfo>
ModuleSummaryIndexBitcodeReader::makeRefList(ArrayRef<uint64_t> Record) {
  std::vector<ValueInfo> Ret;
  Ret.reserve(Record.size());
  for (uint64_t RefValueId : Record)
    Ret.push_back(getValueInfoFromValueId(RefValueId).first);
  return Ret;
}

std::vector<FunctionSummary::EdgeTy>
ModuleSummaryIndexBitcodeReader::makeCallList(ArrayRef<uint64_t> Record,
                                              bool IsOldProfileFormat,
                                              bool HasProfile) {
  std::vector<FunctionSummary::EdgeTy> Ret;
  Ret.reserve(Record.size());
  for (unsigned I = 0, E = Record.size(); I != E; ++I) {
    CalleeInfo::HotnessType Hotness = CalleeInfo::HotnessType::Unknown;
    ValueInfo Callee = getValueInfoFromValueId(Record[I]).first;
    if (IsOldProfileFormat) {
      I += 1; // Skip old callsitecount field
      if (HasProfile)
        I += 1; // Skip old profilecount field
    } else if (HasProfile)
      Hotness = static_cast<CalleeInfo::HotnessType>(Record[++I]);
    Ret.push_back(FunctionSummary::EdgeTy{Callee, CalleeInfo{Hotness}});
  }
  return Ret;
}

// Eagerly parse the entire summary block. This populates the GlobalValueSummary
// objects in the index.
Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
  if (Stream.EnterSubBlock(ID))
    return error("Invalid record");
  SmallVector<uint64_t, 64> Record;

  // Parse version
  {
    BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
    if (Entry.Kind != BitstreamEntry::Record)
      return error("Invalid Summary Block: record for version expected");
    if (Stream.readRecord(Entry.ID, Record) != bitc::FS_VERSION)
      return error("Invalid Summary Block: version expected");
  }
  const uint64_t Version = Record[0];
  const bool IsOldProfileFormat = Version == 1;
  if (Version < 1 || Version > 4)
    return error("Invalid summary version " + Twine(Version) +
                 ", 1, 2, 3 or 4 expected");
  Record.clear();

  // Keep around the last seen summary to be used when we see an optional
  // "OriginalName" attachment.
  GlobalValueSummary *LastSeenSummary = nullptr;
  GlobalValue::GUID LastSeenGUID = 0;

  // We can expect to see any number of type ID information records before
  // each function summary record; these variables store the information
  // collected so far so that it can be used to create the summary object.
  std::vector<GlobalValue::GUID> PendingTypeTests;
  std::vector<FunctionSummary::VFuncId> PendingTypeTestAssumeVCalls,
      PendingTypeCheckedLoadVCalls;
  std::vector<FunctionSummary::ConstVCall> PendingTypeTestAssumeConstVCalls,
      PendingTypeCheckedLoadConstVCalls;

  while (true) {
    BitstreamEntry Entry = Stream.advanceSkippingSubblocks();

    switch (Entry.Kind) {
    case BitstreamEntry::SubBlock: // Handled for us already.
    case BitstreamEntry::Error:
      return error("Malformed block");
    case BitstreamEntry::EndBlock:
      return Error::success();
    case BitstreamEntry::Record:
      // The interesting case.
      break;
    }

    // Read a record. The record format depends on whether this
    // is a per-module index or a combined index file. In the per-module
    // case the records contain the associated value's ID for correlation
    // with VST entries. In the combined index the correlation is done
    // via the bitcode offset of the summary records (which were saved
    // in the combined index VST entries). The records also contain
    // information used for ThinLTO renaming and importing.
Record.clear(); auto BitCode = Stream.readRecord(Entry.ID, Record); switch (BitCode) { default: // Default behavior: ignore. break; case bitc::FS_VALUE_GUID: { // [valueid, refguid] uint64_t ValueID = Record[0]; GlobalValue::GUID RefGUID = Record[1]; ValueIdToValueInfoMap[ValueID] = std::make_pair(TheIndex.getOrInsertValueInfo(RefGUID), RefGUID); break; } // FS_PERMODULE: [valueid, flags, instcount, fflags, numrefs, // numrefs x valueid, n x (valueid)] // FS_PERMODULE_PROFILE: [valueid, flags, instcount, fflags, numrefs, // numrefs x valueid, // n x (valueid, hotness)] case bitc::FS_PERMODULE: case bitc::FS_PERMODULE_PROFILE: { unsigned ValueID = Record[0]; uint64_t RawFlags = Record[1]; unsigned InstCount = Record[2]; uint64_t RawFunFlags = 0; unsigned NumRefs = Record[3]; int RefListStartIndex = 4; if (Version >= 4) { RawFunFlags = Record[3]; NumRefs = Record[4]; RefListStartIndex = 5; } auto Flags = getDecodedGVSummaryFlags(RawFlags, Version); // The module path string ref set in the summary must be owned by the // index's module string table. Since we don't have a module path // string table section in the per-module index, we create a single // module path string table entry with an empty (0) ID to take // ownership. int CallGraphEdgeStartIndex = RefListStartIndex + NumRefs; assert(Record.size() >= RefListStartIndex + NumRefs && "Record size inconsistent with number of references"); std::vector Refs = makeRefList( ArrayRef(Record).slice(RefListStartIndex, NumRefs)); bool HasProfile = (BitCode == bitc::FS_PERMODULE_PROFILE); std::vector Calls = makeCallList( ArrayRef(Record).slice(CallGraphEdgeStartIndex), IsOldProfileFormat, HasProfile); auto FS = llvm::make_unique( Flags, InstCount, getDecodedFFlags(RawFunFlags), std::move(Refs), std::move(Calls), std::move(PendingTypeTests), std::move(PendingTypeTestAssumeVCalls), std::move(PendingTypeCheckedLoadVCalls), std::move(PendingTypeTestAssumeConstVCalls), std::move(PendingTypeCheckedLoadConstVCalls)); PendingTypeTests.clear(); PendingTypeTestAssumeVCalls.clear(); PendingTypeCheckedLoadVCalls.clear(); PendingTypeTestAssumeConstVCalls.clear(); PendingTypeCheckedLoadConstVCalls.clear(); auto VIAndOriginalGUID = getValueInfoFromValueId(ValueID); FS->setModulePath(addThisModule()->first()); FS->setOriginalName(VIAndOriginalGUID.second); TheIndex.addGlobalValueSummary(VIAndOriginalGUID.first, std::move(FS)); break; } // FS_ALIAS: [valueid, flags, valueid] // Aliases must be emitted (and parsed) after all FS_PERMODULE entries, as // they expect all aliasee summaries to be available. case bitc::FS_ALIAS: { unsigned ValueID = Record[0]; uint64_t RawFlags = Record[1]; unsigned AliaseeID = Record[2]; auto Flags = getDecodedGVSummaryFlags(RawFlags, Version); auto AS = llvm::make_unique(Flags); // The module path string ref set in the summary must be owned by the // index's module string table. Since we don't have a module path // string table section in the per-module index, we create a single // module path string table entry with an empty (0) ID to take // ownership. 
AS->setModulePath(addThisModule()->first()); GlobalValue::GUID AliaseeGUID = getValueInfoFromValueId(AliaseeID).first.getGUID(); auto AliaseeInModule = TheIndex.findSummaryInModule(AliaseeGUID, ModulePath); if (!AliaseeInModule) return error("Alias expects aliasee summary to be parsed"); AS->setAliasee(AliaseeInModule); AS->setAliaseeGUID(AliaseeGUID); auto GUID = getValueInfoFromValueId(ValueID); AS->setOriginalName(GUID.second); TheIndex.addGlobalValueSummary(GUID.first, std::move(AS)); break; } // FS_PERMODULE_GLOBALVAR_INIT_REFS: [valueid, flags, n x valueid] case bitc::FS_PERMODULE_GLOBALVAR_INIT_REFS: { unsigned ValueID = Record[0]; uint64_t RawFlags = Record[1]; auto Flags = getDecodedGVSummaryFlags(RawFlags, Version); std::vector Refs = makeRefList(ArrayRef(Record).slice(2)); auto FS = llvm::make_unique(Flags, std::move(Refs)); FS->setModulePath(addThisModule()->first()); auto GUID = getValueInfoFromValueId(ValueID); FS->setOriginalName(GUID.second); TheIndex.addGlobalValueSummary(GUID.first, std::move(FS)); break; } // FS_COMBINED: [valueid, modid, flags, instcount, fflags, numrefs, // numrefs x valueid, n x (valueid)] // FS_COMBINED_PROFILE: [valueid, modid, flags, instcount, fflags, numrefs, // numrefs x valueid, n x (valueid, hotness)] case bitc::FS_COMBINED: case bitc::FS_COMBINED_PROFILE: { unsigned ValueID = Record[0]; uint64_t ModuleId = Record[1]; uint64_t RawFlags = Record[2]; unsigned InstCount = Record[3]; uint64_t RawFunFlags = 0; unsigned NumRefs = Record[4]; int RefListStartIndex = 5; if (Version >= 4) { RawFunFlags = Record[4]; NumRefs = Record[5]; RefListStartIndex = 6; } auto Flags = getDecodedGVSummaryFlags(RawFlags, Version); int CallGraphEdgeStartIndex = RefListStartIndex + NumRefs; assert(Record.size() >= RefListStartIndex + NumRefs && "Record size inconsistent with number of references"); std::vector Refs = makeRefList( ArrayRef(Record).slice(RefListStartIndex, NumRefs)); bool HasProfile = (BitCode == bitc::FS_COMBINED_PROFILE); std::vector Edges = makeCallList( ArrayRef(Record).slice(CallGraphEdgeStartIndex), IsOldProfileFormat, HasProfile); ValueInfo VI = getValueInfoFromValueId(ValueID).first; auto FS = llvm::make_unique( Flags, InstCount, getDecodedFFlags(RawFunFlags), std::move(Refs), std::move(Edges), std::move(PendingTypeTests), std::move(PendingTypeTestAssumeVCalls), std::move(PendingTypeCheckedLoadVCalls), std::move(PendingTypeTestAssumeConstVCalls), std::move(PendingTypeCheckedLoadConstVCalls)); PendingTypeTests.clear(); PendingTypeTestAssumeVCalls.clear(); PendingTypeCheckedLoadVCalls.clear(); PendingTypeTestAssumeConstVCalls.clear(); PendingTypeCheckedLoadConstVCalls.clear(); LastSeenSummary = FS.get(); LastSeenGUID = VI.getGUID(); FS->setModulePath(ModuleIdMap[ModuleId]); TheIndex.addGlobalValueSummary(VI, std::move(FS)); break; } // FS_COMBINED_ALIAS: [valueid, modid, flags, valueid] // Aliases must be emitted (and parsed) after all FS_COMBINED entries, as // they expect all aliasee summaries to be available. 
case bitc::FS_COMBINED_ALIAS: { unsigned ValueID = Record[0]; uint64_t ModuleId = Record[1]; uint64_t RawFlags = Record[2]; unsigned AliaseeValueId = Record[3]; auto Flags = getDecodedGVSummaryFlags(RawFlags, Version); auto AS = llvm::make_unique<AliasSummary>(Flags); LastSeenSummary = AS.get(); AS->setModulePath(ModuleIdMap[ModuleId]); auto AliaseeGUID = getValueInfoFromValueId(AliaseeValueId).first.getGUID(); auto AliaseeInModule = TheIndex.findSummaryInModule(AliaseeGUID, AS->modulePath()); AS->setAliasee(AliaseeInModule); AS->setAliaseeGUID(AliaseeGUID); ValueInfo VI = getValueInfoFromValueId(ValueID).first; LastSeenGUID = VI.getGUID(); TheIndex.addGlobalValueSummary(VI, std::move(AS)); break; } // FS_COMBINED_GLOBALVAR_INIT_REFS: [valueid, modid, flags, n x valueid] case bitc::FS_COMBINED_GLOBALVAR_INIT_REFS: { unsigned ValueID = Record[0]; uint64_t ModuleId = Record[1]; uint64_t RawFlags = Record[2]; auto Flags = getDecodedGVSummaryFlags(RawFlags, Version); std::vector<ValueInfo> Refs = makeRefList(ArrayRef<uint64_t>(Record).slice(3)); auto FS = llvm::make_unique<GlobalVarSummary>(Flags, std::move(Refs)); LastSeenSummary = FS.get(); FS->setModulePath(ModuleIdMap[ModuleId]); ValueInfo VI = getValueInfoFromValueId(ValueID).first; LastSeenGUID = VI.getGUID(); TheIndex.addGlobalValueSummary(VI, std::move(FS)); break; } // FS_COMBINED_ORIGINAL_NAME: [original_name] case bitc::FS_COMBINED_ORIGINAL_NAME: { uint64_t OriginalName = Record[0]; if (!LastSeenSummary) return error("Name attachment that does not follow a combined record"); LastSeenSummary->setOriginalName(OriginalName); TheIndex.addOriginalName(LastSeenGUID, OriginalName); // Reset the LastSeenSummary LastSeenSummary = nullptr; LastSeenGUID = 0; break; } case bitc::FS_TYPE_TESTS: assert(PendingTypeTests.empty()); PendingTypeTests.insert(PendingTypeTests.end(), Record.begin(), Record.end()); break; case bitc::FS_TYPE_TEST_ASSUME_VCALLS: assert(PendingTypeTestAssumeVCalls.empty()); for (unsigned I = 0; I != Record.size(); I += 2) PendingTypeTestAssumeVCalls.push_back({Record[I], Record[I+1]}); break; case bitc::FS_TYPE_CHECKED_LOAD_VCALLS: assert(PendingTypeCheckedLoadVCalls.empty()); for (unsigned I = 0; I != Record.size(); I += 2) PendingTypeCheckedLoadVCalls.push_back({Record[I], Record[I+1]}); break; case bitc::FS_TYPE_TEST_ASSUME_CONST_VCALL: PendingTypeTestAssumeConstVCalls.push_back( {{Record[0], Record[1]}, {Record.begin() + 2, Record.end()}}); break; case bitc::FS_TYPE_CHECKED_LOAD_CONST_VCALL: PendingTypeCheckedLoadConstVCalls.push_back( {{Record[0], Record[1]}, {Record.begin() + 2, Record.end()}}); break; case bitc::FS_CFI_FUNCTION_DEFS: { std::set<std::string> &CfiFunctionDefs = TheIndex.cfiFunctionDefs(); for (unsigned I = 0; I != Record.size(); I += 2) CfiFunctionDefs.insert( {Strtab.data() + Record[I], static_cast<size_t>(Record[I + 1])}); break; } case bitc::FS_CFI_FUNCTION_DECLS: { std::set<std::string> &CfiFunctionDecls = TheIndex.cfiFunctionDecls(); for (unsigned I = 0; I != Record.size(); I += 2) CfiFunctionDecls.insert( {Strtab.data() + Record[I], static_cast<size_t>(Record[I + 1])}); break; } } } llvm_unreachable("Exit infinite loop"); } // Parse the module string table block into the Index. // This populates the ModulePathStringTable map in the index.
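// (Editor's sketch, assuming only the record layouts documented above.)
// A small module string table with two modules might decode as:
//   MST_CODE_ENTRY: [0, 'a', '.', 'o']    -> ModuleIdMap[0] = "a.o"
//   MST_CODE_HASH:  [h0, h1, h2, h3, h4]  -> hash stored on "a.o"
//   MST_CODE_ENTRY: [1, 'b', '.', 'o']    -> ModuleIdMap[1] = "b.o"
// A hash record always attaches to the most recent MST_CODE_ENTRY, which is
// why the parser below tracks LastSeenModule.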
Error ModuleSummaryIndexBitcodeReader::parseModuleStringTable() { if (Stream.EnterSubBlock(bitc::MODULE_STRTAB_BLOCK_ID)) return error("Invalid record"); SmallVector<uint64_t, 64> Record; SmallString<128> ModulePath; ModuleSummaryIndex::ModuleInfo *LastSeenModule = nullptr; while (true) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: return Error::success(); case BitstreamEntry::Record: // The interesting case. break; } Record.clear(); switch (Stream.readRecord(Entry.ID, Record)) { default: // Default behavior: ignore. break; case bitc::MST_CODE_ENTRY: { // MST_ENTRY: [modid, namechar x N] uint64_t ModuleId = Record[0]; if (convertToString(Record, 1, ModulePath)) return error("Invalid record"); LastSeenModule = TheIndex.addModule(ModulePath, ModuleId); ModuleIdMap[ModuleId] = LastSeenModule->first(); ModulePath.clear(); break; } /// MST_CODE_HASH: [5*i32] case bitc::MST_CODE_HASH: { if (Record.size() != 5) return error("Invalid hash length " + Twine(Record.size()).str()); if (!LastSeenModule) return error("Invalid hash that does not follow a module path"); int Pos = 0; for (auto &Val : Record) { assert(!(Val >> 32) && "Unexpected high bits set"); LastSeenModule->second.second[Pos++] = Val; } // Reset LastSeenModule to avoid overriding the hash unexpectedly. LastSeenModule = nullptr; break; } } } llvm_unreachable("Exit infinite loop"); } namespace { // FIXME: This class is only here to support the transition to llvm::Error. It // will be removed once this transition is complete. Clients should prefer to // deal with the Error value directly, rather than converting to error_code. class BitcodeErrorCategoryType : public std::error_category { const char *name() const noexcept override { return "llvm.bitcode"; } std::string message(int IE) const override { BitcodeError E = static_cast<BitcodeError>(IE); switch (E) { case BitcodeError::CorruptedBitcode: return "Corrupted bitcode"; } llvm_unreachable("Unknown error type!"); } }; } // end anonymous namespace static ManagedStatic<BitcodeErrorCategoryType> ErrorCategory; const std::error_category &llvm::BitcodeErrorCategory() { return *ErrorCategory; } static Expected<StringRef> readBlobInRecord(BitstreamCursor &Stream, unsigned Block, unsigned RecordID) { if (Stream.EnterSubBlock(Block)) return error("Invalid record"); StringRef Strtab; while (true) { BitstreamEntry Entry = Stream.advance(); switch (Entry.Kind) { case BitstreamEntry::EndBlock: return Strtab; case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::SubBlock: if (Stream.SkipBlock()) return error("Malformed block"); break; case BitstreamEntry::Record: StringRef Blob; SmallVector<uint64_t, 64> Record; if (Stream.readRecord(Entry.ID, Record, &Blob) == RecordID) Strtab = Blob; break; } } } //===----------------------------------------------------------------------===// // External interface //===----------------------------------------------------------------------===// Expected<std::vector<BitcodeModule>> llvm::getBitcodeModuleList(MemoryBufferRef Buffer) { auto FOrErr = getBitcodeFileContents(Buffer); if (!FOrErr) return FOrErr.takeError(); return std::move(FOrErr->Mods); } Expected<BitcodeFileContents> llvm::getBitcodeFileContents(MemoryBufferRef Buffer) { Expected<BitstreamCursor> StreamOrErr = initStream(Buffer); if (!StreamOrErr) return StreamOrErr.takeError(); BitstreamCursor &Stream = *StreamOrErr; BitcodeFileContents F; while (true) { uint64_t BCBegin = Stream.getCurrentByteNo(); // We may be consuming bitcode from
a client that leaves garbage at the end // of the bitcode stream (e.g. Apple's ar tool). If we are close enough to // the end that there cannot possibly be another module, stop looking. if (BCBegin + 8 >= Stream.getBitcodeBytes().size()) return F; BitstreamEntry Entry = Stream.advance(); switch (Entry.Kind) { case BitstreamEntry::EndBlock: case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::SubBlock: { uint64_t IdentificationBit = -1ull; if (Entry.ID == bitc::IDENTIFICATION_BLOCK_ID) { IdentificationBit = Stream.GetCurrentBitNo() - BCBegin * 8; if (Stream.SkipBlock()) return error("Malformed block"); Entry = Stream.advance(); if (Entry.Kind != BitstreamEntry::SubBlock || Entry.ID != bitc::MODULE_BLOCK_ID) return error("Malformed block"); } if (Entry.ID == bitc::MODULE_BLOCK_ID) { uint64_t ModuleBit = Stream.GetCurrentBitNo() - BCBegin * 8; if (Stream.SkipBlock()) return error("Malformed block"); F.Mods.push_back({Stream.getBitcodeBytes().slice( BCBegin, Stream.getCurrentByteNo() - BCBegin), Buffer.getBufferIdentifier(), IdentificationBit, ModuleBit}); continue; } if (Entry.ID == bitc::STRTAB_BLOCK_ID) { Expected<StringRef> Strtab = readBlobInRecord(Stream, bitc::STRTAB_BLOCK_ID, bitc::STRTAB_BLOB); if (!Strtab) return Strtab.takeError(); // This string table is used by every preceding bitcode module that does // not have its own string table. A bitcode file may have multiple // string tables if it was created by binary concatenation, for example // with "llvm-cat -b". for (auto I = F.Mods.rbegin(), E = F.Mods.rend(); I != E; ++I) { if (!I->Strtab.empty()) break; I->Strtab = *Strtab; } // Similarly, the string table is used by every preceding symbol table; // normally there will be just one unless the bitcode file was created // by binary concatenation. if (!F.Symtab.empty() && F.StrtabForSymtab.empty()) F.StrtabForSymtab = *Strtab; continue; } if (Entry.ID == bitc::SYMTAB_BLOCK_ID) { Expected<StringRef> SymtabOrErr = readBlobInRecord(Stream, bitc::SYMTAB_BLOCK_ID, bitc::SYMTAB_BLOB); if (!SymtabOrErr) return SymtabOrErr.takeError(); // We can expect the bitcode file to have multiple symbol tables if it // was created by binary concatenation. In that case we silently // ignore any subsequent symbol tables, which is fine because this is a // low level function. The client is expected to notice that the number // of modules in the symbol table does not match the number of modules // in the input file and regenerate the symbol table. if (F.Symtab.empty()) F.Symtab = *SymtabOrErr; continue; } if (Stream.SkipBlock()) return error("Malformed block"); continue; } case BitstreamEntry::Record: Stream.skipRecord(Entry.ID); continue; } } } /// \brief Get a lazy one-at-time loading module from bitcode. /// /// This isn't always used in a lazy context. In particular, it's also used by /// \a parseModule(). If this is truly lazy, then we need to eagerly pull /// in forward-referenced functions from block address references. /// /// \param[in] MaterializeAll Set to \c true if we should materialize /// everything.
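/// (Editor's illustrative sketch; the driver code below is an assumption,
/// not verbatim LLVM usage.) A minimal lazy-loading client might do:
/// \code
///   Expected<std::unique_ptr<Module>> MOrErr =
///       getLazyBitcodeModule(Buf, Ctx, /*ShouldLazyLoadMetadata=*/true);
///   if (!MOrErr)
///     return MOrErr.takeError();
///   if (Error Err = (*MOrErr)->materializeAll())
///     return Err; // forces all deferred function bodies
/// \endcode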
Expected<std::unique_ptr<Module>> BitcodeModule::getModuleImpl(LLVMContext &Context, bool MaterializeAll, bool ShouldLazyLoadMetadata, bool IsImporting) { BitstreamCursor Stream(Buffer); std::string ProducerIdentification; if (IdentificationBit != -1ull) { Stream.JumpToBit(IdentificationBit); Expected<std::string> ProducerIdentificationOrErr = readIdentificationBlock(Stream); if (!ProducerIdentificationOrErr) return ProducerIdentificationOrErr.takeError(); ProducerIdentification = *ProducerIdentificationOrErr; } Stream.JumpToBit(ModuleBit); auto *R = new BitcodeReader(std::move(Stream), Strtab, ProducerIdentification, Context); std::unique_ptr<Module> M = llvm::make_unique<Module>(ModuleIdentifier, Context); M->setMaterializer(R); // Delay parsing Metadata if ShouldLazyLoadMetadata is true. if (Error Err = R->parseBitcodeInto(M.get(), ShouldLazyLoadMetadata, IsImporting)) return std::move(Err); if (MaterializeAll) { // Read in the entire module, and destroy the BitcodeReader. if (Error Err = M->materializeAll()) return std::move(Err); } else { // Resolve forward references from blockaddresses. if (Error Err = R->materializeForwardReferencedFunctions()) return std::move(Err); } return std::move(M); } Expected<std::unique_ptr<Module>> BitcodeModule::getLazyModule(LLVMContext &Context, bool ShouldLazyLoadMetadata, bool IsImporting) { return getModuleImpl(Context, false, ShouldLazyLoadMetadata, IsImporting); } // Parse the specified bitcode buffer and merge the index into CombinedIndex. // We don't use ModuleIdentifier here because the client may need to control the // module path used in the combined summary (e.g. when reading summaries for // regular LTO modules). Error BitcodeModule::readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath, uint64_t ModuleId) { BitstreamCursor Stream(Buffer); Stream.JumpToBit(ModuleBit); ModuleSummaryIndexBitcodeReader R(std::move(Stream), Strtab, CombinedIndex, ModulePath, ModuleId); return R.parseModule(); } // Parse the specified bitcode buffer, returning the function info index. Expected<std::unique_ptr<ModuleSummaryIndex>> BitcodeModule::getSummary() { BitstreamCursor Stream(Buffer); Stream.JumpToBit(ModuleBit); auto Index = llvm::make_unique<ModuleSummaryIndex>(); ModuleSummaryIndexBitcodeReader R(std::move(Stream), Strtab, *Index, ModuleIdentifier, 0); if (Error Err = R.parseModule()) return std::move(Err); return std::move(Index); } // Check if the given bitcode buffer contains a global value summary block. Expected<BitcodeLTOInfo> BitcodeModule::getLTOInfo() { BitstreamCursor Stream(Buffer); Stream.JumpToBit(ModuleBit); if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) return error("Invalid record"); while (true) { BitstreamEntry Entry = Stream.advance(); switch (Entry.Kind) { case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: return BitcodeLTOInfo{/*IsThinLTO=*/false, /*HasSummary=*/false}; case BitstreamEntry::SubBlock: if (Entry.ID == bitc::GLOBALVAL_SUMMARY_BLOCK_ID) return BitcodeLTOInfo{/*IsThinLTO=*/true, /*HasSummary=*/true}; if (Entry.ID == bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID) return BitcodeLTOInfo{/*IsThinLTO=*/false, /*HasSummary=*/true}; // Ignore other sub-blocks.
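// (Editor's note, an illustrative reading of the switch above.) A plain
// module carrying no summary never hits either summary block id, reaches
// EndBlock, and returns {IsThinLTO=false, HasSummary=false}; a ThinLTO
// module returns {true, true} and a full-LTO module with a summary returns
// {false, true}.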
if (Stream.SkipBlock()) return error("Malformed block"); continue; case BitstreamEntry::Record: Stream.skipRecord(Entry.ID); continue; } } } static Expected<BitcodeModule> getSingleModule(MemoryBufferRef Buffer) { Expected<std::vector<BitcodeModule>> MsOrErr = getBitcodeModuleList(Buffer); if (!MsOrErr) return MsOrErr.takeError(); if (MsOrErr->size() != 1) return error("Expected a single module"); return (*MsOrErr)[0]; } Expected<std::unique_ptr<Module>> llvm::getLazyBitcodeModule(MemoryBufferRef Buffer, LLVMContext &Context, bool ShouldLazyLoadMetadata, bool IsImporting) { Expected<BitcodeModule> BM = getSingleModule(Buffer); if (!BM) return BM.takeError(); return BM->getLazyModule(Context, ShouldLazyLoadMetadata, IsImporting); } Expected<std::unique_ptr<Module>> llvm::getOwningLazyBitcodeModule( std::unique_ptr<MemoryBuffer> &&Buffer, LLVMContext &Context, bool ShouldLazyLoadMetadata, bool IsImporting) { auto MOrErr = getLazyBitcodeModule(*Buffer, Context, ShouldLazyLoadMetadata, IsImporting); if (MOrErr) (*MOrErr)->setOwnedMemoryBuffer(std::move(Buffer)); return MOrErr; } Expected<std::unique_ptr<Module>> BitcodeModule::parseModule(LLVMContext &Context) { return getModuleImpl(Context, true, false, false); // TODO: Restore the use-lists to the in-memory state when the bitcode was // written. We must defer until the Module has been fully materialized. } Expected<std::unique_ptr<Module>> llvm::parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context) { Expected<BitcodeModule> BM = getSingleModule(Buffer); if (!BM) return BM.takeError(); return BM->parseModule(Context); } Expected<std::string> llvm::getBitcodeTargetTriple(MemoryBufferRef Buffer) { Expected<BitstreamCursor> StreamOrErr = initStream(Buffer); if (!StreamOrErr) return StreamOrErr.takeError(); return readTriple(*StreamOrErr); } Expected<bool> llvm::isBitcodeContainingObjCCategory(MemoryBufferRef Buffer) { Expected<BitstreamCursor> StreamOrErr = initStream(Buffer); if (!StreamOrErr) return StreamOrErr.takeError(); return hasObjCCategory(*StreamOrErr); } Expected<std::string> llvm::getBitcodeProducerString(MemoryBufferRef Buffer) { Expected<BitstreamCursor> StreamOrErr = initStream(Buffer); if (!StreamOrErr) return StreamOrErr.takeError(); return readIdentificationCode(*StreamOrErr); } Error llvm::readModuleSummaryIndex(MemoryBufferRef Buffer, ModuleSummaryIndex &CombinedIndex, uint64_t ModuleId) { Expected<BitcodeModule> BM = getSingleModule(Buffer); if (!BM) return BM.takeError(); return BM->readSummary(CombinedIndex, BM->getModuleIdentifier(), ModuleId); } Expected<std::unique_ptr<ModuleSummaryIndex>> llvm::getModuleSummaryIndex(MemoryBufferRef Buffer) { Expected<BitcodeModule> BM = getSingleModule(Buffer); if (!BM) return BM.takeError(); return BM->getSummary(); } Expected<BitcodeLTOInfo> llvm::getBitcodeLTOInfo(MemoryBufferRef Buffer) { Expected<BitcodeModule> BM = getSingleModule(Buffer); if (!BM) return BM.takeError(); return BM->getLTOInfo(); } Expected<std::unique_ptr<ModuleSummaryIndex>> llvm::getModuleSummaryIndexForFile(StringRef Path, bool IgnoreEmptyThinLTOIndexFile) { ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = MemoryBuffer::getFileOrSTDIN(Path); if (!FileOrErr) return errorCodeToError(FileOrErr.getError()); if (IgnoreEmptyThinLTOIndexFile && !(*FileOrErr)->getBufferSize()) return nullptr; return getModuleSummaryIndex(**FileOrErr); } Index: head/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp =================================================================== --- head/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp (revision 329982) +++ head/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp (revision 329983) @@ -1,4260 +1,4260 @@ //===- Bitcode/Writer/BitcodeWriter.cpp - Bitcode Writer ------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details.
// //===----------------------------------------------------------------------===// // // Bitcode writer implementation. // //===----------------------------------------------------------------------===// #include "llvm/Bitcode/BitcodeWriter.h" #include "ValueEnumerator.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/Bitcode/BitCodes.h" #include "llvm/Bitcode/BitstreamWriter.h" #include "llvm/Bitcode/LLVMBitCodes.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Comdat.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalIFunc.h" #include "llvm/IR/GlobalObject.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" #include "llvm/IR/UseListOrder.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueSymbolTable.h" #include "llvm/MC/StringTableBuilder.h" #include "llvm/Object/IRSymtab.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SHA1.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cassert> #include <cstddef> #include <cstdint> #include <iterator> #include <map> #include <memory> #include <string> #include <utility> #include <vector> using namespace llvm; static cl::opt<unsigned> IndexThreshold("bitcode-mdindex-threshold", cl::Hidden, cl::init(25), cl::desc("Number of metadatas above which we emit an index " "to enable lazy-loading")); namespace { /// These are manifest constants used by the bitcode writer. They do not need to /// be kept in sync with the reader, but need to be consistent within this file. enum { // VALUE_SYMTAB_BLOCK abbrev id's. VST_ENTRY_8_ABBREV = bitc::FIRST_APPLICATION_ABBREV, VST_ENTRY_7_ABBREV, VST_ENTRY_6_ABBREV, VST_BBENTRY_6_ABBREV, // CONSTANTS_BLOCK abbrev id's. CONSTANTS_SETTYPE_ABBREV = bitc::FIRST_APPLICATION_ABBREV, CONSTANTS_INTEGER_ABBREV, CONSTANTS_CE_CAST_Abbrev, CONSTANTS_NULL_Abbrev, // FUNCTION_BLOCK abbrev id's. FUNCTION_INST_LOAD_ABBREV = bitc::FIRST_APPLICATION_ABBREV, FUNCTION_INST_BINOP_ABBREV, FUNCTION_INST_BINOP_FLAGS_ABBREV, FUNCTION_INST_CAST_ABBREV, FUNCTION_INST_RET_VOID_ABBREV, FUNCTION_INST_RET_VAL_ABBREV, FUNCTION_INST_UNREACHABLE_ABBREV, FUNCTION_INST_GEP_ABBREV, }; /// Abstract class to manage the bitcode writing, subclassed for each bitcode /// file type. class BitcodeWriterBase { protected: /// The stream created and owned by the client. BitstreamWriter &Stream; StringTableBuilder &StrtabBuilder; public: /// Constructs a BitcodeWriterBase object that writes to the provided /// \p Stream.
BitcodeWriterBase(BitstreamWriter &Stream, StringTableBuilder &StrtabBuilder) : Stream(Stream), StrtabBuilder(StrtabBuilder) {} protected: void writeBitcodeHeader(); void writeModuleVersion(); }; void BitcodeWriterBase::writeModuleVersion() { // VERSION: [version#] Stream.EmitRecord(bitc::MODULE_CODE_VERSION, ArrayRef<uint64_t>{2}); } /// Base class to manage the module bitcode writing, currently subclassed for /// ModuleBitcodeWriter and ThinLinkBitcodeWriter. class ModuleBitcodeWriterBase : public BitcodeWriterBase { protected: /// The Module to write to bitcode. const Module &M; /// Enumerates ids for all values in the module. ValueEnumerator VE; /// Optional per-module index to write for ThinLTO. const ModuleSummaryIndex *Index; /// Map that holds the correspondence between GUIDs in the summary index, /// that came from indirect call profiles, and a value id generated by this /// class to use in the VST and summary block records. std::map<GlobalValue::GUID, unsigned> GUIDToValueIdMap; /// Tracks the last value id recorded in the GUIDToValueMap. unsigned GlobalValueId; /// Saves the offset of the VSTOffset record that must eventually be /// backpatched with the offset of the actual VST. uint64_t VSTOffsetPlaceholder = 0; public: /// Constructs a ModuleBitcodeWriterBase object for the given Module, /// writing to the provided \p Buffer. ModuleBitcodeWriterBase(const Module *M, StringTableBuilder &StrtabBuilder, BitstreamWriter &Stream, bool ShouldPreserveUseListOrder, const ModuleSummaryIndex *Index) : BitcodeWriterBase(Stream, StrtabBuilder), M(*M), VE(*M, ShouldPreserveUseListOrder), Index(Index) { // Assign ValueIds to any callee values in the index that came from // indirect call profiles and were recorded as a GUID not a Value* // (which would have been assigned an ID by the ValueEnumerator). // The starting ValueId is just after the number of values in the // ValueEnumerator, so that they can be emitted in the VST. GlobalValueId = VE.getValues().size(); if (!Index) return; for (const auto &GUIDSummaryLists : *Index) // Examine all summaries for this GUID. for (auto &Summary : GUIDSummaryLists.second.SummaryList) if (auto FS = dyn_cast<FunctionSummary>(Summary.get())) // For each call in the function summary, see if the call // is to a GUID (which means it is for an indirect call, // otherwise we would have a Value for it). If so, synthesize // a value id. for (auto &CallEdge : FS->calls()) if (!CallEdge.first.getValue()) assignValueId(CallEdge.first.getGUID()); } protected: void writePerModuleGlobalValueSummary(); private: void writePerModuleFunctionSummaryRecord(SmallVector<uint64_t, 64> &NameVals, GlobalValueSummary *Summary, unsigned ValueID, unsigned FSCallsAbbrev, unsigned FSCallsProfileAbbrev, const Function &F); void writeModuleLevelReferences(const GlobalVariable &V, SmallVector<uint64_t, 64> &NameVals, unsigned FSModRefsAbbrev); void assignValueId(GlobalValue::GUID ValGUID) { GUIDToValueIdMap[ValGUID] = ++GlobalValueId; } unsigned getValueId(GlobalValue::GUID ValGUID) { const auto &VMI = GUIDToValueIdMap.find(ValGUID); // Expect that any GUID value had a value Id assigned by an // earlier call to assignValueId. assert(VMI != GUIDToValueIdMap.end() && "GUID does not have assigned value Id"); return VMI->second; } // Helper to get the valueId for the type of value recorded in VI. unsigned getValueId(ValueInfo VI) { if (!VI.getValue()) return getValueId(VI.getGUID()); return VE.getValueID(VI.getValue()); } std::map<GlobalValue::GUID, unsigned> &valueIds() { return GUIDToValueIdMap; } }; /// Class to manage the bitcode writing for a module.
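/// (Editor's sketch of typical usage; the setup below is an assumption for
/// illustration, not verbatim LLVM driver code.)
/// \code
///   SmallVector<char, 0> Buffer;
///   BitstreamWriter Stream(Buffer);
///   StringTableBuilder StrtabBuilder(StringTableBuilder::RAW);
///   ModuleBitcodeWriter Writer(&M, Buffer, StrtabBuilder, Stream,
///                              /*ShouldPreserveUseListOrder=*/false,
///                              /*Index=*/nullptr, /*GenerateHash=*/false);
///   Writer.write(); // emits the MODULE block into Buffer
/// \endcode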
class ModuleBitcodeWriter : public ModuleBitcodeWriterBase { /// Pointer to the buffer allocated by caller for bitcode writing. const SmallVectorImpl &Buffer; /// True if a module hash record should be written. bool GenerateHash; /// If non-null, when GenerateHash is true, the resulting hash is written /// into ModHash. ModuleHash *ModHash; SHA1 Hasher; /// The start bit of the identification block. uint64_t BitcodeStartBit; public: /// Constructs a ModuleBitcodeWriter object for the given Module, /// writing to the provided \p Buffer. ModuleBitcodeWriter(const Module *M, SmallVectorImpl &Buffer, StringTableBuilder &StrtabBuilder, BitstreamWriter &Stream, bool ShouldPreserveUseListOrder, const ModuleSummaryIndex *Index, bool GenerateHash, ModuleHash *ModHash = nullptr) : ModuleBitcodeWriterBase(M, StrtabBuilder, Stream, ShouldPreserveUseListOrder, Index), Buffer(Buffer), GenerateHash(GenerateHash), ModHash(ModHash), BitcodeStartBit(Stream.GetCurrentBitNo()) {} /// Emit the current module to the bitstream. void write(); private: uint64_t bitcodeStartBit() { return BitcodeStartBit; } size_t addToStrtab(StringRef Str); void writeAttributeGroupTable(); void writeAttributeTable(); void writeTypeTable(); void writeComdats(); void writeValueSymbolTableForwardDecl(); void writeModuleInfo(); void writeValueAsMetadata(const ValueAsMetadata *MD, SmallVectorImpl &Record); void writeMDTuple(const MDTuple *N, SmallVectorImpl &Record, unsigned Abbrev); unsigned createDILocationAbbrev(); void writeDILocation(const DILocation *N, SmallVectorImpl &Record, unsigned &Abbrev); unsigned createGenericDINodeAbbrev(); void writeGenericDINode(const GenericDINode *N, SmallVectorImpl &Record, unsigned &Abbrev); void writeDISubrange(const DISubrange *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDIEnumerator(const DIEnumerator *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDIBasicType(const DIBasicType *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDIDerivedType(const DIDerivedType *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDICompositeType(const DICompositeType *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDISubroutineType(const DISubroutineType *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDIFile(const DIFile *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDICompileUnit(const DICompileUnit *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDISubprogram(const DISubprogram *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDILexicalBlock(const DILexicalBlock *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDILexicalBlockFile(const DILexicalBlockFile *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDINamespace(const DINamespace *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDIMacro(const DIMacro *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDIMacroFile(const DIMacroFile *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDIModule(const DIModule *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDITemplateTypeParameter(const DITemplateTypeParameter *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDITemplateValueParameter(const DITemplateValueParameter *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDIGlobalVariable(const DIGlobalVariable *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDILocalVariable(const DILocalVariable *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDIExpression(const DIExpression *N, SmallVectorImpl &Record, unsigned Abbrev); void 
writeDIGlobalVariableExpression(const DIGlobalVariableExpression *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDIObjCProperty(const DIObjCProperty *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDIImportedEntity(const DIImportedEntity *N, SmallVectorImpl &Record, unsigned Abbrev); unsigned createNamedMetadataAbbrev(); void writeNamedMetadata(SmallVectorImpl &Record); unsigned createMetadataStringsAbbrev(); void writeMetadataStrings(ArrayRef Strings, SmallVectorImpl &Record); void writeMetadataRecords(ArrayRef MDs, SmallVectorImpl &Record, std::vector *MDAbbrevs = nullptr, std::vector *IndexPos = nullptr); void writeModuleMetadata(); void writeFunctionMetadata(const Function &F); void writeFunctionMetadataAttachment(const Function &F); void writeGlobalVariableMetadataAttachment(const GlobalVariable &GV); void pushGlobalMetadataAttachment(SmallVectorImpl &Record, const GlobalObject &GO); void writeModuleMetadataKinds(); void writeOperandBundleTags(); void writeSyncScopeNames(); void writeConstants(unsigned FirstVal, unsigned LastVal, bool isGlobal); void writeModuleConstants(); bool pushValueAndType(const Value *V, unsigned InstID, SmallVectorImpl &Vals); void writeOperandBundles(ImmutableCallSite CS, unsigned InstID); void pushValue(const Value *V, unsigned InstID, SmallVectorImpl &Vals); void pushValueSigned(const Value *V, unsigned InstID, SmallVectorImpl &Vals); void writeInstruction(const Instruction &I, unsigned InstID, SmallVectorImpl &Vals); void writeFunctionLevelValueSymbolTable(const ValueSymbolTable &VST); void writeGlobalValueSymbolTable( DenseMap &FunctionToBitcodeIndex); void writeUseList(UseListOrder &&Order); void writeUseListBlock(const Function *F); void writeFunction(const Function &F, DenseMap &FunctionToBitcodeIndex); void writeBlockInfo(); void writeModuleHash(size_t BlockStartPos); unsigned getEncodedSyncScopeID(SyncScope::ID SSID) { return unsigned(SSID); } }; /// Class to manage the bitcode writing for a combined index. class IndexBitcodeWriter : public BitcodeWriterBase { /// The combined index to write to bitcode. const ModuleSummaryIndex &Index; /// When writing a subset of the index for distributed backends, client /// provides a map of modules to the corresponding GUIDs/summaries to write. const std::map *ModuleToSummariesForIndex; /// Map that holds the correspondence between the GUID used in the combined /// index and a value id generated by this class to use in references. std::map GUIDToValueIdMap; /// Tracks the last value id recorded in the GUIDToValueMap. unsigned GlobalValueId = 0; public: /// Constructs a IndexBitcodeWriter object for the given combined index, /// writing to the provided \p Buffer. When writing a subset of the index /// for a distributed backend, provide a \p ModuleToSummariesForIndex map. IndexBitcodeWriter(BitstreamWriter &Stream, StringTableBuilder &StrtabBuilder, const ModuleSummaryIndex &Index, const std::map *ModuleToSummariesForIndex = nullptr) : BitcodeWriterBase(Stream, StrtabBuilder), Index(Index), ModuleToSummariesForIndex(ModuleToSummariesForIndex) { // Assign unique value ids to all summaries to be written, for use // in writing out the call graph edges. Save the mapping from GUID // to the new global value id to use when writing those edges, which // are currently saved in the index in terms of GUID. forEachSummary([&](GVInfo I, bool) { GUIDToValueIdMap[I.first] = ++GlobalValueId; }); } /// The below iterator returns the GUID and associated summary. 
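/// (Editor's illustrative sketch.) For example, counting every summary that
/// will be written, including the extra aliasee visits:
/// \code
///   unsigned NumSummaries = 0;
///   forEachSummary([&](GVInfo I, bool IsAliasee) { ++NumSummaries; });
/// \endcode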
using GVInfo = std::pair<GlobalValue::GUID, GlobalValueSummary *>; /// Calls the callback for each value GUID and summary to be written to /// bitcode. This hides the details of whether they are being pulled from the /// entire index or just those in a provided ModuleToSummariesForIndex map. template <typename Functor> void forEachSummary(Functor Callback) { if (ModuleToSummariesForIndex) { for (auto &M : *ModuleToSummariesForIndex) for (auto &Summary : M.second) { Callback(Summary, false); // Ensure aliasee is handled, e.g. for assigning a valueId, // even if we are not importing the aliasee directly (the // imported alias will contain a copy of aliasee). if (auto *AS = dyn_cast<AliasSummary>(Summary.getSecond())) Callback({AS->getAliaseeGUID(), &AS->getAliasee()}, true); } } else { for (auto &Summaries : Index) for (auto &Summary : Summaries.second.SummaryList) Callback({Summaries.first, Summary.get()}, false); } } /// Calls the callback for each entry in the modulePaths StringMap that /// should be written to the module path string table. This hides the details /// of whether they are being pulled from the entire index or just those in a /// provided ModuleToSummariesForIndex map. template <typename Functor> void forEachModule(Functor Callback) { if (ModuleToSummariesForIndex) { for (const auto &M : *ModuleToSummariesForIndex) { const auto &MPI = Index.modulePaths().find(M.first); if (MPI == Index.modulePaths().end()) { // This should only happen if the bitcode file was empty, in which // case we shouldn't be importing (the ModuleToSummariesForIndex // would only include the module we are writing and index for). assert(ModuleToSummariesForIndex->size() == 1); continue; } Callback(*MPI); } } else { for (const auto &MPSE : Index.modulePaths()) Callback(MPSE); } } /// Main entry point for writing a combined index to bitcode. void write(); private: void writeModStrings(); void writeCombinedGlobalValueSummary(); Optional<unsigned> getValueId(GlobalValue::GUID ValGUID) { auto VMI = GUIDToValueIdMap.find(ValGUID); if (VMI == GUIDToValueIdMap.end()) return None; return VMI->second; } std::map<GlobalValue::GUID, unsigned> &valueIds() { return GUIDToValueIdMap; } }; } // end anonymous namespace static unsigned getEncodedCastOpcode(unsigned Opcode) { switch (Opcode) { default: llvm_unreachable("Unknown cast instruction!"); case Instruction::Trunc : return bitc::CAST_TRUNC; case Instruction::ZExt : return bitc::CAST_ZEXT; case Instruction::SExt : return bitc::CAST_SEXT; case Instruction::FPToUI : return bitc::CAST_FPTOUI; case Instruction::FPToSI : return bitc::CAST_FPTOSI; case Instruction::UIToFP : return bitc::CAST_UITOFP; case Instruction::SIToFP : return bitc::CAST_SITOFP; case Instruction::FPTrunc : return bitc::CAST_FPTRUNC; case Instruction::FPExt : return bitc::CAST_FPEXT; case Instruction::PtrToInt: return bitc::CAST_PTRTOINT; case Instruction::IntToPtr: return bitc::CAST_INTTOPTR; case Instruction::BitCast : return bitc::CAST_BITCAST; case Instruction::AddrSpaceCast: return bitc::CAST_ADDRSPACECAST; } } static unsigned getEncodedBinaryOpcode(unsigned Opcode) { switch (Opcode) { default: llvm_unreachable("Unknown binary instruction!"); case Instruction::Add: case Instruction::FAdd: return bitc::BINOP_ADD; case Instruction::Sub: case Instruction::FSub: return bitc::BINOP_SUB; case Instruction::Mul: case Instruction::FMul: return bitc::BINOP_MUL; case Instruction::UDiv: return bitc::BINOP_UDIV; case Instruction::FDiv: case Instruction::SDiv: return bitc::BINOP_SDIV; case Instruction::URem: return bitc::BINOP_UREM; case Instruction::FRem: case Instruction::SRem: return bitc::BINOP_SREM; case Instruction::Shl: return
bitc::BINOP_SHL; case Instruction::LShr: return bitc::BINOP_LSHR; case Instruction::AShr: return bitc::BINOP_ASHR; case Instruction::And: return bitc::BINOP_AND; case Instruction::Or: return bitc::BINOP_OR; case Instruction::Xor: return bitc::BINOP_XOR; } } static unsigned getEncodedRMWOperation(AtomicRMWInst::BinOp Op) { switch (Op) { default: llvm_unreachable("Unknown RMW operation!"); case AtomicRMWInst::Xchg: return bitc::RMW_XCHG; case AtomicRMWInst::Add: return bitc::RMW_ADD; case AtomicRMWInst::Sub: return bitc::RMW_SUB; case AtomicRMWInst::And: return bitc::RMW_AND; case AtomicRMWInst::Nand: return bitc::RMW_NAND; case AtomicRMWInst::Or: return bitc::RMW_OR; case AtomicRMWInst::Xor: return bitc::RMW_XOR; case AtomicRMWInst::Max: return bitc::RMW_MAX; case AtomicRMWInst::Min: return bitc::RMW_MIN; case AtomicRMWInst::UMax: return bitc::RMW_UMAX; case AtomicRMWInst::UMin: return bitc::RMW_UMIN; } } static unsigned getEncodedOrdering(AtomicOrdering Ordering) { switch (Ordering) { case AtomicOrdering::NotAtomic: return bitc::ORDERING_NOTATOMIC; case AtomicOrdering::Unordered: return bitc::ORDERING_UNORDERED; case AtomicOrdering::Monotonic: return bitc::ORDERING_MONOTONIC; case AtomicOrdering::Acquire: return bitc::ORDERING_ACQUIRE; case AtomicOrdering::Release: return bitc::ORDERING_RELEASE; case AtomicOrdering::AcquireRelease: return bitc::ORDERING_ACQREL; case AtomicOrdering::SequentiallyConsistent: return bitc::ORDERING_SEQCST; } llvm_unreachable("Invalid ordering"); } static void writeStringRecord(BitstreamWriter &Stream, unsigned Code, StringRef Str, unsigned AbbrevToUse) { SmallVector Vals; // Code: [strchar x N] for (unsigned i = 0, e = Str.size(); i != e; ++i) { if (AbbrevToUse && !BitCodeAbbrevOp::isChar6(Str[i])) AbbrevToUse = 0; Vals.push_back(Str[i]); } // Emit the finished record. 
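// (Editor's note.) Char6 covers exactly [a-zA-Z0-9._], so common identifiers
// keep the abbreviation; a hypothetical name like "my$global" contains '$',
// which makes the loop above fall back to AbbrevToUse = 0 and the string is
// emitted unabbreviated.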
Stream.EmitRecord(Code, Vals, AbbrevToUse); } static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { switch (Kind) { case Attribute::Alignment: return bitc::ATTR_KIND_ALIGNMENT; case Attribute::AllocSize: return bitc::ATTR_KIND_ALLOC_SIZE; case Attribute::AlwaysInline: return bitc::ATTR_KIND_ALWAYS_INLINE; case Attribute::ArgMemOnly: return bitc::ATTR_KIND_ARGMEMONLY; case Attribute::Builtin: return bitc::ATTR_KIND_BUILTIN; case Attribute::ByVal: return bitc::ATTR_KIND_BY_VAL; case Attribute::Convergent: return bitc::ATTR_KIND_CONVERGENT; case Attribute::InAlloca: return bitc::ATTR_KIND_IN_ALLOCA; case Attribute::Cold: return bitc::ATTR_KIND_COLD; case Attribute::InaccessibleMemOnly: return bitc::ATTR_KIND_INACCESSIBLEMEM_ONLY; case Attribute::InaccessibleMemOrArgMemOnly: return bitc::ATTR_KIND_INACCESSIBLEMEM_OR_ARGMEMONLY; case Attribute::InlineHint: return bitc::ATTR_KIND_INLINE_HINT; case Attribute::InReg: return bitc::ATTR_KIND_IN_REG; case Attribute::JumpTable: return bitc::ATTR_KIND_JUMP_TABLE; case Attribute::MinSize: return bitc::ATTR_KIND_MIN_SIZE; case Attribute::Naked: return bitc::ATTR_KIND_NAKED; case Attribute::Nest: return bitc::ATTR_KIND_NEST; case Attribute::NoAlias: return bitc::ATTR_KIND_NO_ALIAS; case Attribute::NoBuiltin: return bitc::ATTR_KIND_NO_BUILTIN; case Attribute::NoCapture: return bitc::ATTR_KIND_NO_CAPTURE; case Attribute::NoDuplicate: return bitc::ATTR_KIND_NO_DUPLICATE; case Attribute::NoImplicitFloat: return bitc::ATTR_KIND_NO_IMPLICIT_FLOAT; case Attribute::NoInline: return bitc::ATTR_KIND_NO_INLINE; case Attribute::NoRecurse: return bitc::ATTR_KIND_NO_RECURSE; case Attribute::NonLazyBind: return bitc::ATTR_KIND_NON_LAZY_BIND; case Attribute::NonNull: return bitc::ATTR_KIND_NON_NULL; case Attribute::Dereferenceable: return bitc::ATTR_KIND_DEREFERENCEABLE; case Attribute::DereferenceableOrNull: return bitc::ATTR_KIND_DEREFERENCEABLE_OR_NULL; case Attribute::NoRedZone: return bitc::ATTR_KIND_NO_RED_ZONE; case Attribute::NoReturn: return bitc::ATTR_KIND_NO_RETURN; case Attribute::NoUnwind: return bitc::ATTR_KIND_NO_UNWIND; case Attribute::OptimizeForSize: return bitc::ATTR_KIND_OPTIMIZE_FOR_SIZE; case Attribute::OptimizeNone: return bitc::ATTR_KIND_OPTIMIZE_NONE; case Attribute::ReadNone: return bitc::ATTR_KIND_READ_NONE; case Attribute::ReadOnly: return bitc::ATTR_KIND_READ_ONLY; case Attribute::Returned: return bitc::ATTR_KIND_RETURNED; case Attribute::ReturnsTwice: return bitc::ATTR_KIND_RETURNS_TWICE; case Attribute::SExt: return bitc::ATTR_KIND_S_EXT; case Attribute::Speculatable: return bitc::ATTR_KIND_SPECULATABLE; case Attribute::StackAlignment: return bitc::ATTR_KIND_STACK_ALIGNMENT; case Attribute::StackProtect: return bitc::ATTR_KIND_STACK_PROTECT; case Attribute::StackProtectReq: return bitc::ATTR_KIND_STACK_PROTECT_REQ; case Attribute::StackProtectStrong: return bitc::ATTR_KIND_STACK_PROTECT_STRONG; case Attribute::SafeStack: return bitc::ATTR_KIND_SAFESTACK; case Attribute::StrictFP: return bitc::ATTR_KIND_STRICT_FP; case Attribute::StructRet: return bitc::ATTR_KIND_STRUCT_RET; case Attribute::SanitizeAddress: return bitc::ATTR_KIND_SANITIZE_ADDRESS; case Attribute::SanitizeHWAddress: return bitc::ATTR_KIND_SANITIZE_HWADDRESS; case Attribute::SanitizeThread: return bitc::ATTR_KIND_SANITIZE_THREAD; case Attribute::SanitizeMemory: return bitc::ATTR_KIND_SANITIZE_MEMORY; case Attribute::SwiftError: return bitc::ATTR_KIND_SWIFT_ERROR; case Attribute::SwiftSelf: return bitc::ATTR_KIND_SWIFT_SELF; case Attribute::UWTable: return 
bitc::ATTR_KIND_UW_TABLE; case Attribute::WriteOnly: return bitc::ATTR_KIND_WRITEONLY; case Attribute::ZExt: return bitc::ATTR_KIND_Z_EXT; case Attribute::EndAttrKinds: llvm_unreachable("Can not encode end-attribute kinds marker."); case Attribute::None: llvm_unreachable("Can not encode none-attribute."); } llvm_unreachable("Trying to encode unknown attribute"); } void ModuleBitcodeWriter::writeAttributeGroupTable() { const std::vector<ValueEnumerator::IndexAndAttrSet> &AttrGrps = VE.getAttributeGroups(); if (AttrGrps.empty()) return; Stream.EnterSubblock(bitc::PARAMATTR_GROUP_BLOCK_ID, 3); SmallVector<uint64_t, 64> Record; for (ValueEnumerator::IndexAndAttrSet Pair : AttrGrps) { unsigned AttrListIndex = Pair.first; AttributeSet AS = Pair.second; Record.push_back(VE.getAttributeGroupID(Pair)); Record.push_back(AttrListIndex); for (Attribute Attr : AS) { if (Attr.isEnumAttribute()) { Record.push_back(0); Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum())); } else if (Attr.isIntAttribute()) { Record.push_back(1); Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum())); Record.push_back(Attr.getValueAsInt()); } else { StringRef Kind = Attr.getKindAsString(); StringRef Val = Attr.getValueAsString(); Record.push_back(Val.empty() ? 3 : 4); Record.append(Kind.begin(), Kind.end()); Record.push_back(0); if (!Val.empty()) { Record.append(Val.begin(), Val.end()); Record.push_back(0); } } } Stream.EmitRecord(bitc::PARAMATTR_GRP_CODE_ENTRY, Record); Record.clear(); } Stream.ExitBlock(); } void ModuleBitcodeWriter::writeAttributeTable() { const std::vector<AttributeList> &Attrs = VE.getAttributeLists(); if (Attrs.empty()) return; Stream.EnterSubblock(bitc::PARAMATTR_BLOCK_ID, 3); SmallVector<uint64_t, 64> Record; for (unsigned i = 0, e = Attrs.size(); i != e; ++i) { AttributeList AL = Attrs[i]; for (unsigned i = AL.index_begin(), e = AL.index_end(); i != e; ++i) { AttributeSet AS = AL.getAttributes(i); if (AS.hasAttributes()) Record.push_back(VE.getAttributeGroupID({i, AS})); } Stream.EmitRecord(bitc::PARAMATTR_CODE_ENTRY, Record); Record.clear(); } Stream.ExitBlock(); } /// WriteTypeTable - Write out the type table for a module. void ModuleBitcodeWriter::writeTypeTable() { const ValueEnumerator::TypeList &TypeList = VE.getTypes(); Stream.EnterSubblock(bitc::TYPE_BLOCK_ID_NEW, 4 /*count from # abbrevs */); SmallVector<uint64_t, 64> TypeVals; uint64_t NumBits = VE.computeBitsRequiredForTypeIndicies(); // Abbrev for TYPE_CODE_POINTER. auto Abbv = std::make_shared<BitCodeAbbrev>(); Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_POINTER)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits)); Abbv->Add(BitCodeAbbrevOp(0)); // Addrspace = 0 unsigned PtrAbbrev = Stream.EmitAbbrev(std::move(Abbv)); // Abbrev for TYPE_CODE_FUNCTION. Abbv = std::make_shared<BitCodeAbbrev>(); Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_FUNCTION)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // isvararg Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits)); unsigned FunctionAbbrev = Stream.EmitAbbrev(std::move(Abbv)); // Abbrev for TYPE_CODE_STRUCT_ANON. Abbv = std::make_shared<BitCodeAbbrev>(); Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_ANON)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits)); unsigned StructAnonAbbrev = Stream.EmitAbbrev(std::move(Abbv)); // Abbrev for TYPE_CODE_STRUCT_NAME.
Abbv = std::make_shared<BitCodeAbbrev>(); Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_NAME)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); unsigned StructNameAbbrev = Stream.EmitAbbrev(std::move(Abbv)); // Abbrev for TYPE_CODE_STRUCT_NAMED. Abbv = std::make_shared<BitCodeAbbrev>(); Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_NAMED)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits)); unsigned StructNamedAbbrev = Stream.EmitAbbrev(std::move(Abbv)); // Abbrev for TYPE_CODE_ARRAY. Abbv = std::make_shared<BitCodeAbbrev>(); Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_ARRAY)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // size Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits)); unsigned ArrayAbbrev = Stream.EmitAbbrev(std::move(Abbv)); // Emit an entry count so the reader can reserve space. TypeVals.push_back(TypeList.size()); Stream.EmitRecord(bitc::TYPE_CODE_NUMENTRY, TypeVals); TypeVals.clear(); // Loop over all of the types, emitting each in turn. for (unsigned i = 0, e = TypeList.size(); i != e; ++i) { Type *T = TypeList[i]; int AbbrevToUse = 0; unsigned Code = 0; switch (T->getTypeID()) { case Type::VoidTyID: Code = bitc::TYPE_CODE_VOID; break; case Type::HalfTyID: Code = bitc::TYPE_CODE_HALF; break; case Type::FloatTyID: Code = bitc::TYPE_CODE_FLOAT; break; case Type::DoubleTyID: Code = bitc::TYPE_CODE_DOUBLE; break; case Type::X86_FP80TyID: Code = bitc::TYPE_CODE_X86_FP80; break; case Type::FP128TyID: Code = bitc::TYPE_CODE_FP128; break; case Type::PPC_FP128TyID: Code = bitc::TYPE_CODE_PPC_FP128; break; case Type::LabelTyID: Code = bitc::TYPE_CODE_LABEL; break; case Type::MetadataTyID: Code = bitc::TYPE_CODE_METADATA; break; case Type::X86_MMXTyID: Code = bitc::TYPE_CODE_X86_MMX; break; case Type::TokenTyID: Code = bitc::TYPE_CODE_TOKEN; break; case Type::IntegerTyID: // INTEGER: [width] Code = bitc::TYPE_CODE_INTEGER; TypeVals.push_back(cast<IntegerType>(T)->getBitWidth()); break; case Type::PointerTyID: { PointerType *PTy = cast<PointerType>(T); // POINTER: [pointee type, address space] Code = bitc::TYPE_CODE_POINTER; TypeVals.push_back(VE.getTypeID(PTy->getElementType())); unsigned AddressSpace = PTy->getAddressSpace(); TypeVals.push_back(AddressSpace); if (AddressSpace == 0) AbbrevToUse = PtrAbbrev; break; } case Type::FunctionTyID: { FunctionType *FT = cast<FunctionType>(T); // FUNCTION: [isvararg, retty, paramty x N] Code = bitc::TYPE_CODE_FUNCTION; TypeVals.push_back(FT->isVarArg()); TypeVals.push_back(VE.getTypeID(FT->getReturnType())); for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) TypeVals.push_back(VE.getTypeID(FT->getParamType(i))); AbbrevToUse = FunctionAbbrev; break; } case Type::StructTyID: { StructType *ST = cast<StructType>(T); // STRUCT: [ispacked, eltty x N] TypeVals.push_back(ST->isPacked()); // Output all of the element types. for (StructType::element_iterator I = ST->element_begin(), E = ST->element_end(); I != E; ++I) TypeVals.push_back(VE.getTypeID(*I)); if (ST->isLiteral()) { Code = bitc::TYPE_CODE_STRUCT_ANON; AbbrevToUse = StructAnonAbbrev; } else { if (ST->isOpaque()) { Code = bitc::TYPE_CODE_OPAQUE; } else { Code = bitc::TYPE_CODE_STRUCT_NAMED; AbbrevToUse = StructNamedAbbrev; } // Emit the name if it is present.
if (!ST->getName().empty()) writeStringRecord(Stream, bitc::TYPE_CODE_STRUCT_NAME, ST->getName(), StructNameAbbrev); } break; } case Type::ArrayTyID: { ArrayType *AT = cast(T); // ARRAY: [numelts, eltty] Code = bitc::TYPE_CODE_ARRAY; TypeVals.push_back(AT->getNumElements()); TypeVals.push_back(VE.getTypeID(AT->getElementType())); AbbrevToUse = ArrayAbbrev; break; } case Type::VectorTyID: { VectorType *VT = cast(T); // VECTOR [numelts, eltty] Code = bitc::TYPE_CODE_VECTOR; TypeVals.push_back(VT->getNumElements()); TypeVals.push_back(VE.getTypeID(VT->getElementType())); break; } } // Emit the finished record. Stream.EmitRecord(Code, TypeVals, AbbrevToUse); TypeVals.clear(); } Stream.ExitBlock(); } static unsigned getEncodedLinkage(const GlobalValue::LinkageTypes Linkage) { switch (Linkage) { case GlobalValue::ExternalLinkage: return 0; case GlobalValue::WeakAnyLinkage: return 16; case GlobalValue::AppendingLinkage: return 2; case GlobalValue::InternalLinkage: return 3; case GlobalValue::LinkOnceAnyLinkage: return 18; case GlobalValue::ExternalWeakLinkage: return 7; case GlobalValue::CommonLinkage: return 8; case GlobalValue::PrivateLinkage: return 9; case GlobalValue::WeakODRLinkage: return 17; case GlobalValue::LinkOnceODRLinkage: return 19; case GlobalValue::AvailableExternallyLinkage: return 12; } llvm_unreachable("Invalid linkage"); } static unsigned getEncodedLinkage(const GlobalValue &GV) { return getEncodedLinkage(GV.getLinkage()); } static uint64_t getEncodedFFlags(FunctionSummary::FFlags Flags) { uint64_t RawFlags = 0; RawFlags |= Flags.ReadNone; RawFlags |= (Flags.ReadOnly << 1); RawFlags |= (Flags.NoRecurse << 2); RawFlags |= (Flags.ReturnDoesNotAlias << 3); return RawFlags; } // Decode the flags for GlobalValue in the summary static uint64_t getEncodedGVSummaryFlags(GlobalValueSummary::GVFlags Flags) { uint64_t RawFlags = 0; RawFlags |= Flags.NotEligibleToImport; // bool RawFlags |= (Flags.Live << 1); RawFlags |= (Flags.DSOLocal << 2); // Linkage don't need to be remapped at that time for the summary. Any future // change to the getEncodedLinkage() function will need to be taken into // account here as well. 
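// (Editor's worked example of the packing below.) Hypothetical flags
// {NotEligibleToImport=1, Live=0, DSOLocal=1} with InternalLinkage (encoded
// as 3 above) pack to RawFlags = (0b101 << 4) | 3 = 0x53; the reader's
// getDecodedGVSummaryFlags() must apply the exact inverse shifts.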
RawFlags = (RawFlags << 4) | Flags.Linkage; // 4 bits return RawFlags; } static unsigned getEncodedVisibility(const GlobalValue &GV) { switch (GV.getVisibility()) { case GlobalValue::DefaultVisibility: return 0; case GlobalValue::HiddenVisibility: return 1; case GlobalValue::ProtectedVisibility: return 2; } llvm_unreachable("Invalid visibility"); } static unsigned getEncodedDLLStorageClass(const GlobalValue &GV) { switch (GV.getDLLStorageClass()) { case GlobalValue::DefaultStorageClass: return 0; case GlobalValue::DLLImportStorageClass: return 1; case GlobalValue::DLLExportStorageClass: return 2; } llvm_unreachable("Invalid DLL storage class"); } static unsigned getEncodedThreadLocalMode(const GlobalValue &GV) { switch (GV.getThreadLocalMode()) { case GlobalVariable::NotThreadLocal: return 0; case GlobalVariable::GeneralDynamicTLSModel: return 1; case GlobalVariable::LocalDynamicTLSModel: return 2; case GlobalVariable::InitialExecTLSModel: return 3; case GlobalVariable::LocalExecTLSModel: return 4; } llvm_unreachable("Invalid TLS model"); } static unsigned getEncodedComdatSelectionKind(const Comdat &C) { switch (C.getSelectionKind()) { case Comdat::Any: return bitc::COMDAT_SELECTION_KIND_ANY; case Comdat::ExactMatch: return bitc::COMDAT_SELECTION_KIND_EXACT_MATCH; case Comdat::Largest: return bitc::COMDAT_SELECTION_KIND_LARGEST; case Comdat::NoDuplicates: return bitc::COMDAT_SELECTION_KIND_NO_DUPLICATES; case Comdat::SameSize: return bitc::COMDAT_SELECTION_KIND_SAME_SIZE; } llvm_unreachable("Invalid selection kind"); } static unsigned getEncodedUnnamedAddr(const GlobalValue &GV) { switch (GV.getUnnamedAddr()) { case GlobalValue::UnnamedAddr::None: return 0; case GlobalValue::UnnamedAddr::Local: return 2; case GlobalValue::UnnamedAddr::Global: return 1; } llvm_unreachable("Invalid unnamed_addr"); } size_t ModuleBitcodeWriter::addToStrtab(StringRef Str) { if (GenerateHash) Hasher.update(Str); return StrtabBuilder.add(Str); } void ModuleBitcodeWriter::writeComdats() { SmallVector Vals; for (const Comdat *C : VE.getComdats()) { // COMDAT: [strtab offset, strtab size, selection_kind] Vals.push_back(addToStrtab(C->getName())); Vals.push_back(C->getName().size()); Vals.push_back(getEncodedComdatSelectionKind(*C)); Stream.EmitRecord(bitc::MODULE_CODE_COMDAT, Vals, /*AbbrevToUse=*/0); Vals.clear(); } } /// Write a record that will eventually hold the word offset of the /// module-level VST. For now the offset is 0, which will be backpatched /// after the real VST is written. Saves the bit offset to backpatch. void ModuleBitcodeWriter::writeValueSymbolTableForwardDecl() { // Write a placeholder value in for the offset of the real VST, // which is written after the function blocks so that it can include // the offset of each function. The placeholder offset will be // updated when the real VST is written. auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_VSTOFFSET)); // Blocks are 32-bit aligned, so we can use a 32-bit word offset to // hold the real VST offset. Must use fixed instead of VBR as we don't // know how many VBR chunks to reserve ahead of time. Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); unsigned VSTOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbv)); // Emit the placeholder uint64_t Vals[] = {bitc::MODULE_CODE_VSTOFFSET, 0}; Stream.EmitRecordWithAbbrev(VSTOffsetAbbrev, Vals); // Compute and save the bit offset to the placeholder, which will be // patched when the real VST is written. 
// We can simply subtract the 32-bit
// fixed size from the current bit number to get the location to backpatch.
VSTOffsetPlaceholder = Stream.GetCurrentBitNo() - 32;
}

enum StringEncoding { SE_Char6, SE_Fixed7, SE_Fixed8 };

/// Determine the encoding to use for the given string name and length.
static StringEncoding getStringEncoding(StringRef Str) {
  bool isChar6 = true;
  for (char C : Str) {
    if (isChar6)
      isChar6 = BitCodeAbbrevOp::isChar6(C);
    if ((unsigned char)C & 128)
      // don't bother scanning the rest.
      return SE_Fixed8;
  }
  if (isChar6)
    return SE_Char6;
  return SE_Fixed7;
}

/// Emit top-level description of module, including target triple, inline asm,
/// descriptors for global variables, and function prototype info.
/// Returns the bit offset to backpatch with the location of the real VST.
void ModuleBitcodeWriter::writeModuleInfo() {
  // Emit various pieces of data attached to a module.
  if (!M.getTargetTriple().empty())
    writeStringRecord(Stream, bitc::MODULE_CODE_TRIPLE, M.getTargetTriple(),
                      0 /*TODO*/);
  const std::string &DL = M.getDataLayoutStr();
  if (!DL.empty())
    writeStringRecord(Stream, bitc::MODULE_CODE_DATALAYOUT, DL, 0 /*TODO*/);
  if (!M.getModuleInlineAsm().empty())
    writeStringRecord(Stream, bitc::MODULE_CODE_ASM, M.getModuleInlineAsm(),
                      0 /*TODO*/);

  // Emit information about sections and GC, computing how many there are. Also
  // compute the maximum alignment value.
  std::map<std::string, unsigned> SectionMap;
  std::map<std::string, unsigned> GCMap;
  unsigned MaxAlignment = 0;
  unsigned MaxGlobalType = 0;
  for (const GlobalValue &GV : M.globals()) {
    MaxAlignment = std::max(MaxAlignment, GV.getAlignment());
    MaxGlobalType = std::max(MaxGlobalType, VE.getTypeID(GV.getValueType()));
    if (GV.hasSection()) {
      // Give section names unique IDs.
      unsigned &Entry = SectionMap[GV.getSection()];
      if (!Entry) {
        writeStringRecord(Stream, bitc::MODULE_CODE_SECTIONNAME,
                          GV.getSection(), 0 /*TODO*/);
        Entry = SectionMap.size();
      }
    }
  }
  for (const Function &F : M) {
    MaxAlignment = std::max(MaxAlignment, F.getAlignment());
    if (F.hasSection()) {
      // Give section names unique IDs.
      unsigned &Entry = SectionMap[F.getSection()];
      if (!Entry) {
        writeStringRecord(Stream, bitc::MODULE_CODE_SECTIONNAME,
                          F.getSection(), 0 /*TODO*/);
        Entry = SectionMap.size();
      }
    }
    if (F.hasGC()) {
      // Same for GC names.
      unsigned &Entry = GCMap[F.getGC()];
      if (!Entry) {
        writeStringRecord(Stream, bitc::MODULE_CODE_GCNAME, F.getGC(),
                          0 /*TODO*/);
        Entry = GCMap.size();
      }
    }
  }

  // Emit abbrev for globals, now that we know # sections and max alignment.
  unsigned SimpleGVarAbbrev = 0;
  if (!M.global_empty()) {
    // Add an abbrev for common globals with no visibility or thread localness.
    auto Abbv = std::make_shared<BitCodeAbbrev>();
    Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_GLOBALVAR));
    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
                              Log2_32_Ceil(MaxGlobalType+1)));
    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // AddrSpace << 2
                                                         //| explicitType << 1
                                                         //| constant
    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));   // Initializer.
    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 5)); // Linkage.
    if (MaxAlignment == 0)                                 // Alignment.
      Abbv->Add(BitCodeAbbrevOp(0));
    else {
      unsigned MaxEncAlignment = Log2_32(MaxAlignment)+1;
      Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
                                Log2_32_Ceil(MaxEncAlignment+1)));
    }
    if (SectionMap.empty())                                // Section.
      Abbv->Add(BitCodeAbbrevOp(0));
    else
      Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
                                Log2_32_Ceil(SectionMap.size()+1)));
    // Don't bother emitting vis + thread local.
    SimpleGVarAbbrev = Stream.EmitAbbrev(std::move(Abbv));
  }

  SmallVector<unsigned, 64> Vals;
  // Emit the module's source file name.
  {
    StringEncoding Bits = getStringEncoding(M.getSourceFileName());
    BitCodeAbbrevOp AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8);
    if (Bits == SE_Char6)
      AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Char6);
    else if (Bits == SE_Fixed7)
      AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7);

    // MODULE_CODE_SOURCE_FILENAME: [namechar x N]
    auto Abbv = std::make_shared<BitCodeAbbrev>();
    Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_SOURCE_FILENAME));
    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
    Abbv->Add(AbbrevOpToUse);
    unsigned FilenameAbbrev = Stream.EmitAbbrev(std::move(Abbv));

    for (const auto P : M.getSourceFileName())
      Vals.push_back((unsigned char)P);

    // Emit the finished record.
    Stream.EmitRecord(bitc::MODULE_CODE_SOURCE_FILENAME, Vals, FilenameAbbrev);
    Vals.clear();
  }

  // Emit the global variable information.
  for (const GlobalVariable &GV : M.globals()) {
    unsigned AbbrevToUse = 0;

    // GLOBALVAR: [strtab offset, strtab size, type, isconst, initid,
    //             linkage, alignment, section, visibility, threadlocal,
    //             unnamed_addr, externally_initialized, dllstorageclass,
    //             comdat, attributes, DSO_Local]
    Vals.push_back(addToStrtab(GV.getName()));
    Vals.push_back(GV.getName().size());
    Vals.push_back(VE.getTypeID(GV.getValueType()));
    Vals.push_back(GV.getType()->getAddressSpace() << 2 | 2 | GV.isConstant());
    Vals.push_back(GV.isDeclaration() ? 0 :
                   (VE.getValueID(GV.getInitializer()) + 1));
    Vals.push_back(getEncodedLinkage(GV));
    Vals.push_back(Log2_32(GV.getAlignment())+1);
    Vals.push_back(GV.hasSection() ? SectionMap[GV.getSection()] : 0);
    if (GV.isThreadLocal() ||
        GV.getVisibility() != GlobalValue::DefaultVisibility ||
        GV.getUnnamedAddr() != GlobalValue::UnnamedAddr::None ||
        GV.isExternallyInitialized() ||
        GV.getDLLStorageClass() != GlobalValue::DefaultStorageClass ||
        GV.hasComdat() ||
        GV.hasAttributes() ||
        GV.isDSOLocal()) {
      Vals.push_back(getEncodedVisibility(GV));
      Vals.push_back(getEncodedThreadLocalMode(GV));
      Vals.push_back(getEncodedUnnamedAddr(GV));
      Vals.push_back(GV.isExternallyInitialized());
      Vals.push_back(getEncodedDLLStorageClass(GV));
      Vals.push_back(GV.hasComdat() ? VE.getComdatID(GV.getComdat()) : 0);

      auto AL = GV.getAttributesAsList(AttributeList::FunctionIndex);
      Vals.push_back(VE.getAttributeListID(AL));
      Vals.push_back(GV.isDSOLocal());
    } else {
      AbbrevToUse = SimpleGVarAbbrev;
    }

    Stream.EmitRecord(bitc::MODULE_CODE_GLOBALVAR, Vals, AbbrevToUse);
    Vals.clear();
  }

  // Emit the function proto information.
  for (const Function &F : M) {
    // FUNCTION: [strtab offset, strtab size, type, callingconv, isproto,
    //            linkage, paramattrs, alignment, section, visibility, gc,
    //            unnamed_addr, prologuedata, dllstorageclass, comdat,
    //            prefixdata, personalityfn, DSO_Local]
    Vals.push_back(addToStrtab(F.getName()));
    Vals.push_back(F.getName().size());
    Vals.push_back(VE.getTypeID(F.getFunctionType()));
    Vals.push_back(F.getCallingConv());
    Vals.push_back(F.isDeclaration());
    Vals.push_back(getEncodedLinkage(F));
    Vals.push_back(VE.getAttributeListID(F.getAttributes()));
    Vals.push_back(Log2_32(F.getAlignment())+1);
    Vals.push_back(F.hasSection() ? SectionMap[F.getSection()] : 0);
    Vals.push_back(getEncodedVisibility(F));
    Vals.push_back(F.hasGC() ? GCMap[F.getGC()] : 0);
    Vals.push_back(getEncodedUnnamedAddr(F));
    Vals.push_back(F.hasPrologueData() ?
                   (VE.getValueID(F.getPrologueData()) + 1) : 0);
    Vals.push_back(getEncodedDLLStorageClass(F));
    Vals.push_back(F.hasComdat() ? VE.getComdatID(F.getComdat()) : 0);
    Vals.push_back(F.hasPrefixData() ?
                   (VE.getValueID(F.getPrefixData()) + 1) : 0);
    Vals.push_back(
        F.hasPersonalityFn() ? (VE.getValueID(F.getPersonalityFn()) + 1) : 0);
    Vals.push_back(F.isDSOLocal());

    unsigned AbbrevToUse = 0;
    Stream.EmitRecord(bitc::MODULE_CODE_FUNCTION, Vals, AbbrevToUse);
    Vals.clear();
  }

  // Emit the alias information.
  for (const GlobalAlias &A : M.aliases()) {
    // ALIAS: [strtab offset, strtab size, alias type, aliasee val#, linkage,
    //         visibility, dllstorageclass, threadlocal, unnamed_addr,
    //         DSO_Local]
    Vals.push_back(addToStrtab(A.getName()));
    Vals.push_back(A.getName().size());
    Vals.push_back(VE.getTypeID(A.getValueType()));
    Vals.push_back(A.getType()->getAddressSpace());
    Vals.push_back(VE.getValueID(A.getAliasee()));
    Vals.push_back(getEncodedLinkage(A));
    Vals.push_back(getEncodedVisibility(A));
    Vals.push_back(getEncodedDLLStorageClass(A));
    Vals.push_back(getEncodedThreadLocalMode(A));
    Vals.push_back(getEncodedUnnamedAddr(A));
    Vals.push_back(A.isDSOLocal());

    unsigned AbbrevToUse = 0;
    Stream.EmitRecord(bitc::MODULE_CODE_ALIAS, Vals, AbbrevToUse);
    Vals.clear();
  }

  // Emit the ifunc information.
  for (const GlobalIFunc &I : M.ifuncs()) {
    // IFUNC: [strtab offset, strtab size, ifunc type, address space, resolver
    //         val#, linkage, visibility]
    Vals.push_back(addToStrtab(I.getName()));
    Vals.push_back(I.getName().size());
    Vals.push_back(VE.getTypeID(I.getValueType()));
    Vals.push_back(I.getType()->getAddressSpace());
    Vals.push_back(VE.getValueID(I.getResolver()));
    Vals.push_back(getEncodedLinkage(I));
    Vals.push_back(getEncodedVisibility(I));
    Stream.EmitRecord(bitc::MODULE_CODE_IFUNC, Vals);
    Vals.clear();
  }

  writeValueSymbolTableForwardDecl();
}

static uint64_t getOptimizationFlags(const Value *V) {
  uint64_t Flags = 0;

  if (const auto *OBO = dyn_cast<OverflowingBinaryOperator>(V)) {
    if (OBO->hasNoSignedWrap())
      Flags |= 1 << bitc::OBO_NO_SIGNED_WRAP;
    if (OBO->hasNoUnsignedWrap())
      Flags |= 1 << bitc::OBO_NO_UNSIGNED_WRAP;
  } else if (const auto *PEO = dyn_cast<PossiblyExactOperator>(V)) {
    if (PEO->isExact())
      Flags |= 1 << bitc::PEO_EXACT;
  } else if (const auto *FPMO = dyn_cast<FPMathOperator>(V)) {
    if (FPMO->hasAllowReassoc())
-      Flags |= FastMathFlags::AllowReassoc;
+      Flags |= bitc::AllowReassoc;
    if (FPMO->hasNoNaNs())
-      Flags |= FastMathFlags::NoNaNs;
+      Flags |= bitc::NoNaNs;
    if (FPMO->hasNoInfs())
-      Flags |= FastMathFlags::NoInfs;
+      Flags |= bitc::NoInfs;
    if (FPMO->hasNoSignedZeros())
-      Flags |= FastMathFlags::NoSignedZeros;
+      Flags |= bitc::NoSignedZeros;
    if (FPMO->hasAllowReciprocal())
-      Flags |= FastMathFlags::AllowReciprocal;
+      Flags |= bitc::AllowReciprocal;
    if (FPMO->hasAllowContract())
-      Flags |= FastMathFlags::AllowContract;
+      Flags |= bitc::AllowContract;
    if (FPMO->hasApproxFunc())
-      Flags |= FastMathFlags::ApproxFunc;
+      Flags |= bitc::ApproxFunc;
  }

  return Flags;
}

void ModuleBitcodeWriter::writeValueAsMetadata(
    const ValueAsMetadata *MD, SmallVectorImpl<uint64_t> &Record) {
  // Mimic an MDNode with a value as one operand.
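The minus/plus lines above are the substance of this merge: the writer had been emitting the in-memory FastMathFlags bit positions, which differ from the on-disk bitc:: positions because bit 0 on disk is reserved for the legacy UnsafeAlgebra flag. A standalone sketch of the two layouts as I read this release's headers (illustrative, not authoritative):

#include <cstdint>

// On-disk positions (mirrors my reading of bitc::FastMathMap).
enum DiskFMF : uint64_t {
  DiskUnsafeAlgebra   = 1 << 0, // legacy bit, kept so old readers still work
  DiskNoNaNs          = 1 << 1,
  DiskNoInfs          = 1 << 2,
  DiskNoSignedZeros   = 1 << 3,
  DiskAllowReciprocal = 1 << 4,
  DiskAllowContract   = 1 << 5,
  DiskApproxFunc      = 1 << 6,
  DiskAllowReassoc    = 1 << 7  // needs an 8th bit; see the abbrev widening
                                // from 7 to 8 bits later in this patch
};

// In-memory positions (mirrors FastMathFlags); AllowReassoc is bit 0 here,
// so writing it untranslated decodes as legacy UnsafeAlgebra on disk.
enum MemFMF : uint64_t {
  MemAllowReassoc    = 1 << 0,
  MemNoNaNs          = 1 << 1,
  MemNoInfs          = 1 << 2,
  MemNoSignedZeros   = 1 << 3,
  MemAllowReciprocal = 1 << 4,
  MemAllowContract   = 1 << 5,
  MemApproxFunc      = 1 << 6
};

static_assert(uint64_t(DiskAllowReassoc) != uint64_t(MemAllowReassoc),
              "the two layouts disagree exactly where the bug bit");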
Value *V = MD->getValue(); Record.push_back(VE.getTypeID(V->getType())); Record.push_back(VE.getValueID(V)); Stream.EmitRecord(bitc::METADATA_VALUE, Record, 0); Record.clear(); } void ModuleBitcodeWriter::writeMDTuple(const MDTuple *N, SmallVectorImpl &Record, unsigned Abbrev) { for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { Metadata *MD = N->getOperand(i); assert(!(MD && isa(MD)) && "Unexpected function-local metadata"); Record.push_back(VE.getMetadataOrNullID(MD)); } Stream.EmitRecord(N->isDistinct() ? bitc::METADATA_DISTINCT_NODE : bitc::METADATA_NODE, Record, Abbrev); Record.clear(); } unsigned ModuleBitcodeWriter::createDILocationAbbrev() { // Assume the column is usually under 128, and always output the inlined-at // location (it's never more expensive than building an array size 1). auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_LOCATION)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); return Stream.EmitAbbrev(std::move(Abbv)); } void ModuleBitcodeWriter::writeDILocation(const DILocation *N, SmallVectorImpl &Record, unsigned &Abbrev) { if (!Abbrev) Abbrev = createDILocationAbbrev(); Record.push_back(N->isDistinct()); Record.push_back(N->getLine()); Record.push_back(N->getColumn()); Record.push_back(VE.getMetadataID(N->getScope())); Record.push_back(VE.getMetadataOrNullID(N->getInlinedAt())); Stream.EmitRecord(bitc::METADATA_LOCATION, Record, Abbrev); Record.clear(); } unsigned ModuleBitcodeWriter::createGenericDINodeAbbrev() { // Assume the column is usually under 128, and always output the inlined-at // location (it's never more expensive than building an array size 1). auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_GENERIC_DEBUG)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); return Stream.EmitAbbrev(std::move(Abbv)); } void ModuleBitcodeWriter::writeGenericDINode(const GenericDINode *N, SmallVectorImpl &Record, unsigned &Abbrev) { if (!Abbrev) Abbrev = createGenericDINodeAbbrev(); Record.push_back(N->isDistinct()); Record.push_back(N->getTag()); Record.push_back(0); // Per-tag version field; unused for now. for (auto &I : N->operands()) Record.push_back(VE.getMetadataOrNullID(I)); Stream.EmitRecord(bitc::METADATA_GENERIC_DEBUG, Record, Abbrev); Record.clear(); } static uint64_t rotateSign(int64_t I) { uint64_t U = I; return I < 0 ? 
~(U << 1) : U << 1; } void ModuleBitcodeWriter::writeDISubrange(const DISubrange *N, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(N->getCount()); Record.push_back(rotateSign(N->getLowerBound())); Stream.EmitRecord(bitc::METADATA_SUBRANGE, Record, Abbrev); Record.clear(); } void ModuleBitcodeWriter::writeDIEnumerator(const DIEnumerator *N, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(rotateSign(N->getValue())); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); Stream.EmitRecord(bitc::METADATA_ENUMERATOR, Record, Abbrev); Record.clear(); } void ModuleBitcodeWriter::writeDIBasicType(const DIBasicType *N, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(N->getTag()); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); Record.push_back(N->getSizeInBits()); Record.push_back(N->getAlignInBits()); Record.push_back(N->getEncoding()); Stream.EmitRecord(bitc::METADATA_BASIC_TYPE, Record, Abbrev); Record.clear(); } void ModuleBitcodeWriter::writeDIDerivedType(const DIDerivedType *N, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(N->getTag()); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); Record.push_back(VE.getMetadataOrNullID(N->getFile())); Record.push_back(N->getLine()); Record.push_back(VE.getMetadataOrNullID(N->getScope())); Record.push_back(VE.getMetadataOrNullID(N->getBaseType())); Record.push_back(N->getSizeInBits()); Record.push_back(N->getAlignInBits()); Record.push_back(N->getOffsetInBits()); Record.push_back(N->getFlags()); Record.push_back(VE.getMetadataOrNullID(N->getExtraData())); // DWARF address space is encoded as N->getDWARFAddressSpace() + 1. 0 means // that there is no DWARF address space associated with DIDerivedType. 
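For illustration, the same "+1, with 0 meaning absent" trick as a standalone round trip; these are hypothetical helpers over llvm::Optional, not writer code:

#include "llvm/ADT/Optional.h"
#include <cstdint>

static uint64_t encodeAddrSpace(llvm::Optional<unsigned> AS) {
  return AS ? uint64_t(*AS) + 1 : 0; // 0 is reserved for "no address space".
}

static llvm::Optional<unsigned> decodeAddrSpace(uint64_t Field) {
  if (Field == 0)
    return llvm::None;
  return unsigned(Field - 1);
}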
if (const auto &DWARFAddressSpace = N->getDWARFAddressSpace()) Record.push_back(*DWARFAddressSpace + 1); else Record.push_back(0); Stream.EmitRecord(bitc::METADATA_DERIVED_TYPE, Record, Abbrev); Record.clear(); } void ModuleBitcodeWriter::writeDICompositeType( const DICompositeType *N, SmallVectorImpl &Record, unsigned Abbrev) { const unsigned IsNotUsedInOldTypeRef = 0x2; Record.push_back(IsNotUsedInOldTypeRef | (unsigned)N->isDistinct()); Record.push_back(N->getTag()); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); Record.push_back(VE.getMetadataOrNullID(N->getFile())); Record.push_back(N->getLine()); Record.push_back(VE.getMetadataOrNullID(N->getScope())); Record.push_back(VE.getMetadataOrNullID(N->getBaseType())); Record.push_back(N->getSizeInBits()); Record.push_back(N->getAlignInBits()); Record.push_back(N->getOffsetInBits()); Record.push_back(N->getFlags()); Record.push_back(VE.getMetadataOrNullID(N->getElements().get())); Record.push_back(N->getRuntimeLang()); Record.push_back(VE.getMetadataOrNullID(N->getVTableHolder())); Record.push_back(VE.getMetadataOrNullID(N->getTemplateParams().get())); Record.push_back(VE.getMetadataOrNullID(N->getRawIdentifier())); Stream.EmitRecord(bitc::METADATA_COMPOSITE_TYPE, Record, Abbrev); Record.clear(); } void ModuleBitcodeWriter::writeDISubroutineType( const DISubroutineType *N, SmallVectorImpl &Record, unsigned Abbrev) { const unsigned HasNoOldTypeRefs = 0x2; Record.push_back(HasNoOldTypeRefs | (unsigned)N->isDistinct()); Record.push_back(N->getFlags()); Record.push_back(VE.getMetadataOrNullID(N->getTypeArray().get())); Record.push_back(N->getCC()); Stream.EmitRecord(bitc::METADATA_SUBROUTINE_TYPE, Record, Abbrev); Record.clear(); } void ModuleBitcodeWriter::writeDIFile(const DIFile *N, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(VE.getMetadataOrNullID(N->getRawFilename())); Record.push_back(VE.getMetadataOrNullID(N->getRawDirectory())); Record.push_back(N->getChecksumKind()); Record.push_back(VE.getMetadataOrNullID(N->getRawChecksum())); Stream.EmitRecord(bitc::METADATA_FILE, Record, Abbrev); Record.clear(); } void ModuleBitcodeWriter::writeDICompileUnit(const DICompileUnit *N, SmallVectorImpl &Record, unsigned Abbrev) { assert(N->isDistinct() && "Expected distinct compile units"); Record.push_back(/* IsDistinct */ true); Record.push_back(N->getSourceLanguage()); Record.push_back(VE.getMetadataOrNullID(N->getFile())); Record.push_back(VE.getMetadataOrNullID(N->getRawProducer())); Record.push_back(N->isOptimized()); Record.push_back(VE.getMetadataOrNullID(N->getRawFlags())); Record.push_back(N->getRuntimeVersion()); Record.push_back(VE.getMetadataOrNullID(N->getRawSplitDebugFilename())); Record.push_back(N->getEmissionKind()); Record.push_back(VE.getMetadataOrNullID(N->getEnumTypes().get())); Record.push_back(VE.getMetadataOrNullID(N->getRetainedTypes().get())); Record.push_back(/* subprograms */ 0); Record.push_back(VE.getMetadataOrNullID(N->getGlobalVariables().get())); Record.push_back(VE.getMetadataOrNullID(N->getImportedEntities().get())); Record.push_back(N->getDWOId()); Record.push_back(VE.getMetadataOrNullID(N->getMacros().get())); Record.push_back(N->getSplitDebugInlining()); Record.push_back(N->getDebugInfoForProfiling()); Record.push_back(N->getGnuPubnames()); Stream.EmitRecord(bitc::METADATA_COMPILE_UNIT, Record, Abbrev); Record.clear(); } void ModuleBitcodeWriter::writeDISubprogram(const DISubprogram *N, SmallVectorImpl &Record, unsigned Abbrev) { uint64_t HasUnitFlag = 
1 << 1; Record.push_back(N->isDistinct() | HasUnitFlag); Record.push_back(VE.getMetadataOrNullID(N->getScope())); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); Record.push_back(VE.getMetadataOrNullID(N->getRawLinkageName())); Record.push_back(VE.getMetadataOrNullID(N->getFile())); Record.push_back(N->getLine()); Record.push_back(VE.getMetadataOrNullID(N->getType())); Record.push_back(N->isLocalToUnit()); Record.push_back(N->isDefinition()); Record.push_back(N->getScopeLine()); Record.push_back(VE.getMetadataOrNullID(N->getContainingType())); Record.push_back(N->getVirtuality()); Record.push_back(N->getVirtualIndex()); Record.push_back(N->getFlags()); Record.push_back(N->isOptimized()); Record.push_back(VE.getMetadataOrNullID(N->getRawUnit())); Record.push_back(VE.getMetadataOrNullID(N->getTemplateParams().get())); Record.push_back(VE.getMetadataOrNullID(N->getDeclaration())); Record.push_back(VE.getMetadataOrNullID(N->getVariables().get())); Record.push_back(N->getThisAdjustment()); Record.push_back(VE.getMetadataOrNullID(N->getThrownTypes().get())); Stream.EmitRecord(bitc::METADATA_SUBPROGRAM, Record, Abbrev); Record.clear(); } void ModuleBitcodeWriter::writeDILexicalBlock(const DILexicalBlock *N, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(VE.getMetadataOrNullID(N->getScope())); Record.push_back(VE.getMetadataOrNullID(N->getFile())); Record.push_back(N->getLine()); Record.push_back(N->getColumn()); Stream.EmitRecord(bitc::METADATA_LEXICAL_BLOCK, Record, Abbrev); Record.clear(); } void ModuleBitcodeWriter::writeDILexicalBlockFile( const DILexicalBlockFile *N, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(VE.getMetadataOrNullID(N->getScope())); Record.push_back(VE.getMetadataOrNullID(N->getFile())); Record.push_back(N->getDiscriminator()); Stream.EmitRecord(bitc::METADATA_LEXICAL_BLOCK_FILE, Record, Abbrev); Record.clear(); } void ModuleBitcodeWriter::writeDINamespace(const DINamespace *N, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct() | N->getExportSymbols() << 1); Record.push_back(VE.getMetadataOrNullID(N->getScope())); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); Stream.EmitRecord(bitc::METADATA_NAMESPACE, Record, Abbrev); Record.clear(); } void ModuleBitcodeWriter::writeDIMacro(const DIMacro *N, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(N->getMacinfoType()); Record.push_back(N->getLine()); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); Record.push_back(VE.getMetadataOrNullID(N->getRawValue())); Stream.EmitRecord(bitc::METADATA_MACRO, Record, Abbrev); Record.clear(); } void ModuleBitcodeWriter::writeDIMacroFile(const DIMacroFile *N, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(N->getMacinfoType()); Record.push_back(N->getLine()); Record.push_back(VE.getMetadataOrNullID(N->getFile())); Record.push_back(VE.getMetadataOrNullID(N->getElements().get())); Stream.EmitRecord(bitc::METADATA_MACRO_FILE, Record, Abbrev); Record.clear(); } void ModuleBitcodeWriter::writeDIModule(const DIModule *N, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); for (auto &I : N->operands()) Record.push_back(VE.getMetadataOrNullID(I)); Stream.EmitRecord(bitc::METADATA_MODULE, Record, Abbrev); Record.clear(); } void ModuleBitcodeWriter::writeDITemplateTypeParameter( const DITemplateTypeParameter *N, SmallVectorImpl 
<uint64_t> &Record, unsigned Abbrev) {
  Record.push_back(N->isDistinct());
  Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
  Record.push_back(VE.getMetadataOrNullID(N->getType()));
  Stream.EmitRecord(bitc::METADATA_TEMPLATE_TYPE, Record, Abbrev);
  Record.clear();
}

void ModuleBitcodeWriter::writeDITemplateValueParameter(
    const DITemplateValueParameter *N, SmallVectorImpl<uint64_t> &Record,
    unsigned Abbrev) {
  Record.push_back(N->isDistinct());
  Record.push_back(N->getTag());
  Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
  Record.push_back(VE.getMetadataOrNullID(N->getType()));
  Record.push_back(VE.getMetadataOrNullID(N->getValue()));
  Stream.EmitRecord(bitc::METADATA_TEMPLATE_VALUE, Record, Abbrev);
  Record.clear();
}

void ModuleBitcodeWriter::writeDIGlobalVariable(
    const DIGlobalVariable *N, SmallVectorImpl<uint64_t> &Record,
    unsigned Abbrev) {
  const uint64_t Version = 1 << 1;
  Record.push_back((uint64_t)N->isDistinct() | Version);
  Record.push_back(VE.getMetadataOrNullID(N->getScope()));
  Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
  Record.push_back(VE.getMetadataOrNullID(N->getRawLinkageName()));
  Record.push_back(VE.getMetadataOrNullID(N->getFile()));
  Record.push_back(N->getLine());
  Record.push_back(VE.getMetadataOrNullID(N->getType()));
  Record.push_back(N->isLocalToUnit());
  Record.push_back(N->isDefinition());
  Record.push_back(/* expr */ 0);
  Record.push_back(VE.getMetadataOrNullID(N->getStaticDataMemberDeclaration()));
  Record.push_back(N->getAlignInBits());
  Stream.EmitRecord(bitc::METADATA_GLOBAL_VAR, Record, Abbrev);
  Record.clear();
}

void ModuleBitcodeWriter::writeDILocalVariable(
    const DILocalVariable *N, SmallVectorImpl<uint64_t> &Record,
    unsigned Abbrev) {
  // In order to support all possible bitcode formats in BitcodeReader we need
  // to distinguish the following cases:
  // 1) Record has no artificial tag (Record[1]),
  //    has no obsolete inlinedAt field (Record[9]).
  //    In this case Record size will be 8, HasAlignment flag is false.
  // 2) Record has artificial tag (Record[1]),
  //    has no obsolete inlinedAt field (Record[9]).
  //    In this case Record size will be 9, HasAlignment flag is false.
  // 3) Record has both artificial tag (Record[1]) and
  //    obsolete inlinedAt field (Record[9]).
  //    In this case Record size will be 10, HasAlignment flag is false.
  // 4) Record has neither artificial tag, nor inlinedAt field, but
  //    HasAlignment flag is true and Record[8] contains alignment value.
  //    (A reader-side sketch of this case analysis follows below.)
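The writer only ever produces case 4 now; the other cases exist so the reader can still decode older bitcode. A minimal reader-side sketch of that dispatch (hypothetical helper, not actual BitcodeReader code):

#include "llvm/ADT/ArrayRef.h"
#include <cstdint>

// Hypothetical: recover the alignment (0 if absent) from a METADATA_LOCAL_VAR
// record, following cases 1-4 above. Bit 1 of Record[0] is HasAlignment.
static uint64_t localVarAlignment(llvm::ArrayRef<uint64_t> Record) {
  bool HasAlignment = Record[0] & 2;
  // Cases 1-3: legacy layouts (sizes 8, 9, 10) carry no alignment at all.
  if (!HasAlignment)
    return 0;
  // Case 4: current layout, alignment stored in Record[8].
  return Record[8];
}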
const uint64_t HasAlignmentFlag = 1 << 1; Record.push_back((uint64_t)N->isDistinct() | HasAlignmentFlag); Record.push_back(VE.getMetadataOrNullID(N->getScope())); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); Record.push_back(VE.getMetadataOrNullID(N->getFile())); Record.push_back(N->getLine()); Record.push_back(VE.getMetadataOrNullID(N->getType())); Record.push_back(N->getArg()); Record.push_back(N->getFlags()); Record.push_back(N->getAlignInBits()); Stream.EmitRecord(bitc::METADATA_LOCAL_VAR, Record, Abbrev); Record.clear(); } void ModuleBitcodeWriter::writeDIExpression(const DIExpression *N, SmallVectorImpl &Record, unsigned Abbrev) { Record.reserve(N->getElements().size() + 1); const uint64_t Version = 3 << 1; Record.push_back((uint64_t)N->isDistinct() | Version); Record.append(N->elements_begin(), N->elements_end()); Stream.EmitRecord(bitc::METADATA_EXPRESSION, Record, Abbrev); Record.clear(); } void ModuleBitcodeWriter::writeDIGlobalVariableExpression( const DIGlobalVariableExpression *N, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(VE.getMetadataOrNullID(N->getVariable())); Record.push_back(VE.getMetadataOrNullID(N->getExpression())); Stream.EmitRecord(bitc::METADATA_GLOBAL_VAR_EXPR, Record, Abbrev); Record.clear(); } void ModuleBitcodeWriter::writeDIObjCProperty(const DIObjCProperty *N, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); Record.push_back(VE.getMetadataOrNullID(N->getFile())); Record.push_back(N->getLine()); Record.push_back(VE.getMetadataOrNullID(N->getRawSetterName())); Record.push_back(VE.getMetadataOrNullID(N->getRawGetterName())); Record.push_back(N->getAttributes()); Record.push_back(VE.getMetadataOrNullID(N->getType())); Stream.EmitRecord(bitc::METADATA_OBJC_PROPERTY, Record, Abbrev); Record.clear(); } void ModuleBitcodeWriter::writeDIImportedEntity( const DIImportedEntity *N, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(N->getTag()); Record.push_back(VE.getMetadataOrNullID(N->getScope())); Record.push_back(VE.getMetadataOrNullID(N->getEntity())); Record.push_back(N->getLine()); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); Record.push_back(VE.getMetadataOrNullID(N->getRawFile())); Stream.EmitRecord(bitc::METADATA_IMPORTED_ENTITY, Record, Abbrev); Record.clear(); } unsigned ModuleBitcodeWriter::createNamedMetadataAbbrev() { auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_NAME)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); return Stream.EmitAbbrev(std::move(Abbv)); } void ModuleBitcodeWriter::writeNamedMetadata( SmallVectorImpl &Record) { if (M.named_metadata_empty()) return; unsigned Abbrev = createNamedMetadataAbbrev(); for (const NamedMDNode &NMD : M.named_metadata()) { // Write name. StringRef Str = NMD.getName(); Record.append(Str.bytes_begin(), Str.bytes_end()); Stream.EmitRecord(bitc::METADATA_NAME, Record, Abbrev); Record.clear(); // Write named metadata operands. 
for (const MDNode *N : NMD.operands()) Record.push_back(VE.getMetadataID(N)); Stream.EmitRecord(bitc::METADATA_NAMED_NODE, Record, 0); Record.clear(); } } unsigned ModuleBitcodeWriter::createMetadataStringsAbbrev() { auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_STRINGS)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // # of strings Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // offset to chars Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); return Stream.EmitAbbrev(std::move(Abbv)); } /// Write out a record for MDString. /// /// All the metadata strings in a metadata block are emitted in a single /// record. The sizes and strings themselves are shoved into a blob. void ModuleBitcodeWriter::writeMetadataStrings( ArrayRef Strings, SmallVectorImpl &Record) { if (Strings.empty()) return; // Start the record with the number of strings. Record.push_back(bitc::METADATA_STRINGS); Record.push_back(Strings.size()); // Emit the sizes of the strings in the blob. SmallString<256> Blob; { BitstreamWriter W(Blob); for (const Metadata *MD : Strings) W.EmitVBR(cast(MD)->getLength(), 6); W.FlushToWord(); } // Add the offset to the strings to the record. Record.push_back(Blob.size()); // Add the strings to the blob. for (const Metadata *MD : Strings) Blob.append(cast(MD)->getString()); // Emit the final record. Stream.EmitRecordWithBlob(createMetadataStringsAbbrev(), Record, Blob); Record.clear(); } // Generates an enum to use as an index in the Abbrev array of Metadata record. enum MetadataAbbrev : unsigned { #define HANDLE_MDNODE_LEAF(CLASS) CLASS##AbbrevID, #include "llvm/IR/Metadata.def" LastPlusOne }; void ModuleBitcodeWriter::writeMetadataRecords( ArrayRef MDs, SmallVectorImpl &Record, std::vector *MDAbbrevs, std::vector *IndexPos) { if (MDs.empty()) return; // Initialize MDNode abbreviations. #define HANDLE_MDNODE_LEAF(CLASS) unsigned CLASS##Abbrev = 0; #include "llvm/IR/Metadata.def" for (const Metadata *MD : MDs) { if (IndexPos) IndexPos->push_back(Stream.GetCurrentBitNo()); if (const MDNode *N = dyn_cast(MD)) { assert(N->isResolved() && "Expected forward references to be resolved"); switch (N->getMetadataID()) { default: llvm_unreachable("Invalid MDNode subclass"); #define HANDLE_MDNODE_LEAF(CLASS) \ case Metadata::CLASS##Kind: \ if (MDAbbrevs) \ write##CLASS(cast(N), Record, \ (*MDAbbrevs)[MetadataAbbrev::CLASS##AbbrevID]); \ else \ write##CLASS(cast(N), Record, CLASS##Abbrev); \ continue; #include "llvm/IR/Metadata.def" } } writeValueAsMetadata(cast(MD), Record); } } void ModuleBitcodeWriter::writeModuleMetadata() { if (!VE.hasMDs() && M.named_metadata_empty()) return; Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 4); SmallVector Record; // Emit all abbrevs upfront, so that the reader can jump in the middle of the // block and load any metadata. 
std::vector MDAbbrevs; MDAbbrevs.resize(MetadataAbbrev::LastPlusOne); MDAbbrevs[MetadataAbbrev::DILocationAbbrevID] = createDILocationAbbrev(); MDAbbrevs[MetadataAbbrev::GenericDINodeAbbrevID] = createGenericDINodeAbbrev(); auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_INDEX_OFFSET)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); unsigned OffsetAbbrev = Stream.EmitAbbrev(std::move(Abbv)); Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_INDEX)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); unsigned IndexAbbrev = Stream.EmitAbbrev(std::move(Abbv)); // Emit MDStrings together upfront. writeMetadataStrings(VE.getMDStrings(), Record); // We only emit an index for the metadata record if we have more than a given // (naive) threshold of metadatas, otherwise it is not worth it. if (VE.getNonMDStrings().size() > IndexThreshold) { // Write a placeholder value in for the offset of the metadata index, // which is written after the records, so that it can include // the offset of each entry. The placeholder offset will be // updated after all records are emitted. uint64_t Vals[] = {0, 0}; Stream.EmitRecord(bitc::METADATA_INDEX_OFFSET, Vals, OffsetAbbrev); } // Compute and save the bit offset to the current position, which will be // patched when we emit the index later. We can simply subtract the 64-bit // fixed size from the current bit number to get the location to backpatch. uint64_t IndexOffsetRecordBitPos = Stream.GetCurrentBitNo(); // This index will contain the bitpos for each individual record. std::vector IndexPos; IndexPos.reserve(VE.getNonMDStrings().size()); // Write all the records writeMetadataRecords(VE.getNonMDStrings(), Record, &MDAbbrevs, &IndexPos); if (VE.getNonMDStrings().size() > IndexThreshold) { // Now that we have emitted all the records we will emit the index. But // first // backpatch the forward reference so that the reader can skip the records // efficiently. Stream.BackpatchWord64(IndexOffsetRecordBitPos - 64, Stream.GetCurrentBitNo() - IndexOffsetRecordBitPos); // Delta encode the index. uint64_t PreviousValue = IndexOffsetRecordBitPos; for (auto &Elt : IndexPos) { auto EltDelta = Elt - PreviousValue; PreviousValue = Elt; Elt = EltDelta; } // Emit the index record. Stream.EmitRecord(bitc::METADATA_INDEX, IndexPos, IndexAbbrev); IndexPos.clear(); } // Write the named metadata now. writeNamedMetadata(Record); auto AddDeclAttachedMetadata = [&](const GlobalObject &GO) { SmallVector Record; Record.push_back(VE.getValueID(&GO)); pushGlobalMetadataAttachment(Record, GO); Stream.EmitRecord(bitc::METADATA_GLOBAL_DECL_ATTACHMENT, Record); }; for (const Function &F : M) if (F.isDeclaration() && F.hasMetadata()) AddDeclAttachedMetadata(F); // FIXME: Only store metadata for declarations here, and move data for global // variable definitions to a separate block (PR28134). 
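Since the index stores absolute bit positions delta-encoded against the position of the METADATA_INDEX_OFFSET record, a reader reverses it with a running sum. A minimal sketch of both directions, assuming nothing beyond a vector of positions:

#include <cstdint>
#include <vector>

// Forward direction: rewrite absolute positions as deltas, as done above.
void deltaEncode(std::vector<uint64_t> &Pos, uint64_t Base) {
  uint64_t Prev = Base;
  for (auto &Elt : Pos) {
    uint64_t Delta = Elt - Prev;
    Prev = Elt;
    Elt = Delta;
  }
}

// Reverse direction: what a reader does to recover absolute bit positions.
void deltaDecode(std::vector<uint64_t> &Pos, uint64_t Base) {
  uint64_t Prev = Base;
  for (auto &Elt : Pos) {
    Elt += Prev;
    Prev = Elt;
  }
}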
for (const GlobalVariable &GV : M.globals()) if (GV.hasMetadata()) AddDeclAttachedMetadata(GV); Stream.ExitBlock(); } void ModuleBitcodeWriter::writeFunctionMetadata(const Function &F) { if (!VE.hasMDs()) return; Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3); SmallVector Record; writeMetadataStrings(VE.getMDStrings(), Record); writeMetadataRecords(VE.getNonMDStrings(), Record); Stream.ExitBlock(); } void ModuleBitcodeWriter::pushGlobalMetadataAttachment( SmallVectorImpl &Record, const GlobalObject &GO) { // [n x [id, mdnode]] SmallVector, 4> MDs; GO.getAllMetadata(MDs); for (const auto &I : MDs) { Record.push_back(I.first); Record.push_back(VE.getMetadataID(I.second)); } } void ModuleBitcodeWriter::writeFunctionMetadataAttachment(const Function &F) { Stream.EnterSubblock(bitc::METADATA_ATTACHMENT_ID, 3); SmallVector Record; if (F.hasMetadata()) { pushGlobalMetadataAttachment(Record, F); Stream.EmitRecord(bitc::METADATA_ATTACHMENT, Record, 0); Record.clear(); } // Write metadata attachments // METADATA_ATTACHMENT - [m x [value, [n x [id, mdnode]]] SmallVector, 4> MDs; for (const BasicBlock &BB : F) for (const Instruction &I : BB) { MDs.clear(); I.getAllMetadataOtherThanDebugLoc(MDs); // If no metadata, ignore instruction. if (MDs.empty()) continue; Record.push_back(VE.getInstructionID(&I)); for (unsigned i = 0, e = MDs.size(); i != e; ++i) { Record.push_back(MDs[i].first); Record.push_back(VE.getMetadataID(MDs[i].second)); } Stream.EmitRecord(bitc::METADATA_ATTACHMENT, Record, 0); Record.clear(); } Stream.ExitBlock(); } void ModuleBitcodeWriter::writeModuleMetadataKinds() { SmallVector Record; // Write metadata kinds // METADATA_KIND - [n x [id, name]] SmallVector Names; M.getMDKindNames(Names); if (Names.empty()) return; Stream.EnterSubblock(bitc::METADATA_KIND_BLOCK_ID, 3); for (unsigned MDKindID = 0, e = Names.size(); MDKindID != e; ++MDKindID) { Record.push_back(MDKindID); StringRef KName = Names[MDKindID]; Record.append(KName.begin(), KName.end()); Stream.EmitRecord(bitc::METADATA_KIND, Record, 0); Record.clear(); } Stream.ExitBlock(); } void ModuleBitcodeWriter::writeOperandBundleTags() { // Write metadata kinds // // OPERAND_BUNDLE_TAGS_BLOCK_ID : N x OPERAND_BUNDLE_TAG // // OPERAND_BUNDLE_TAG - [strchr x N] SmallVector Tags; M.getOperandBundleTags(Tags); if (Tags.empty()) return; Stream.EnterSubblock(bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID, 3); SmallVector Record; for (auto Tag : Tags) { Record.append(Tag.begin(), Tag.end()); Stream.EmitRecord(bitc::OPERAND_BUNDLE_TAG, Record, 0); Record.clear(); } Stream.ExitBlock(); } void ModuleBitcodeWriter::writeSyncScopeNames() { SmallVector SSNs; M.getContext().getSyncScopeNames(SSNs); if (SSNs.empty()) return; Stream.EnterSubblock(bitc::SYNC_SCOPE_NAMES_BLOCK_ID, 2); SmallVector Record; for (auto SSN : SSNs) { Record.append(SSN.begin(), SSN.end()); Stream.EmitRecord(bitc::SYNC_SCOPE_NAME, Record, 0); Record.clear(); } Stream.ExitBlock(); } static void emitSignedInt64(SmallVectorImpl &Vals, uint64_t V) { if ((int64_t)V >= 0) Vals.push_back(V << 1); else Vals.push_back((-V << 1) | 1); } void ModuleBitcodeWriter::writeConstants(unsigned FirstVal, unsigned LastVal, bool isGlobal) { if (FirstVal == LastVal) return; Stream.EnterSubblock(bitc::CONSTANTS_BLOCK_ID, 4); unsigned AggregateAbbrev = 0; unsigned String8Abbrev = 0; unsigned CString7Abbrev = 0; unsigned CString6Abbrev = 0; // If this is a constant pool for the module, emit module-specific abbrevs. if (isGlobal) { // Abbrev for CST_CODE_AGGREGATE. 
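A note on emitSignedInt64 above, which the CST_CODE_INTEGER records below rely on: it maps signed values onto unsigned, VBR-friendly ones by shifting the magnitude left and putting the sign in bit 0, so small magnitudes stay small. A compile-time sketch of that mapping:

#include <cstdint>

constexpr uint64_t encodeSigned(int64_t V) {
  return V >= 0 ? uint64_t(V) << 1 : (uint64_t(-V) << 1) | 1;
}
constexpr int64_t decodeSigned(uint64_t V) {
  return (V & 1) ? -int64_t(V >> 1) : int64_t(V >> 1);
}

static_assert(encodeSigned(0) == 0 && encodeSigned(1) == 2 &&
              encodeSigned(-1) == 3, "sign lands in bit 0");
static_assert(decodeSigned(encodeSigned(-42)) == -42, "round trips");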
auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_AGGREGATE)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, Log2_32_Ceil(LastVal+1))); AggregateAbbrev = Stream.EmitAbbrev(std::move(Abbv)); // Abbrev for CST_CODE_STRING. Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_STRING)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); String8Abbrev = Stream.EmitAbbrev(std::move(Abbv)); // Abbrev for CST_CODE_CSTRING. Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CSTRING)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); CString7Abbrev = Stream.EmitAbbrev(std::move(Abbv)); // Abbrev for CST_CODE_CSTRING. Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CSTRING)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); CString6Abbrev = Stream.EmitAbbrev(std::move(Abbv)); } SmallVector Record; const ValueEnumerator::ValueList &Vals = VE.getValues(); Type *LastTy = nullptr; for (unsigned i = FirstVal; i != LastVal; ++i) { const Value *V = Vals[i].first; // If we need to switch types, do so now. if (V->getType() != LastTy) { LastTy = V->getType(); Record.push_back(VE.getTypeID(LastTy)); Stream.EmitRecord(bitc::CST_CODE_SETTYPE, Record, CONSTANTS_SETTYPE_ABBREV); Record.clear(); } if (const InlineAsm *IA = dyn_cast(V)) { Record.push_back(unsigned(IA->hasSideEffects()) | unsigned(IA->isAlignStack()) << 1 | unsigned(IA->getDialect()&1) << 2); // Add the asm string. const std::string &AsmStr = IA->getAsmString(); Record.push_back(AsmStr.size()); Record.append(AsmStr.begin(), AsmStr.end()); // Add the constraint string. const std::string &ConstraintStr = IA->getConstraintString(); Record.push_back(ConstraintStr.size()); Record.append(ConstraintStr.begin(), ConstraintStr.end()); Stream.EmitRecord(bitc::CST_CODE_INLINEASM, Record); Record.clear(); continue; } const Constant *C = cast(V); unsigned Code = -1U; unsigned AbbrevToUse = 0; if (C->isNullValue()) { Code = bitc::CST_CODE_NULL; } else if (isa(C)) { Code = bitc::CST_CODE_UNDEF; } else if (const ConstantInt *IV = dyn_cast(C)) { if (IV->getBitWidth() <= 64) { uint64_t V = IV->getSExtValue(); emitSignedInt64(Record, V); Code = bitc::CST_CODE_INTEGER; AbbrevToUse = CONSTANTS_INTEGER_ABBREV; } else { // Wide integers, > 64 bits in size. // We have an arbitrary precision integer value to write whose // bit width is > 64. However, in canonical unsigned integer // format it is likely that the high bits are going to be zero. // So, we only write the number of active words. unsigned NWords = IV->getValue().getActiveWords(); const uint64_t *RawWords = IV->getValue().getRawData(); for (unsigned i = 0; i != NWords; ++i) { emitSignedInt64(Record, RawWords[i]); } Code = bitc::CST_CODE_WIDE_INTEGER; } } else if (const ConstantFP *CFP = dyn_cast(C)) { Code = bitc::CST_CODE_FLOAT; Type *Ty = CFP->getType(); if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy()) { Record.push_back(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); } else if (Ty->isX86_FP80Ty()) { // api needed to prevent premature destruction // bits are not in the same order as a normal i80 APInt, compensate. 
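// To spell out the compensation below (my reading of the code, widths in
// bits): bitcastToAPInt() on x86_fp80 yields two 64-bit words, p[0] = the
// low 64 bits (the mantissa) and p[1] = the high 16 bits (sign + exponent).
// The record instead stores the value's high 64 bits first, then the low 16,
// hence the shift-and-mask pair:
//
//   APInt words: p[1] = [ 48 zero | 16 sign+exp ], p[0] = [ 64 mantissa ]
//   Record[0]   = (p[1] << 48) | (p[0] >> 16)   // sign+exp : mantissa[63:16]
//   Record[1]   = p[0] & 0xffff                 // mantissa[15:0]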
APInt api = CFP->getValueAPF().bitcastToAPInt(); const uint64_t *p = api.getRawData(); Record.push_back((p[1] << 48) | (p[0] >> 16)); Record.push_back(p[0] & 0xffffLL); } else if (Ty->isFP128Ty() || Ty->isPPC_FP128Ty()) { APInt api = CFP->getValueAPF().bitcastToAPInt(); const uint64_t *p = api.getRawData(); Record.push_back(p[0]); Record.push_back(p[1]); } else { assert(0 && "Unknown FP type!"); } } else if (isa(C) && cast(C)->isString()) { const ConstantDataSequential *Str = cast(C); // Emit constant strings specially. unsigned NumElts = Str->getNumElements(); // If this is a null-terminated string, use the denser CSTRING encoding. if (Str->isCString()) { Code = bitc::CST_CODE_CSTRING; --NumElts; // Don't encode the null, which isn't allowed by char6. } else { Code = bitc::CST_CODE_STRING; AbbrevToUse = String8Abbrev; } bool isCStr7 = Code == bitc::CST_CODE_CSTRING; bool isCStrChar6 = Code == bitc::CST_CODE_CSTRING; for (unsigned i = 0; i != NumElts; ++i) { unsigned char V = Str->getElementAsInteger(i); Record.push_back(V); isCStr7 &= (V & 128) == 0; if (isCStrChar6) isCStrChar6 = BitCodeAbbrevOp::isChar6(V); } if (isCStrChar6) AbbrevToUse = CString6Abbrev; else if (isCStr7) AbbrevToUse = CString7Abbrev; } else if (const ConstantDataSequential *CDS = dyn_cast(C)) { Code = bitc::CST_CODE_DATA; Type *EltTy = CDS->getType()->getElementType(); if (isa(EltTy)) { for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) Record.push_back(CDS->getElementAsInteger(i)); } else { for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) Record.push_back( CDS->getElementAsAPFloat(i).bitcastToAPInt().getLimitedValue()); } } else if (isa(C)) { Code = bitc::CST_CODE_AGGREGATE; for (const Value *Op : C->operands()) Record.push_back(VE.getValueID(Op)); AbbrevToUse = AggregateAbbrev; } else if (const ConstantExpr *CE = dyn_cast(C)) { switch (CE->getOpcode()) { default: if (Instruction::isCast(CE->getOpcode())) { Code = bitc::CST_CODE_CE_CAST; Record.push_back(getEncodedCastOpcode(CE->getOpcode())); Record.push_back(VE.getTypeID(C->getOperand(0)->getType())); Record.push_back(VE.getValueID(C->getOperand(0))); AbbrevToUse = CONSTANTS_CE_CAST_Abbrev; } else { assert(CE->getNumOperands() == 2 && "Unknown constant expr!"); Code = bitc::CST_CODE_CE_BINOP; Record.push_back(getEncodedBinaryOpcode(CE->getOpcode())); Record.push_back(VE.getValueID(C->getOperand(0))); Record.push_back(VE.getValueID(C->getOperand(1))); uint64_t Flags = getOptimizationFlags(CE); if (Flags != 0) Record.push_back(Flags); } break; case Instruction::GetElementPtr: { Code = bitc::CST_CODE_CE_GEP; const auto *GO = cast(C); Record.push_back(VE.getTypeID(GO->getSourceElementType())); if (Optional Idx = GO->getInRangeIndex()) { Code = bitc::CST_CODE_CE_GEP_WITH_INRANGE_INDEX; Record.push_back((*Idx << 1) | GO->isInBounds()); } else if (GO->isInBounds()) Code = bitc::CST_CODE_CE_INBOUNDS_GEP; for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i) { Record.push_back(VE.getTypeID(C->getOperand(i)->getType())); Record.push_back(VE.getValueID(C->getOperand(i))); } break; } case Instruction::Select: Code = bitc::CST_CODE_CE_SELECT; Record.push_back(VE.getValueID(C->getOperand(0))); Record.push_back(VE.getValueID(C->getOperand(1))); Record.push_back(VE.getValueID(C->getOperand(2))); break; case Instruction::ExtractElement: Code = bitc::CST_CODE_CE_EXTRACTELT; Record.push_back(VE.getTypeID(C->getOperand(0)->getType())); Record.push_back(VE.getValueID(C->getOperand(0))); Record.push_back(VE.getTypeID(C->getOperand(1)->getType())); 
Record.push_back(VE.getValueID(C->getOperand(1))); break; case Instruction::InsertElement: Code = bitc::CST_CODE_CE_INSERTELT; Record.push_back(VE.getValueID(C->getOperand(0))); Record.push_back(VE.getValueID(C->getOperand(1))); Record.push_back(VE.getTypeID(C->getOperand(2)->getType())); Record.push_back(VE.getValueID(C->getOperand(2))); break; case Instruction::ShuffleVector: // If the return type and argument types are the same, this is a // standard shufflevector instruction. If the types are different, // then the shuffle is widening or truncating the input vectors, and // the argument type must also be encoded. if (C->getType() == C->getOperand(0)->getType()) { Code = bitc::CST_CODE_CE_SHUFFLEVEC; } else { Code = bitc::CST_CODE_CE_SHUFVEC_EX; Record.push_back(VE.getTypeID(C->getOperand(0)->getType())); } Record.push_back(VE.getValueID(C->getOperand(0))); Record.push_back(VE.getValueID(C->getOperand(1))); Record.push_back(VE.getValueID(C->getOperand(2))); break; case Instruction::ICmp: case Instruction::FCmp: Code = bitc::CST_CODE_CE_CMP; Record.push_back(VE.getTypeID(C->getOperand(0)->getType())); Record.push_back(VE.getValueID(C->getOperand(0))); Record.push_back(VE.getValueID(C->getOperand(1))); Record.push_back(CE->getPredicate()); break; } } else if (const BlockAddress *BA = dyn_cast(C)) { Code = bitc::CST_CODE_BLOCKADDRESS; Record.push_back(VE.getTypeID(BA->getFunction()->getType())); Record.push_back(VE.getValueID(BA->getFunction())); Record.push_back(VE.getGlobalBasicBlockID(BA->getBasicBlock())); } else { #ifndef NDEBUG C->dump(); #endif llvm_unreachable("Unknown constant!"); } Stream.EmitRecord(Code, Record, AbbrevToUse); Record.clear(); } Stream.ExitBlock(); } void ModuleBitcodeWriter::writeModuleConstants() { const ValueEnumerator::ValueList &Vals = VE.getValues(); // Find the first constant to emit, which is the first non-globalvalue value. // We know globalvalues have been emitted by WriteModuleInfo. for (unsigned i = 0, e = Vals.size(); i != e; ++i) { if (!isa(Vals[i].first)) { writeConstants(i, Vals.size(), true); return; } } } /// pushValueAndType - The file has to encode both the value and type id for /// many values, because we need to know what type to create for forward /// references. However, most operands are not forward references, so this type /// field is not needed. /// /// This function adds V's value ID to Vals. If the value ID is higher than the /// instruction ID, then it is a forward reference, and it also includes the /// type ID. The value ID that is written is encoded relative to the InstID. bool ModuleBitcodeWriter::pushValueAndType(const Value *V, unsigned InstID, SmallVectorImpl &Vals) { unsigned ValID = VE.getValueID(V); // Make encoding relative to the InstID. Vals.push_back(InstID - ValID); if (ValID >= InstID) { Vals.push_back(VE.getTypeID(V->getType())); return true; } return false; } void ModuleBitcodeWriter::writeOperandBundles(ImmutableCallSite CS, unsigned InstID) { SmallVector Record; LLVMContext &C = CS.getInstruction()->getContext(); for (unsigned i = 0, e = CS.getNumOperandBundles(); i != e; ++i) { const auto &Bundle = CS.getOperandBundleAt(i); Record.push_back(C.getOperandBundleTagID(Bundle.getTagName())); for (auto &Input : Bundle.Inputs) pushValueAndType(Input, InstID, Record); Stream.EmitRecord(bitc::FUNC_CODE_OPERAND_BUNDLE, Record); Record.clear(); } } /// pushValue - Like pushValueAndType, but where the type of the value is /// omitted (perhaps it was already encoded in an earlier operand). 
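// Worked example of the relative encoding: with InstID = 100, an operand
// defined at ValID = 90 is written as 10 (a backward reference, no type
// needed), while a forward reference at ValID = 105 is written as the
// wrapped 32-bit value 100 - 105 = 4294967291 followed by its type ID.
// That wrap-around is also why PHI incoming values use pushValueSigned and
// signed VBRs further down, rather than this unsigned form.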
void ModuleBitcodeWriter::pushValue(const Value *V, unsigned InstID, SmallVectorImpl &Vals) { unsigned ValID = VE.getValueID(V); Vals.push_back(InstID - ValID); } void ModuleBitcodeWriter::pushValueSigned(const Value *V, unsigned InstID, SmallVectorImpl &Vals) { unsigned ValID = VE.getValueID(V); int64_t diff = ((int32_t)InstID - (int32_t)ValID); emitSignedInt64(Vals, diff); } /// WriteInstruction - Emit an instruction to the specified stream. void ModuleBitcodeWriter::writeInstruction(const Instruction &I, unsigned InstID, SmallVectorImpl &Vals) { unsigned Code = 0; unsigned AbbrevToUse = 0; VE.setInstructionID(&I); switch (I.getOpcode()) { default: if (Instruction::isCast(I.getOpcode())) { Code = bitc::FUNC_CODE_INST_CAST; if (!pushValueAndType(I.getOperand(0), InstID, Vals)) AbbrevToUse = FUNCTION_INST_CAST_ABBREV; Vals.push_back(VE.getTypeID(I.getType())); Vals.push_back(getEncodedCastOpcode(I.getOpcode())); } else { assert(isa(I) && "Unknown instruction!"); Code = bitc::FUNC_CODE_INST_BINOP; if (!pushValueAndType(I.getOperand(0), InstID, Vals)) AbbrevToUse = FUNCTION_INST_BINOP_ABBREV; pushValue(I.getOperand(1), InstID, Vals); Vals.push_back(getEncodedBinaryOpcode(I.getOpcode())); uint64_t Flags = getOptimizationFlags(&I); if (Flags != 0) { if (AbbrevToUse == FUNCTION_INST_BINOP_ABBREV) AbbrevToUse = FUNCTION_INST_BINOP_FLAGS_ABBREV; Vals.push_back(Flags); } } break; case Instruction::GetElementPtr: { Code = bitc::FUNC_CODE_INST_GEP; AbbrevToUse = FUNCTION_INST_GEP_ABBREV; auto &GEPInst = cast(I); Vals.push_back(GEPInst.isInBounds()); Vals.push_back(VE.getTypeID(GEPInst.getSourceElementType())); for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) pushValueAndType(I.getOperand(i), InstID, Vals); break; } case Instruction::ExtractValue: { Code = bitc::FUNC_CODE_INST_EXTRACTVAL; pushValueAndType(I.getOperand(0), InstID, Vals); const ExtractValueInst *EVI = cast(&I); Vals.append(EVI->idx_begin(), EVI->idx_end()); break; } case Instruction::InsertValue: { Code = bitc::FUNC_CODE_INST_INSERTVAL; pushValueAndType(I.getOperand(0), InstID, Vals); pushValueAndType(I.getOperand(1), InstID, Vals); const InsertValueInst *IVI = cast(&I); Vals.append(IVI->idx_begin(), IVI->idx_end()); break; } case Instruction::Select: Code = bitc::FUNC_CODE_INST_VSELECT; pushValueAndType(I.getOperand(1), InstID, Vals); pushValue(I.getOperand(2), InstID, Vals); pushValueAndType(I.getOperand(0), InstID, Vals); break; case Instruction::ExtractElement: Code = bitc::FUNC_CODE_INST_EXTRACTELT; pushValueAndType(I.getOperand(0), InstID, Vals); pushValueAndType(I.getOperand(1), InstID, Vals); break; case Instruction::InsertElement: Code = bitc::FUNC_CODE_INST_INSERTELT; pushValueAndType(I.getOperand(0), InstID, Vals); pushValue(I.getOperand(1), InstID, Vals); pushValueAndType(I.getOperand(2), InstID, Vals); break; case Instruction::ShuffleVector: Code = bitc::FUNC_CODE_INST_SHUFFLEVEC; pushValueAndType(I.getOperand(0), InstID, Vals); pushValue(I.getOperand(1), InstID, Vals); pushValue(I.getOperand(2), InstID, Vals); break; case Instruction::ICmp: case Instruction::FCmp: { // compare returning Int1Ty or vector of Int1Ty Code = bitc::FUNC_CODE_INST_CMP2; pushValueAndType(I.getOperand(0), InstID, Vals); pushValue(I.getOperand(1), InstID, Vals); Vals.push_back(cast(I).getPredicate()); uint64_t Flags = getOptimizationFlags(&I); if (Flags != 0) Vals.push_back(Flags); break; } case Instruction::Ret: { Code = bitc::FUNC_CODE_INST_RET; unsigned NumOperands = I.getNumOperands(); if (NumOperands == 0) AbbrevToUse = 
FUNCTION_INST_RET_VOID_ABBREV; else if (NumOperands == 1) { if (!pushValueAndType(I.getOperand(0), InstID, Vals)) AbbrevToUse = FUNCTION_INST_RET_VAL_ABBREV; } else { for (unsigned i = 0, e = NumOperands; i != e; ++i) pushValueAndType(I.getOperand(i), InstID, Vals); } } break; case Instruction::Br: { Code = bitc::FUNC_CODE_INST_BR; const BranchInst &II = cast(I); Vals.push_back(VE.getValueID(II.getSuccessor(0))); if (II.isConditional()) { Vals.push_back(VE.getValueID(II.getSuccessor(1))); pushValue(II.getCondition(), InstID, Vals); } } break; case Instruction::Switch: { Code = bitc::FUNC_CODE_INST_SWITCH; const SwitchInst &SI = cast(I); Vals.push_back(VE.getTypeID(SI.getCondition()->getType())); pushValue(SI.getCondition(), InstID, Vals); Vals.push_back(VE.getValueID(SI.getDefaultDest())); for (auto Case : SI.cases()) { Vals.push_back(VE.getValueID(Case.getCaseValue())); Vals.push_back(VE.getValueID(Case.getCaseSuccessor())); } } break; case Instruction::IndirectBr: Code = bitc::FUNC_CODE_INST_INDIRECTBR; Vals.push_back(VE.getTypeID(I.getOperand(0)->getType())); // Encode the address operand as relative, but not the basic blocks. pushValue(I.getOperand(0), InstID, Vals); for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) Vals.push_back(VE.getValueID(I.getOperand(i))); break; case Instruction::Invoke: { const InvokeInst *II = cast(&I); const Value *Callee = II->getCalledValue(); FunctionType *FTy = II->getFunctionType(); if (II->hasOperandBundles()) writeOperandBundles(II, InstID); Code = bitc::FUNC_CODE_INST_INVOKE; Vals.push_back(VE.getAttributeListID(II->getAttributes())); Vals.push_back(II->getCallingConv() | 1 << 13); Vals.push_back(VE.getValueID(II->getNormalDest())); Vals.push_back(VE.getValueID(II->getUnwindDest())); Vals.push_back(VE.getTypeID(FTy)); pushValueAndType(Callee, InstID, Vals); // Emit value #'s for the fixed parameters. for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) pushValue(I.getOperand(i), InstID, Vals); // fixed param. // Emit type/value pairs for varargs params. if (FTy->isVarArg()) { for (unsigned i = FTy->getNumParams(), e = II->getNumArgOperands(); i != e; ++i) pushValueAndType(I.getOperand(i), InstID, Vals); // vararg } break; } case Instruction::Resume: Code = bitc::FUNC_CODE_INST_RESUME; pushValueAndType(I.getOperand(0), InstID, Vals); break; case Instruction::CleanupRet: { Code = bitc::FUNC_CODE_INST_CLEANUPRET; const auto &CRI = cast(I); pushValue(CRI.getCleanupPad(), InstID, Vals); if (CRI.hasUnwindDest()) Vals.push_back(VE.getValueID(CRI.getUnwindDest())); break; } case Instruction::CatchRet: { Code = bitc::FUNC_CODE_INST_CATCHRET; const auto &CRI = cast(I); pushValue(CRI.getCatchPad(), InstID, Vals); Vals.push_back(VE.getValueID(CRI.getSuccessor())); break; } case Instruction::CleanupPad: case Instruction::CatchPad: { const auto &FuncletPad = cast(I); Code = isa(FuncletPad) ? 
bitc::FUNC_CODE_INST_CATCHPAD : bitc::FUNC_CODE_INST_CLEANUPPAD; pushValue(FuncletPad.getParentPad(), InstID, Vals); unsigned NumArgOperands = FuncletPad.getNumArgOperands(); Vals.push_back(NumArgOperands); for (unsigned Op = 0; Op != NumArgOperands; ++Op) pushValueAndType(FuncletPad.getArgOperand(Op), InstID, Vals); break; } case Instruction::CatchSwitch: { Code = bitc::FUNC_CODE_INST_CATCHSWITCH; const auto &CatchSwitch = cast(I); pushValue(CatchSwitch.getParentPad(), InstID, Vals); unsigned NumHandlers = CatchSwitch.getNumHandlers(); Vals.push_back(NumHandlers); for (const BasicBlock *CatchPadBB : CatchSwitch.handlers()) Vals.push_back(VE.getValueID(CatchPadBB)); if (CatchSwitch.hasUnwindDest()) Vals.push_back(VE.getValueID(CatchSwitch.getUnwindDest())); break; } case Instruction::Unreachable: Code = bitc::FUNC_CODE_INST_UNREACHABLE; AbbrevToUse = FUNCTION_INST_UNREACHABLE_ABBREV; break; case Instruction::PHI: { const PHINode &PN = cast(I); Code = bitc::FUNC_CODE_INST_PHI; // With the newer instruction encoding, forward references could give // negative valued IDs. This is most common for PHIs, so we use // signed VBRs. SmallVector Vals64; Vals64.push_back(VE.getTypeID(PN.getType())); for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { pushValueSigned(PN.getIncomingValue(i), InstID, Vals64); Vals64.push_back(VE.getValueID(PN.getIncomingBlock(i))); } // Emit a Vals64 vector and exit. Stream.EmitRecord(Code, Vals64, AbbrevToUse); Vals64.clear(); return; } case Instruction::LandingPad: { const LandingPadInst &LP = cast(I); Code = bitc::FUNC_CODE_INST_LANDINGPAD; Vals.push_back(VE.getTypeID(LP.getType())); Vals.push_back(LP.isCleanup()); Vals.push_back(LP.getNumClauses()); for (unsigned I = 0, E = LP.getNumClauses(); I != E; ++I) { if (LP.isCatch(I)) Vals.push_back(LandingPadInst::Catch); else Vals.push_back(LandingPadInst::Filter); pushValueAndType(LP.getClause(I), InstID, Vals); } break; } case Instruction::Alloca: { Code = bitc::FUNC_CODE_INST_ALLOCA; const AllocaInst &AI = cast(I); Vals.push_back(VE.getTypeID(AI.getAllocatedType())); Vals.push_back(VE.getTypeID(I.getOperand(0)->getType())); Vals.push_back(VE.getValueID(I.getOperand(0))); // size. 
unsigned AlignRecord = Log2_32(AI.getAlignment()) + 1;
    assert(Log2_32(Value::MaximumAlignment) + 1 < 1 << 5 &&
           "not enough bits for maximum alignment");
    assert(AlignRecord < 1 << 5 && "alignment does not fit in 5-bit record");
    // AlignRecord layout: bits 0-4 = Log2(alignment) + 1, bit 5 = inalloca,
    // bit 6 = explicit allocated type is present, bit 7 = swifterror.
    AlignRecord |= AI.isUsedWithInAlloca() << 5;
    AlignRecord |= 1 << 6;
    AlignRecord |= AI.isSwiftError() << 7;
    Vals.push_back(AlignRecord);
    break;
  }

  case Instruction::Load:
    if (cast<LoadInst>(I).isAtomic()) {
      Code = bitc::FUNC_CODE_INST_LOADATOMIC;
      pushValueAndType(I.getOperand(0), InstID, Vals);
    } else {
      Code = bitc::FUNC_CODE_INST_LOAD;
      if (!pushValueAndType(I.getOperand(0), InstID, Vals)) // ptr
        AbbrevToUse = FUNCTION_INST_LOAD_ABBREV;
    }
    Vals.push_back(VE.getTypeID(I.getType()));
    Vals.push_back(Log2_32(cast<LoadInst>(I).getAlignment())+1);
    Vals.push_back(cast<LoadInst>(I).isVolatile());
    if (cast<LoadInst>(I).isAtomic()) {
      Vals.push_back(getEncodedOrdering(cast<LoadInst>(I).getOrdering()));
      Vals.push_back(getEncodedSyncScopeID(cast<LoadInst>(I).getSyncScopeID()));
    }
    break;
  case Instruction::Store:
    if (cast<StoreInst>(I).isAtomic())
      Code = bitc::FUNC_CODE_INST_STOREATOMIC;
    else
      Code = bitc::FUNC_CODE_INST_STORE;
    pushValueAndType(I.getOperand(1), InstID, Vals); // ptrty + ptr
    pushValueAndType(I.getOperand(0), InstID, Vals); // valty + val
    Vals.push_back(Log2_32(cast<StoreInst>(I).getAlignment())+1);
    Vals.push_back(cast<StoreInst>(I).isVolatile());
    if (cast<StoreInst>(I).isAtomic()) {
      Vals.push_back(getEncodedOrdering(cast<StoreInst>(I).getOrdering()));
      Vals.push_back(
          getEncodedSyncScopeID(cast<StoreInst>(I).getSyncScopeID()));
    }
    break;
  case Instruction::AtomicCmpXchg:
    Code = bitc::FUNC_CODE_INST_CMPXCHG;
    pushValueAndType(I.getOperand(0), InstID, Vals); // ptrty + ptr
    pushValueAndType(I.getOperand(1), InstID, Vals); // cmp.
    pushValue(I.getOperand(2), InstID, Vals);        // newval.
    Vals.push_back(cast<AtomicCmpXchgInst>(I).isVolatile());
    Vals.push_back(
        getEncodedOrdering(cast<AtomicCmpXchgInst>(I).getSuccessOrdering()));
    Vals.push_back(
        getEncodedSyncScopeID(cast<AtomicCmpXchgInst>(I).getSyncScopeID()));
    Vals.push_back(
        getEncodedOrdering(cast<AtomicCmpXchgInst>(I).getFailureOrdering()));
    Vals.push_back(cast<AtomicCmpXchgInst>(I).isWeak());
    break;
  case Instruction::AtomicRMW:
    Code = bitc::FUNC_CODE_INST_ATOMICRMW;
    pushValueAndType(I.getOperand(0), InstID, Vals); // ptrty + ptr
    pushValue(I.getOperand(1), InstID, Vals);        // val.
    Vals.push_back(
        getEncodedRMWOperation(cast<AtomicRMWInst>(I).getOperation()));
    Vals.push_back(cast<AtomicRMWInst>(I).isVolatile());
    Vals.push_back(getEncodedOrdering(cast<AtomicRMWInst>(I).getOrdering()));
    Vals.push_back(
        getEncodedSyncScopeID(cast<AtomicRMWInst>(I).getSyncScopeID()));
    break;
  case Instruction::Fence:
    Code = bitc::FUNC_CODE_INST_FENCE;
    Vals.push_back(getEncodedOrdering(cast<FenceInst>(I).getOrdering()));
    Vals.push_back(getEncodedSyncScopeID(cast<FenceInst>(I).getSyncScopeID()));
    break;
  case Instruction::Call: {
    const CallInst &CI = cast<CallInst>(I);
    FunctionType *FTy = CI.getFunctionType();

    if (CI.hasOperandBundles())
      writeOperandBundles(&CI, InstID);

    Code = bitc::FUNC_CODE_INST_CALL;

    Vals.push_back(VE.getAttributeListID(CI.getAttributes()));

    unsigned Flags = getOptimizationFlags(&I);
    Vals.push_back(CI.getCallingConv() << bitc::CALL_CCONV |
                   unsigned(CI.isTailCall()) << bitc::CALL_TAIL |
                   unsigned(CI.isMustTailCall()) << bitc::CALL_MUSTTAIL |
                   1 << bitc::CALL_EXPLICIT_TYPE |
                   unsigned(CI.isNoTailCall()) << bitc::CALL_NOTAIL |
                   unsigned(Flags != 0) << bitc::CALL_FMF);
    if (Flags != 0)
      Vals.push_back(Flags);

    Vals.push_back(VE.getTypeID(FTy));
    pushValueAndType(CI.getCalledValue(), InstID, Vals); // Callee

    // Emit value #'s for the fixed parameters.
    for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) {
      // Check for labels (can happen with asm labels).
if (FTy->getParamType(i)->isLabelTy()) Vals.push_back(VE.getValueID(CI.getArgOperand(i))); else pushValue(CI.getArgOperand(i), InstID, Vals); // fixed param. } // Emit type/value pairs for varargs params. if (FTy->isVarArg()) { for (unsigned i = FTy->getNumParams(), e = CI.getNumArgOperands(); i != e; ++i) pushValueAndType(CI.getArgOperand(i), InstID, Vals); // varargs } break; } case Instruction::VAArg: Code = bitc::FUNC_CODE_INST_VAARG; Vals.push_back(VE.getTypeID(I.getOperand(0)->getType())); // valistty pushValue(I.getOperand(0), InstID, Vals); // valist. Vals.push_back(VE.getTypeID(I.getType())); // restype. break; } Stream.EmitRecord(Code, Vals, AbbrevToUse); Vals.clear(); } /// Write a GlobalValue VST to the module. The purpose of this data structure is /// to allow clients to efficiently find the function body. void ModuleBitcodeWriter::writeGlobalValueSymbolTable( DenseMap &FunctionToBitcodeIndex) { // Get the offset of the VST we are writing, and backpatch it into // the VST forward declaration record. uint64_t VSTOffset = Stream.GetCurrentBitNo(); // The BitcodeStartBit was the stream offset of the identification block. VSTOffset -= bitcodeStartBit(); assert((VSTOffset & 31) == 0 && "VST block not 32-bit aligned"); // Note that we add 1 here because the offset is relative to one word // before the start of the identification block, which was historically // always the start of the regular bitcode header. Stream.BackpatchWord(VSTOffsetPlaceholder, VSTOffset / 32 + 1); Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4); auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset unsigned FnEntryAbbrev = Stream.EmitAbbrev(std::move(Abbv)); for (const Function &F : M) { uint64_t Record[2]; if (F.isDeclaration()) continue; Record[0] = VE.getValueID(&F); // Save the word offset of the function (from the start of the // actual bitcode written to the stream). uint64_t BitcodeIndex = FunctionToBitcodeIndex[&F] - bitcodeStartBit(); assert((BitcodeIndex & 31) == 0 && "function block not 32-bit aligned"); // Note that we add 1 here because the offset is relative to one word // before the start of the identification block, which was historically // always the start of the regular bitcode header. Record[1] = BitcodeIndex / 32 + 1; Stream.EmitRecord(bitc::VST_CODE_FNENTRY, Record, FnEntryAbbrev); } Stream.ExitBlock(); } /// Emit names for arguments, instructions and basic blocks in a function. void ModuleBitcodeWriter::writeFunctionLevelValueSymbolTable( const ValueSymbolTable &VST) { if (VST.empty()) return; Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4); // FIXME: Set up the abbrev, we know how many values there are! // FIXME: We know if the type names can use 7-bit ascii. SmallVector NameVals; for (const ValueName &Name : VST) { // Figure out the encoding to use for the name. 
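// For example, under getStringEncoding above: a basic block named "entry"
// is all char6 characters, so it takes the 6-bit-per-character
// VST_BBENTRY_6_ABBREV; a C++ mangled name like "_ZN4llvm3fooEv" is also
// char6; "my$tmp" falls back to the 7-bit form ('$' is not char6 but fits
// in 7 bits); and any byte with the high bit set forces the 8-bit form.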
StringEncoding Bits = getStringEncoding(Name.getKey()); unsigned AbbrevToUse = VST_ENTRY_8_ABBREV; NameVals.push_back(VE.getValueID(Name.getValue())); // VST_CODE_ENTRY: [valueid, namechar x N] // VST_CODE_BBENTRY: [bbid, namechar x N] unsigned Code; if (isa(Name.getValue())) { Code = bitc::VST_CODE_BBENTRY; if (Bits == SE_Char6) AbbrevToUse = VST_BBENTRY_6_ABBREV; } else { Code = bitc::VST_CODE_ENTRY; if (Bits == SE_Char6) AbbrevToUse = VST_ENTRY_6_ABBREV; else if (Bits == SE_Fixed7) AbbrevToUse = VST_ENTRY_7_ABBREV; } for (const auto P : Name.getKey()) NameVals.push_back((unsigned char)P); // Emit the finished record. Stream.EmitRecord(Code, NameVals, AbbrevToUse); NameVals.clear(); } Stream.ExitBlock(); } void ModuleBitcodeWriter::writeUseList(UseListOrder &&Order) { assert(Order.Shuffle.size() >= 2 && "Shuffle too small"); unsigned Code; if (isa(Order.V)) Code = bitc::USELIST_CODE_BB; else Code = bitc::USELIST_CODE_DEFAULT; SmallVector Record(Order.Shuffle.begin(), Order.Shuffle.end()); Record.push_back(VE.getValueID(Order.V)); Stream.EmitRecord(Code, Record); } void ModuleBitcodeWriter::writeUseListBlock(const Function *F) { assert(VE.shouldPreserveUseListOrder() && "Expected to be preserving use-list order"); auto hasMore = [&]() { return !VE.UseListOrders.empty() && VE.UseListOrders.back().F == F; }; if (!hasMore()) // Nothing to do. return; Stream.EnterSubblock(bitc::USELIST_BLOCK_ID, 3); while (hasMore()) { writeUseList(std::move(VE.UseListOrders.back())); VE.UseListOrders.pop_back(); } Stream.ExitBlock(); } /// Emit a function body to the module stream. void ModuleBitcodeWriter::writeFunction( const Function &F, DenseMap &FunctionToBitcodeIndex) { // Save the bitcode index of the start of this function block for recording // in the VST. FunctionToBitcodeIndex[&F] = Stream.GetCurrentBitNo(); Stream.EnterSubblock(bitc::FUNCTION_BLOCK_ID, 4); VE.incorporateFunction(F); SmallVector Vals; // Emit the number of basic blocks, so the reader can create them ahead of // time. Vals.push_back(VE.getBasicBlocks().size()); Stream.EmitRecord(bitc::FUNC_CODE_DECLAREBLOCKS, Vals); Vals.clear(); // If there are function-local constants, emit them now. unsigned CstStart, CstEnd; VE.getFunctionConstantRange(CstStart, CstEnd); writeConstants(CstStart, CstEnd, false); // If there is function-local metadata, emit it now. writeFunctionMetadata(F); // Keep a running idea of what the instruction ID is. unsigned InstID = CstEnd; bool NeedsMetadataAttachment = F.hasMetadata(); DILocation *LastDL = nullptr; // Finally, emit all the instructions, in order. for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { writeInstruction(*I, InstID, Vals); if (!I->getType()->isVoidTy()) ++InstID; // If the instruction has metadata, write a metadata attachment later. NeedsMetadataAttachment |= I->hasMetadataOtherThanDebugLoc(); // If the instruction has a debug location, emit it. DILocation *DL = I->getDebugLoc(); if (!DL) continue; if (DL == LastDL) { // Just repeat the same debug loc as last time. Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC_AGAIN, Vals); continue; } Vals.push_back(DL->getLine()); Vals.push_back(DL->getColumn()); Vals.push_back(VE.getMetadataOrNullID(DL->getScope())); Vals.push_back(VE.getMetadataOrNullID(DL->getInlinedAt())); Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC, Vals); Vals.clear(); LastDL = DL; } // Emit names for all the instructions etc. 
if (auto *Symtab = F.getValueSymbolTable()) writeFunctionLevelValueSymbolTable(*Symtab); if (NeedsMetadataAttachment) writeFunctionMetadataAttachment(F); if (VE.shouldPreserveUseListOrder()) writeUseListBlock(&F); VE.purgeFunction(); Stream.ExitBlock(); } // Emit blockinfo, which defines the standard abbreviations etc. void ModuleBitcodeWriter::writeBlockInfo() { // We only want to emit block info records for blocks that have multiple // instances: CONSTANTS_BLOCK, FUNCTION_BLOCK and VALUE_SYMTAB_BLOCK. // Other blocks can define their abbrevs inline. Stream.EnterBlockInfoBlock(); { // 8-bit fixed-width VST_CODE_ENTRY/VST_CODE_BBENTRY strings. auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID, Abbv) != VST_ENTRY_8_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } { // 7-bit fixed width VST_CODE_ENTRY strings. auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID, Abbv) != VST_ENTRY_7_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } { // 6-bit char6 VST_CODE_ENTRY strings. auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID, Abbv) != VST_ENTRY_6_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } { // 6-bit char6 VST_CODE_BBENTRY strings. auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_BBENTRY)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID, Abbv) != VST_BBENTRY_6_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } { // SETTYPE abbrev for CONSTANTS_BLOCK. auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_SETTYPE)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, VE.computeBitsRequiredForTypeIndicies())); if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID, Abbv) != CONSTANTS_SETTYPE_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } { // INTEGER abbrev for CONSTANTS_BLOCK. auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_INTEGER)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID, Abbv) != CONSTANTS_INTEGER_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } { // CE_CAST abbrev for CONSTANTS_BLOCK. auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CE_CAST)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // cast opc Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // typeid VE.computeBitsRequiredForTypeIndicies())); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID, Abbv) != CONSTANTS_CE_CAST_Abbrev) llvm_unreachable("Unexpected abbrev ordering!"); } { // NULL abbrev for CONSTANTS_BLOCK. 
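// CST_CODE_NULL carries no operands, so this abbrev consists of the literal
// code alone; an abbreviated null constant costs just the abbrev id itself.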
auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_NULL)); if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID, Abbv) != CONSTANTS_NULL_Abbrev) llvm_unreachable("Unexpected abbrev ordering!"); } // FIXME: This should only use space for first class types! { // INST_LOAD abbrev for FUNCTION_BLOCK. auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_LOAD)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Ptr Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // dest ty VE.computeBitsRequiredForTypeIndicies())); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // Align Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // volatile if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) != FUNCTION_INST_LOAD_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } { // INST_BINOP abbrev for FUNCTION_BLOCK. auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_BINOP)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LHS Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // RHS Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) != FUNCTION_INST_BINOP_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } { // INST_BINOP_FLAGS abbrev for FUNCTION_BLOCK. auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_BINOP)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LHS Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // RHS Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); // flags + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); // flags if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) != FUNCTION_INST_BINOP_FLAGS_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } { // INST_CAST abbrev for FUNCTION_BLOCK. auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_CAST)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // OpVal Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // dest ty VE.computeBitsRequiredForTypeIndicies())); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) != FUNCTION_INST_CAST_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } { // INST_RET abbrev for FUNCTION_BLOCK. auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_RET)); if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) != FUNCTION_INST_RET_VOID_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } { // INST_RET abbrev for FUNCTION_BLOCK. auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_RET)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // ValID if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) != FUNCTION_INST_RET_VAL_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } { // INST_UNREACHABLE abbrev for FUNCTION_BLOCK. 
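// As with CST_CODE_NULL above, `unreachable` has no operands. Since
// FUNCTION_BLOCK is entered with a 4-bit abbrev id width, an abbreviated
// unreachable should occupy only those 4 bits in the stream (a
// back-of-the-envelope estimate, not something the writer relies on).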
auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_UNREACHABLE)); if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) != FUNCTION_INST_UNREACHABLE_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } { auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_GEP)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // dest ty Log2_32_Ceil(VE.getTypes().size() + 1))); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) != FUNCTION_INST_GEP_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } Stream.ExitBlock(); } /// Write the module path strings, currently only used when generating /// a combined index file. void IndexBitcodeWriter::writeModStrings() { Stream.EnterSubblock(bitc::MODULE_STRTAB_BLOCK_ID, 3); // TODO: See which abbrev sizes we actually need to emit // 8-bit fixed-width MST_ENTRY strings. auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_ENTRY)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); unsigned Abbrev8Bit = Stream.EmitAbbrev(std::move(Abbv)); // 7-bit fixed width MST_ENTRY strings. Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_ENTRY)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); unsigned Abbrev7Bit = Stream.EmitAbbrev(std::move(Abbv)); // 6-bit char6 MST_ENTRY strings. Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_ENTRY)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); unsigned Abbrev6Bit = Stream.EmitAbbrev(std::move(Abbv)); // Module Hash, 160 bits SHA1. Optionally, emitted after each MST_CODE_ENTRY. Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_HASH)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); unsigned AbbrevHash = Stream.EmitAbbrev(std::move(Abbv)); SmallVector Vals; forEachModule( [&](const StringMapEntry> &MPSE) { StringRef Key = MPSE.getKey(); const auto &Value = MPSE.getValue(); StringEncoding Bits = getStringEncoding(Key); unsigned AbbrevToUse = Abbrev8Bit; if (Bits == SE_Char6) AbbrevToUse = Abbrev6Bit; else if (Bits == SE_Fixed7) AbbrevToUse = Abbrev7Bit; Vals.push_back(Value.first); Vals.append(Key.begin(), Key.end()); // Emit the finished record. Stream.EmitRecord(bitc::MST_CODE_ENTRY, Vals, AbbrevToUse); // Emit an optional hash for the module now const auto &Hash = Value.second; if (llvm::any_of(Hash, [](uint32_t H) { return H; })) { Vals.assign(Hash.begin(), Hash.end()); // Emit the hash record. Stream.EmitRecord(bitc::MST_CODE_HASH, Vals, AbbrevHash); } Vals.clear(); }); Stream.ExitBlock(); } /// Write the function type metadata related records that need to appear before /// a function summary entry (whether per-module or combined). 
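/// For example, a function whose body contains
///   %r = call i1 @llvm.type.test(i8* %p, metadata !"_ZTS1A")
/// would have its summary preceded by an FS_TYPE_TESTS record carrying the
/// GUID of "_ZTS1A" (an illustrative type identifier).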
static void writeFunctionTypeMetadataRecords(BitstreamWriter &Stream, FunctionSummary *FS) { if (!FS->type_tests().empty()) Stream.EmitRecord(bitc::FS_TYPE_TESTS, FS->type_tests()); SmallVector Record; auto WriteVFuncIdVec = [&](uint64_t Ty, ArrayRef<FunctionSummary::VFuncId> VFs) { if (VFs.empty()) return; Record.clear(); for (auto &VF : VFs) { Record.push_back(VF.GUID); Record.push_back(VF.Offset); } Stream.EmitRecord(Ty, Record); }; WriteVFuncIdVec(bitc::FS_TYPE_TEST_ASSUME_VCALLS, FS->type_test_assume_vcalls()); WriteVFuncIdVec(bitc::FS_TYPE_CHECKED_LOAD_VCALLS, FS->type_checked_load_vcalls()); auto WriteConstVCallVec = [&](uint64_t Ty, ArrayRef<FunctionSummary::ConstVCall> VCs) { for (auto &VC : VCs) { Record.clear(); Record.push_back(VC.VFunc.GUID); Record.push_back(VC.VFunc.Offset); Record.insert(Record.end(), VC.Args.begin(), VC.Args.end()); Stream.EmitRecord(Ty, Record); } }; WriteConstVCallVec(bitc::FS_TYPE_TEST_ASSUME_CONST_VCALL, FS->type_test_assume_const_vcalls()); WriteConstVCallVec(bitc::FS_TYPE_CHECKED_LOAD_CONST_VCALL, FS->type_checked_load_const_vcalls()); } // Helper to emit a single function summary record. void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord( SmallVector &NameVals, GlobalValueSummary *Summary, unsigned ValueID, unsigned FSCallsAbbrev, unsigned FSCallsProfileAbbrev, const Function &F) { NameVals.push_back(ValueID); FunctionSummary *FS = cast<FunctionSummary>(Summary); writeFunctionTypeMetadataRecords(Stream, FS); NameVals.push_back(getEncodedGVSummaryFlags(FS->flags())); NameVals.push_back(FS->instCount()); NameVals.push_back(getEncodedFFlags(FS->fflags())); NameVals.push_back(FS->refs().size()); for (auto &RI : FS->refs()) NameVals.push_back(VE.getValueID(RI.getValue())); bool HasProfileData = F.hasProfileData(); for (auto &ECI : FS->calls()) { NameVals.push_back(getValueId(ECI.first)); if (HasProfileData) NameVals.push_back(static_cast<uint64_t>(ECI.second.Hotness)); } unsigned FSAbbrev = (HasProfileData ? FSCallsProfileAbbrev : FSCallsAbbrev); unsigned Code = (HasProfileData ? bitc::FS_PERMODULE_PROFILE : bitc::FS_PERMODULE); // Emit the finished record. Stream.EmitRecord(Code, NameVals, FSAbbrev); NameVals.clear(); } // Collect the global value references in the given variable's initializer, // and emit them in a summary record. void ModuleBitcodeWriterBase::writeModuleLevelReferences( const GlobalVariable &V, SmallVector &NameVals, unsigned FSModRefsAbbrev) { auto VI = Index->getValueInfo(GlobalValue::getGUID(V.getName())); if (!VI || VI.getSummaryList().empty()) { // Only declarations should not have a summary (a declaration might however // have a summary if the def was in module level asm). assert(V.isDeclaration()); return; } auto *Summary = VI.getSummaryList()[0].get(); NameVals.push_back(VE.getValueID(&V)); GlobalVarSummary *VS = cast<GlobalVarSummary>(Summary); NameVals.push_back(getEncodedGVSummaryFlags(VS->flags())); unsigned SizeBeforeRefs = NameVals.size(); for (auto &RI : VS->refs()) NameVals.push_back(VE.getValueID(RI.getValue())); // Sort the refs for deterministic output; the vector returned by VS->refs() has // been initialized from a DenseSet. std::sort(NameVals.begin() + SizeBeforeRefs, NameVals.end()); Stream.EmitRecord(bitc::FS_PERMODULE_GLOBALVAR_INIT_REFS, NameVals, FSModRefsAbbrev); NameVals.clear(); } // Current version for the summary. // This is bumped whenever we introduce changes in the way some records are // interpreted, like flags for instance. static const uint64_t INDEX_VERSION = 4; /// Emit the per-module summary section alongside the rest of /// the module's bitcode.
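/// A module can request the full LTO flavor with a module flag; e.g. an
/// !llvm.module.flags entry of !{i32 1, !"ThinLTO", i32 0} makes the code
/// below enter FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID instead of
/// GLOBALVAL_SUMMARY_BLOCK_ID.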
void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() { // By default we compile with ThinLTO if the module has a summary, but the // client can request full LTO with a module flag. bool IsThinLTO = true; if (auto *MD = mdconst::extract_or_null<ConstantInt>(M.getModuleFlag("ThinLTO"))) IsThinLTO = MD->getZExtValue(); Stream.EnterSubblock(IsThinLTO ? bitc::GLOBALVAL_SUMMARY_BLOCK_ID : bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID, 4); Stream.EmitRecord(bitc::FS_VERSION, ArrayRef<uint64_t>{INDEX_VERSION}); if (Index->begin() == Index->end()) { Stream.ExitBlock(); return; } for (const auto &GVI : valueIds()) { Stream.EmitRecord(bitc::FS_VALUE_GUID, ArrayRef<uint64_t>{GVI.second, GVI.first}); } // Abbrev for FS_PERMODULE. auto Abbv = std::make_shared<BitCodeAbbrev>(); Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs // numrefs x valueid, n x (valueid) Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); unsigned FSCallsAbbrev = Stream.EmitAbbrev(std::move(Abbv)); // Abbrev for FS_PERMODULE_PROFILE. Abbv = std::make_shared<BitCodeAbbrev>(); Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_PROFILE)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs // numrefs x valueid, n x (valueid, hotness) Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); unsigned FSCallsProfileAbbrev = Stream.EmitAbbrev(std::move(Abbv)); // Abbrev for FS_PERMODULE_GLOBALVAR_INIT_REFS. Abbv = std::make_shared<BitCodeAbbrev>(); Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_GLOBALVAR_INIT_REFS)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); // valueids Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); unsigned FSModRefsAbbrev = Stream.EmitAbbrev(std::move(Abbv)); // Abbrev for FS_ALIAS. Abbv = std::make_shared<BitCodeAbbrev>(); Abbv->Add(BitCodeAbbrevOp(bitc::FS_ALIAS)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid unsigned FSAliasAbbrev = Stream.EmitAbbrev(std::move(Abbv)); SmallVector NameVals; // Iterate over the list of functions instead of the Index to // ensure the ordering is stable. for (const Function &F : M) { // Summary emission does not support anonymous functions; they have to be // renamed using the anonymous function renaming pass. if (!F.hasName()) report_fatal_error("Unexpected anonymous function when writing summary"); ValueInfo VI = Index->getValueInfo(GlobalValue::getGUID(F.getName())); if (!VI || VI.getSummaryList().empty()) { // Only declarations should not have a summary (a declaration might // however have a summary if the def was in module level asm).
assert(F.isDeclaration()); continue; } auto *Summary = VI.getSummaryList()[0].get(); writePerModuleFunctionSummaryRecord(NameVals, Summary, VE.getValueID(&F), FSCallsAbbrev, FSCallsProfileAbbrev, F); } // Capture references from GlobalVariable initializers, which are outside // of a function scope. for (const GlobalVariable &G : M.globals()) writeModuleLevelReferences(G, NameVals, FSModRefsAbbrev); for (const GlobalAlias &A : M.aliases()) { auto *Aliasee = A.getBaseObject(); if (!Aliasee->hasName()) // Nameless functions don't have an entry in the summary; skip them. continue; auto AliasId = VE.getValueID(&A); auto AliaseeId = VE.getValueID(Aliasee); NameVals.push_back(AliasId); auto *Summary = Index->getGlobalValueSummary(A); AliasSummary *AS = cast<AliasSummary>(Summary); NameVals.push_back(getEncodedGVSummaryFlags(AS->flags())); NameVals.push_back(AliaseeId); Stream.EmitRecord(bitc::FS_ALIAS, NameVals, FSAliasAbbrev); NameVals.clear(); } Stream.ExitBlock(); } /// Emit the combined summary section into the combined index file. void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { Stream.EnterSubblock(bitc::GLOBALVAL_SUMMARY_BLOCK_ID, 3); Stream.EmitRecord(bitc::FS_VERSION, ArrayRef<uint64_t>{INDEX_VERSION}); for (const auto &GVI : valueIds()) { Stream.EmitRecord(bitc::FS_VALUE_GUID, ArrayRef<uint64_t>{GVI.second, GVI.first}); } // Abbrev for FS_COMBINED. auto Abbv = std::make_shared<BitCodeAbbrev>(); Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // modid Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs // numrefs x valueid, n x (valueid) Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); unsigned FSCallsAbbrev = Stream.EmitAbbrev(std::move(Abbv)); // Abbrev for FS_COMBINED_PROFILE. Abbv = std::make_shared<BitCodeAbbrev>(); Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_PROFILE)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // modid Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs // numrefs x valueid, n x (valueid, hotness) Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); unsigned FSCallsProfileAbbrev = Stream.EmitAbbrev(std::move(Abbv)); // Abbrev for FS_COMBINED_GLOBALVAR_INIT_REFS. Abbv = std::make_shared<BitCodeAbbrev>(); Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_GLOBALVAR_INIT_REFS)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // modid Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); // valueids Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); unsigned FSModRefsAbbrev = Stream.EmitAbbrev(std::move(Abbv)); // Abbrev for FS_COMBINED_ALIAS.
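// (A combined alias record lays out as [valueid, modid, flags, aliasee
// valueid]; the aliasee is referenced by value id, which is why aliases are
// emitted as a post-pass below, once every summary has been assigned an id.)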
Abbv = std::make_shared<BitCodeAbbrev>(); Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_ALIAS)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // modid Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid unsigned FSAliasAbbrev = Stream.EmitAbbrev(std::move(Abbv)); // The aliases are emitted as a post-pass, and will point to the value // id of the aliasee. Save them in a vector for post-processing. SmallVector Aliases; // Save the value id for each summary for alias emission. DenseMap SummaryToValueIdMap; SmallVector NameVals; // For local linkage, we also emit the original name separately // immediately after the record. auto MaybeEmitOriginalName = [&](GlobalValueSummary &S) { if (!GlobalValue::isLocalLinkage(S.linkage())) return; NameVals.push_back(S.getOriginalName()); Stream.EmitRecord(bitc::FS_COMBINED_ORIGINAL_NAME, NameVals); NameVals.clear(); }; forEachSummary([&](GVInfo I, bool IsAliasee) { GlobalValueSummary *S = I.second; assert(S); auto ValueId = getValueId(I.first); assert(ValueId); SummaryToValueIdMap[S] = *ValueId; // If this is invoked for an aliasee, we want to record the above // mapping, but then not emit a summary entry (if the aliasee is // to be imported, we will invoke this separately with IsAliasee=false). if (IsAliasee) return; if (auto *AS = dyn_cast<AliasSummary>(S)) { // Will process aliases as a post-pass because the reader wants all // globals to be loaded first. Aliases.push_back(AS); return; } if (auto *VS = dyn_cast<GlobalVarSummary>(S)) { NameVals.push_back(*ValueId); NameVals.push_back(Index.getModuleId(VS->modulePath())); NameVals.push_back(getEncodedGVSummaryFlags(VS->flags())); for (auto &RI : VS->refs()) { auto RefValueId = getValueId(RI.getGUID()); if (!RefValueId) continue; NameVals.push_back(*RefValueId); } // Emit the finished record. Stream.EmitRecord(bitc::FS_COMBINED_GLOBALVAR_INIT_REFS, NameVals, FSModRefsAbbrev); NameVals.clear(); MaybeEmitOriginalName(*S); return; } auto *FS = cast<FunctionSummary>(S); writeFunctionTypeMetadataRecords(Stream, FS); NameVals.push_back(*ValueId); NameVals.push_back(Index.getModuleId(FS->modulePath())); NameVals.push_back(getEncodedGVSummaryFlags(FS->flags())); NameVals.push_back(FS->instCount()); NameVals.push_back(getEncodedFFlags(FS->fflags())); // Fill in below NameVals.push_back(0); unsigned Count = 0; for (auto &RI : FS->refs()) { auto RefValueId = getValueId(RI.getGUID()); if (!RefValueId) continue; NameVals.push_back(*RefValueId); Count++; } NameVals[5] = Count; bool HasProfileData = false; for (auto &EI : FS->calls()) { HasProfileData |= EI.second.Hotness != CalleeInfo::HotnessType::Unknown; if (HasProfileData) break; } for (auto &EI : FS->calls()) { // If this GUID doesn't have a value id, it doesn't have a function // summary and we don't need to record any calls to it. GlobalValue::GUID GUID = EI.first.getGUID(); auto CallValueId = getValueId(GUID); if (!CallValueId) { // For SamplePGO, the indirect call targets for local functions will // have their original names annotated in the profile. We try to find the // corresponding PGOFuncName as the GUID. GUID = Index.getGUIDFromOriginalID(GUID); if (GUID == 0) continue; CallValueId = getValueId(GUID); if (!CallValueId) continue; // The mapping from OriginalId to GUID may return a GUID // that corresponds to a static variable. Filter it out here.
// This can happen when // 1) There is a call to a library function which does not have // a CallValueId; // 2) There is a static variable with the OriginalGUID identical // to the GUID of the library function in 1); // When this happens, the logic for SamplePGO kicks in and // the static variable in 2) will be found, which needs to be // filtered out. auto *GVSum = Index.getGlobalValueSummary(GUID, false); if (GVSum && GVSum->getSummaryKind() == GlobalValueSummary::GlobalVarKind) continue; } NameVals.push_back(*CallValueId); if (HasProfileData) NameVals.push_back(static_cast<uint64_t>(EI.second.Hotness)); } unsigned FSAbbrev = (HasProfileData ? FSCallsProfileAbbrev : FSCallsAbbrev); unsigned Code = (HasProfileData ? bitc::FS_COMBINED_PROFILE : bitc::FS_COMBINED); // Emit the finished record. Stream.EmitRecord(Code, NameVals, FSAbbrev); NameVals.clear(); MaybeEmitOriginalName(*S); }); for (auto *AS : Aliases) { auto AliasValueId = SummaryToValueIdMap[AS]; assert(AliasValueId); NameVals.push_back(AliasValueId); NameVals.push_back(Index.getModuleId(AS->modulePath())); NameVals.push_back(getEncodedGVSummaryFlags(AS->flags())); auto AliaseeValueId = SummaryToValueIdMap[&AS->getAliasee()]; assert(AliaseeValueId); NameVals.push_back(AliaseeValueId); // Emit the finished record. Stream.EmitRecord(bitc::FS_COMBINED_ALIAS, NameVals, FSAliasAbbrev); NameVals.clear(); MaybeEmitOriginalName(*AS); } if (!Index.cfiFunctionDefs().empty()) { for (auto &S : Index.cfiFunctionDefs()) { NameVals.push_back(StrtabBuilder.add(S)); NameVals.push_back(S.size()); } Stream.EmitRecord(bitc::FS_CFI_FUNCTION_DEFS, NameVals); NameVals.clear(); } if (!Index.cfiFunctionDecls().empty()) { for (auto &S : Index.cfiFunctionDecls()) { NameVals.push_back(StrtabBuilder.add(S)); NameVals.push_back(S.size()); } Stream.EmitRecord(bitc::FS_CFI_FUNCTION_DECLS, NameVals); NameVals.clear(); } Stream.ExitBlock(); } /// Create the "IDENTIFICATION_BLOCK_ID" containing a single string with the /// current llvm version, and a record for the epoch number. static void writeIdentificationBlock(BitstreamWriter &Stream) { Stream.EnterSubblock(bitc::IDENTIFICATION_BLOCK_ID, 5); // Write the "user readable" string identifying the bitcode producer auto Abbv = std::make_shared<BitCodeAbbrev>(); Abbv->Add(BitCodeAbbrevOp(bitc::IDENTIFICATION_CODE_STRING)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); auto StringAbbrev = Stream.EmitAbbrev(std::move(Abbv)); writeStringRecord(Stream, bitc::IDENTIFICATION_CODE_STRING, "LLVM" LLVM_VERSION_STRING, StringAbbrev); // Write the epoch version Abbv = std::make_shared<BitCodeAbbrev>(); Abbv->Add(BitCodeAbbrevOp(bitc::IDENTIFICATION_CODE_EPOCH)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); auto EpochAbbrev = Stream.EmitAbbrev(std::move(Abbv)); SmallVector Vals = {bitc::BITCODE_CURRENT_EPOCH}; Stream.EmitRecord(bitc::IDENTIFICATION_CODE_EPOCH, Vals, EpochAbbrev); Stream.ExitBlock(); } void ModuleBitcodeWriter::writeModuleHash(size_t BlockStartPos) { // Emit the module's hash. // MODULE_CODE_HASH: [5*i32] if (GenerateHash) { uint32_t Vals[5]; Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&(Buffer)[BlockStartPos], Buffer.size() - BlockStartPos)); StringRef Hash = Hasher.result(); for (int Pos = 0; Pos < 20; Pos += 4) { Vals[Pos / 4] = support::endian::read32be(Hash.data() + Pos); } // Emit the finished record. Stream.EmitRecord(bitc::MODULE_CODE_HASH, Vals); if (ModHash) // Save the written hash value.
std::copy(std::begin(Vals), std::end(Vals), std::begin(*ModHash)); } } void ModuleBitcodeWriter::write() { writeIdentificationBlock(Stream); Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3); size_t BlockStartPos = Buffer.size(); writeModuleVersion(); // Emit blockinfo, which defines the standard abbreviations etc. writeBlockInfo(); // Emit information about attribute groups. writeAttributeGroupTable(); // Emit information about parameter attributes. writeAttributeTable(); // Emit information describing all of the types in the module. writeTypeTable(); writeComdats(); // Emit top-level description of module, including target triple, inline asm, // descriptors for global variables, and function prototype info. writeModuleInfo(); // Emit constants. writeModuleConstants(); // Emit metadata kind names. writeModuleMetadataKinds(); // Emit metadata. writeModuleMetadata(); // Emit module-level use-lists. if (VE.shouldPreserveUseListOrder()) writeUseListBlock(nullptr); writeOperandBundleTags(); writeSyncScopeNames(); // Emit function bodies. DenseMap FunctionToBitcodeIndex; for (Module::const_iterator F = M.begin(), E = M.end(); F != E; ++F) if (!F->isDeclaration()) writeFunction(*F, FunctionToBitcodeIndex); // Need to write after the above call to WriteFunction which populates // the summary information in the index. if (Index) writePerModuleGlobalValueSummary(); writeGlobalValueSymbolTable(FunctionToBitcodeIndex); writeModuleHash(BlockStartPos); Stream.ExitBlock(); } static void writeInt32ToBuffer(uint32_t Value, SmallVectorImpl &Buffer, uint32_t &Position) { support::endian::write32le(&Buffer[Position], Value); Position += 4; } /// If generating a bc file on darwin, we have to emit a /// header and trailer to make it compatible with the system archiver. To do /// this we emit the following header, and then emit a trailer that pads the /// file out to be a multiple of 16 bytes. /// /// struct bc_header { /// uint32_t Magic; // 0x0B17C0DE /// uint32_t Version; // Version, currently always 0. /// uint32_t BitcodeOffset; // Offset to traditional bitcode file. /// uint32_t BitcodeSize; // Size of traditional bitcode file. /// uint32_t CPUType; // CPU specifier. /// ... potentially more later ... /// }; static void emitDarwinBCHeaderAndTrailer(SmallVectorImpl &Buffer, const Triple &TT) { unsigned CPUType = ~0U; // Match x86_64-*, i[3-9]86-*, powerpc-*, powerpc64-*, arm-*, thumb-*, // armv[0-9]-*, thumbv[0-9]-*, armv5te-*, or armv6t2-*. The CPUType is a magic // number from /usr/include/mach/machine.h. It is ok to reproduce the // specific constants here because they are implicitly part of the Darwin ABI. enum { DARWIN_CPU_ARCH_ABI64 = 0x01000000, DARWIN_CPU_TYPE_X86 = 7, DARWIN_CPU_TYPE_ARM = 12, DARWIN_CPU_TYPE_POWERPC = 18 }; Triple::ArchType Arch = TT.getArch(); if (Arch == Triple::x86_64) CPUType = DARWIN_CPU_TYPE_X86 | DARWIN_CPU_ARCH_ABI64; else if (Arch == Triple::x86) CPUType = DARWIN_CPU_TYPE_X86; else if (Arch == Triple::ppc) CPUType = DARWIN_CPU_TYPE_POWERPC; else if (Arch == Triple::ppc64) CPUType = DARWIN_CPU_TYPE_POWERPC | DARWIN_CPU_ARCH_ABI64; else if (Arch == Triple::arm || Arch == Triple::thumb) CPUType = DARWIN_CPU_TYPE_ARM; // Traditional Bitcode starts after header. assert(Buffer.size() >= BWH_HeaderSize && "Expected header size to be reserved"); unsigned BCOffset = BWH_HeaderSize; unsigned BCSize = Buffer.size() - BWH_HeaderSize; // Write the magic and version. 
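// Worked example (hypothetical sizes): a 1000-byte traditional bitcode
// payload yields Magic=0x0B17C0DE, Version=0, BitcodeOffset=20
// (BWH_HeaderSize), BitcodeSize=1000; the resulting 1020-byte file is then
// padded up to 1024 bytes by the loop below.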
unsigned Position = 0; writeInt32ToBuffer(0x0B17C0DE, Buffer, Position); writeInt32ToBuffer(0, Buffer, Position); // Version. writeInt32ToBuffer(BCOffset, Buffer, Position); writeInt32ToBuffer(BCSize, Buffer, Position); writeInt32ToBuffer(CPUType, Buffer, Position); // If the file is not a multiple of 16 bytes, insert dummy padding. while (Buffer.size() & 15) Buffer.push_back(0); } /// Helper to write the header common to all bitcode files. static void writeBitcodeHeader(BitstreamWriter &Stream) { // Emit the file header. Stream.Emit((unsigned)'B', 8); Stream.Emit((unsigned)'C', 8); Stream.Emit(0x0, 4); Stream.Emit(0xC, 4); Stream.Emit(0xE, 4); Stream.Emit(0xD, 4); } BitcodeWriter::BitcodeWriter(SmallVectorImpl &Buffer) : Buffer(Buffer), Stream(new BitstreamWriter(Buffer)) { writeBitcodeHeader(*Stream); } BitcodeWriter::~BitcodeWriter() { assert(WroteStrtab); } void BitcodeWriter::writeBlob(unsigned Block, unsigned Record, StringRef Blob) { Stream->EnterSubblock(Block, 3); auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(Record)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); auto AbbrevNo = Stream->EmitAbbrev(std::move(Abbv)); Stream->EmitRecordWithBlob(AbbrevNo, ArrayRef{Record}, Blob); Stream->ExitBlock(); } void BitcodeWriter::writeSymtab() { assert(!WroteStrtab && !WroteSymtab); // If any module has module-level inline asm, we will require a registered asm // parser for the target so that we can create an accurate symbol table for // the module. for (Module *M : Mods) { if (M->getModuleInlineAsm().empty()) continue; std::string Err; const Triple TT(M->getTargetTriple()); const Target *T = TargetRegistry::lookupTarget(TT.str(), Err); if (!T || !T->hasMCAsmParser()) return; } WroteSymtab = true; SmallVector Symtab; // The irsymtab::build function may be unable to create a symbol table if the // module is malformed (e.g. it contains an invalid alias). Writing a symbol // table is not required for correctness, but we still want to be able to // write malformed modules to bitcode files, so swallow the error. if (Error E = irsymtab::build(Mods, Symtab, StrtabBuilder, Alloc)) { consumeError(std::move(E)); return; } writeBlob(bitc::SYMTAB_BLOCK_ID, bitc::SYMTAB_BLOB, {Symtab.data(), Symtab.size()}); } void BitcodeWriter::writeStrtab() { assert(!WroteStrtab); std::vector Strtab; StrtabBuilder.finalizeInOrder(); Strtab.resize(StrtabBuilder.getSize()); StrtabBuilder.write((uint8_t *)Strtab.data()); writeBlob(bitc::STRTAB_BLOCK_ID, bitc::STRTAB_BLOB, {Strtab.data(), Strtab.size()}); WroteStrtab = true; } void BitcodeWriter::copyStrtab(StringRef Strtab) { writeBlob(bitc::STRTAB_BLOCK_ID, bitc::STRTAB_BLOB, Strtab); WroteStrtab = true; } void BitcodeWriter::writeModule(const Module *M, bool ShouldPreserveUseListOrder, const ModuleSummaryIndex *Index, bool GenerateHash, ModuleHash *ModHash) { assert(!WroteStrtab); // The Mods vector is used by irsymtab::build, which requires non-const // Modules in case it needs to materialize metadata. But the bitcode writer // requires that the module is materialized, so we can cast to non-const here, // after checking that it is in fact materialized. 
assert(M->isMaterialized()); Mods.push_back(const_cast<Module *>(M)); ModuleBitcodeWriter ModuleWriter(M, Buffer, StrtabBuilder, *Stream, ShouldPreserveUseListOrder, Index, GenerateHash, ModHash); ModuleWriter.write(); } void BitcodeWriter::writeIndex( const ModuleSummaryIndex *Index, const std::map *ModuleToSummariesForIndex) { IndexBitcodeWriter IndexWriter(*Stream, StrtabBuilder, *Index, ModuleToSummariesForIndex); IndexWriter.write(); } /// WriteBitcodeToFile - Write the specified module to the specified output /// stream. void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out, bool ShouldPreserveUseListOrder, const ModuleSummaryIndex *Index, bool GenerateHash, ModuleHash *ModHash) { SmallVector Buffer; Buffer.reserve(256*1024); // If this is darwin or another generic macho target, reserve space for the // header. Triple TT(M->getTargetTriple()); if (TT.isOSDarwin() || TT.isOSBinFormatMachO()) Buffer.insert(Buffer.begin(), BWH_HeaderSize, 0); BitcodeWriter Writer(Buffer); Writer.writeModule(M, ShouldPreserveUseListOrder, Index, GenerateHash, ModHash); Writer.writeSymtab(); Writer.writeStrtab(); if (TT.isOSDarwin() || TT.isOSBinFormatMachO()) emitDarwinBCHeaderAndTrailer(Buffer, TT); // Write the generated bitstream to "Out". Out.write((char*)&Buffer.front(), Buffer.size()); } void IndexBitcodeWriter::write() { Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3); writeModuleVersion(); // Write the module paths in the combined index. writeModStrings(); // Write the summary combined index records. writeCombinedGlobalValueSummary(); Stream.ExitBlock(); } // Write the specified module summary index to the given raw output stream, // where it will be written in a new bitcode block. This is used when // writing the combined index file for ThinLTO. When writing a subset of the // index for a distributed backend, provide a \p ModuleToSummariesForIndex map. void llvm::WriteIndexToFile( const ModuleSummaryIndex &Index, raw_ostream &Out, const std::map *ModuleToSummariesForIndex) { SmallVector Buffer; Buffer.reserve(256 * 1024); BitcodeWriter Writer(Buffer); Writer.writeIndex(&Index, ModuleToSummariesForIndex); Writer.writeStrtab(); Out.write((char *)&Buffer.front(), Buffer.size()); } namespace { /// Class to manage the bitcode writing for a thin link bitcode file. class ThinLinkBitcodeWriter : public ModuleBitcodeWriterBase { /// ModHash is for use in ThinLTO incremental build, generated while writing /// the module bitcode file. const ModuleHash *ModHash; public: ThinLinkBitcodeWriter(const Module *M, StringTableBuilder &StrtabBuilder, BitstreamWriter &Stream, const ModuleSummaryIndex &Index, const ModuleHash &ModHash) : ModuleBitcodeWriterBase(M, StrtabBuilder, Stream, /*ShouldPreserveUseListOrder=*/false, &Index), ModHash(&ModHash) {} void write(); private: void writeSimplifiedModuleInfo(); }; } // end anonymous namespace // This function writes a simplified module info for the thin link bitcode file. // It only contains the source file name along with the name (the offset and // size in strtab) and linkage for global values. For the global value info // entry, in order to keep linkage at offset 5, there are three zeros used // as padding. void ThinLinkBitcodeWriter::writeSimplifiedModuleInfo() { SmallVector Vals; // Emit the module's source file name.
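// (For instance, a module built from "a.c" emits MODULE_CODE_SOURCE_FILENAME
// ['a', '.', 'c'] using the densest of the three encodings selected below.)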
{ StringEncoding Bits = getStringEncoding(M.getSourceFileName()); BitCodeAbbrevOp AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8); if (Bits == SE_Char6) AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Char6); else if (Bits == SE_Fixed7) AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7); // MODULE_CODE_SOURCE_FILENAME: [namechar x N] auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_SOURCE_FILENAME)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(AbbrevOpToUse); unsigned FilenameAbbrev = Stream.EmitAbbrev(std::move(Abbv)); for (const auto P : M.getSourceFileName()) Vals.push_back((unsigned char)P); Stream.EmitRecord(bitc::MODULE_CODE_SOURCE_FILENAME, Vals, FilenameAbbrev); Vals.clear(); } // Emit the global variable information. for (const GlobalVariable &GV : M.globals()) { // GLOBALVAR: [strtab offset, strtab size, 0, 0, 0, linkage] Vals.push_back(StrtabBuilder.add(GV.getName())); Vals.push_back(GV.getName().size()); Vals.push_back(0); Vals.push_back(0); Vals.push_back(0); Vals.push_back(getEncodedLinkage(GV)); Stream.EmitRecord(bitc::MODULE_CODE_GLOBALVAR, Vals); Vals.clear(); } // Emit the function proto information. for (const Function &F : M) { // FUNCTION: [strtab offset, strtab size, 0, 0, 0, linkage] Vals.push_back(StrtabBuilder.add(F.getName())); Vals.push_back(F.getName().size()); Vals.push_back(0); Vals.push_back(0); Vals.push_back(0); Vals.push_back(getEncodedLinkage(F)); Stream.EmitRecord(bitc::MODULE_CODE_FUNCTION, Vals); Vals.clear(); } // Emit the alias information. for (const GlobalAlias &A : M.aliases()) { // ALIAS: [strtab offset, strtab size, 0, 0, 0, linkage] Vals.push_back(StrtabBuilder.add(A.getName())); Vals.push_back(A.getName().size()); Vals.push_back(0); Vals.push_back(0); Vals.push_back(0); Vals.push_back(getEncodedLinkage(A)); Stream.EmitRecord(bitc::MODULE_CODE_ALIAS, Vals); Vals.clear(); } // Emit the ifunc information. for (const GlobalIFunc &I : M.ifuncs()) { // IFUNC: [strtab offset, strtab size, 0, 0, 0, linkage] Vals.push_back(StrtabBuilder.add(I.getName())); Vals.push_back(I.getName().size()); Vals.push_back(0); Vals.push_back(0); Vals.push_back(0); Vals.push_back(getEncodedLinkage(I)); Stream.EmitRecord(bitc::MODULE_CODE_IFUNC, Vals); Vals.clear(); } } void ThinLinkBitcodeWriter::write() { Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3); writeModuleVersion(); writeSimplifiedModuleInfo(); writePerModuleGlobalValueSummary(); // Write module hash. Stream.EmitRecord(bitc::MODULE_CODE_HASH, ArrayRef(*ModHash)); Stream.ExitBlock(); } void BitcodeWriter::writeThinLinkBitcode(const Module *M, const ModuleSummaryIndex &Index, const ModuleHash &ModHash) { assert(!WroteStrtab); // The Mods vector is used by irsymtab::build, which requires non-const // Modules in case it needs to materialize metadata. But the bitcode writer // requires that the module is materialized, so we can cast to non-const here, // after checking that it is in fact materialized. assert(M->isMaterialized()); Mods.push_back(const_cast(M)); ThinLinkBitcodeWriter ThinLinkWriter(M, StrtabBuilder, *Stream, Index, ModHash); ThinLinkWriter.write(); } // Write the specified thin link bitcode file to the given raw output stream, // where it will be written in a new bitcode block. This is used when // writing the per-module index file for ThinLTO. 
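// A sketched call site (illustrative, not part of this change):
//   WriteThinLinkBitcodeToFile(M, Out, Index, ModHash);
// where ModHash was previously filled in by a WriteBitcodeToFile(...,
// /*GenerateHash=*/true, &ModHash) call for the same module.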
void llvm::WriteThinLinkBitcodeToFile(const Module *M, raw_ostream &Out, const ModuleSummaryIndex &Index, const ModuleHash &ModHash) { SmallVector Buffer; Buffer.reserve(256 * 1024); BitcodeWriter Writer(Buffer); Writer.writeThinLinkBitcode(M, Index, ModHash); Writer.writeSymtab(); Writer.writeStrtab(); Out.write((char *)&Buffer.front(), Buffer.size()); } Index: head/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp =================================================================== --- head/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp (revision 329982) +++ head/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp (revision 329983) @@ -1,1572 +1,1633 @@ //===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// /// \file /// This file implements the targeting of the InstructionSelector class for /// AArch64. /// \todo This should be generated by TableGen. //===----------------------------------------------------------------------===// #include "AArch64InstrInfo.h" #include "AArch64MachineFunctionInfo.h" #include "AArch64RegisterBankInfo.h" #include "AArch64RegisterInfo.h" #include "AArch64Subtarget.h" #include "AArch64TargetMachine.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #define DEBUG_TYPE "aarch64-isel" using namespace llvm; namespace { #define GET_GLOBALISEL_PREDICATE_BITSET #include "AArch64GenGlobalISel.inc" #undef GET_GLOBALISEL_PREDICATE_BITSET class AArch64InstructionSelector : public InstructionSelector { public: AArch64InstructionSelector(const AArch64TargetMachine &TM, const AArch64Subtarget &STI, const AArch64RegisterBankInfo &RBI); bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override; static const char *getName() { return DEBUG_TYPE; } private: /// tblgen-erated 'select' implementation, used as the initial selector for /// the patterns that don't require complex C++. 
bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const; bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const; bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const; bool selectCompareBranch(MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const; ComplexRendererFns selectArithImmed(MachineOperand &Root) const; ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root, unsigned Size) const; ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const { return selectAddrModeUnscaled(Root, 1); } ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const { return selectAddrModeUnscaled(Root, 2); } ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const { return selectAddrModeUnscaled(Root, 4); } ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const { return selectAddrModeUnscaled(Root, 8); } ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const { return selectAddrModeUnscaled(Root, 16); } ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root, unsigned Size) const; template ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const { return selectAddrModeIndexed(Root, Width / 8); } const AArch64TargetMachine &TM; const AArch64Subtarget &STI; const AArch64InstrInfo &TII; const AArch64RegisterInfo &TRI; const AArch64RegisterBankInfo &RBI; #define GET_GLOBALISEL_PREDICATES_DECL #include "AArch64GenGlobalISel.inc" #undef GET_GLOBALISEL_PREDICATES_DECL // We declare the temporaries used by selectImpl() in the class to minimize the // cost of constructing placeholder values. #define GET_GLOBALISEL_TEMPORARIES_DECL #include "AArch64GenGlobalISel.inc" #undef GET_GLOBALISEL_TEMPORARIES_DECL }; } // end anonymous namespace #define GET_GLOBALISEL_IMPL #include "AArch64GenGlobalISel.inc" #undef GET_GLOBALISEL_IMPL AArch64InstructionSelector::AArch64InstructionSelector( const AArch64TargetMachine &TM, const AArch64Subtarget &STI, const AArch64RegisterBankInfo &RBI) : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), #define GET_GLOBALISEL_PREDICATES_INIT #include "AArch64GenGlobalISel.inc" #undef GET_GLOBALISEL_PREDICATES_INIT #define GET_GLOBALISEL_TEMPORARIES_INIT #include "AArch64GenGlobalISel.inc" #undef GET_GLOBALISEL_TEMPORARIES_INIT { } // FIXME: This should be target-independent, inferred from the types declared // for each class in the bank. static const TargetRegisterClass * getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB, - const RegisterBankInfo &RBI) { + const RegisterBankInfo &RBI, + bool GetAllRegSet = false) { if (RB.getID() == AArch64::GPRRegBankID) { if (Ty.getSizeInBits() <= 32) - return &AArch64::GPR32RegClass; + return GetAllRegSet ? &AArch64::GPR32allRegClass + : &AArch64::GPR32RegClass; if (Ty.getSizeInBits() == 64) - return &AArch64::GPR64RegClass; + return GetAllRegSet ? 
&AArch64::GPR64allRegClass + : &AArch64::GPR64RegClass; return nullptr; } if (RB.getID() == AArch64::FPRRegBankID) { + if (Ty.getSizeInBits() <= 16) + return &AArch64::FPR16RegClass; if (Ty.getSizeInBits() == 32) return &AArch64::FPR32RegClass; if (Ty.getSizeInBits() == 64) return &AArch64::FPR64RegClass; if (Ty.getSizeInBits() == 128) return &AArch64::FPR128RegClass; return nullptr; } return nullptr; } /// Check whether \p I is a currently unsupported binary operation: /// - it has an unsized type /// - an operand is not a vreg /// - not all operands are on the same bank /// These are checks that should someday live in the verifier, but right now, /// these are mostly limitations of the aarch64 selector. static bool unsupportedBinOp(const MachineInstr &I, const AArch64RegisterBankInfo &RBI, const MachineRegisterInfo &MRI, const AArch64RegisterInfo &TRI) { LLT Ty = MRI.getType(I.getOperand(0).getReg()); if (!Ty.isValid()) { DEBUG(dbgs() << "Generic binop register should be typed\n"); return true; } const RegisterBank *PrevOpBank = nullptr; for (auto &MO : I.operands()) { // FIXME: Support non-register operands. if (!MO.isReg()) { DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n"); return true; } // FIXME: Can generic operations have physical register operands? If // so, this will need to be taught about that, and we'll need to get the // bank out of the minimal class for the register. // Either way, this needs to be documented (and possibly verified). if (!TargetRegisterInfo::isVirtualRegister(MO.getReg())) { DEBUG(dbgs() << "Generic inst has physical register operand\n"); return true; } const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI); if (!OpBank) { DEBUG(dbgs() << "Generic register has no bank or class\n"); return true; } if (PrevOpBank && OpBank != PrevOpBank) { DEBUG(dbgs() << "Generic inst operands have different banks\n"); return true; } PrevOpBank = OpBank; } return false; } /// Select the AArch64 opcode for the basic binary operation \p GenericOpc /// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID /// and of size \p OpSize. /// \returns \p GenericOpc if the combination is unsupported.
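/// For example, per the table below, (G_SHL, GPRRegBankID, 32) selects
/// AArch64::LSLVWr and (G_FADD, FPRRegBankID, 64) selects AArch64::FADDDrr.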
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize) { switch (RegBankID) { case AArch64::GPRRegBankID: if (OpSize == 32) { switch (GenericOpc) { case TargetOpcode::G_SHL: return AArch64::LSLVWr; case TargetOpcode::G_LSHR: return AArch64::LSRVWr; case TargetOpcode::G_ASHR: return AArch64::ASRVWr; default: return GenericOpc; } } else if (OpSize == 64) { switch (GenericOpc) { case TargetOpcode::G_GEP: return AArch64::ADDXrr; case TargetOpcode::G_SHL: return AArch64::LSLVXr; case TargetOpcode::G_LSHR: return AArch64::LSRVXr; case TargetOpcode::G_ASHR: return AArch64::ASRVXr; default: return GenericOpc; } } break; case AArch64::FPRRegBankID: switch (OpSize) { case 32: switch (GenericOpc) { case TargetOpcode::G_FADD: return AArch64::FADDSrr; case TargetOpcode::G_FSUB: return AArch64::FSUBSrr; case TargetOpcode::G_FMUL: return AArch64::FMULSrr; case TargetOpcode::G_FDIV: return AArch64::FDIVSrr; default: return GenericOpc; } case 64: switch (GenericOpc) { case TargetOpcode::G_FADD: return AArch64::FADDDrr; case TargetOpcode::G_FSUB: return AArch64::FSUBDrr; case TargetOpcode::G_FMUL: return AArch64::FMULDrr; case TargetOpcode::G_FDIV: return AArch64::FDIVDrr; case TargetOpcode::G_OR: return AArch64::ORRv8i8; default: return GenericOpc; } } break; } return GenericOpc; } /// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc, /// appropriate for the (value) register bank \p RegBankID and of memory access /// size \p OpSize. This returns the variant with the base+unsigned-immediate /// addressing mode (e.g., LDRXui). /// \returns \p GenericOpc if the combination is unsupported. static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize) { const bool isStore = GenericOpc == TargetOpcode::G_STORE; switch (RegBankID) { case AArch64::GPRRegBankID: switch (OpSize) { case 8: return isStore ? AArch64::STRBBui : AArch64::LDRBBui; case 16: return isStore ? AArch64::STRHHui : AArch64::LDRHHui; case 32: return isStore ? AArch64::STRWui : AArch64::LDRWui; case 64: return isStore ? AArch64::STRXui : AArch64::LDRXui; } break; case AArch64::FPRRegBankID: switch (OpSize) { case 8: return isStore ? AArch64::STRBui : AArch64::LDRBui; case 16: return isStore ? AArch64::STRHui : AArch64::LDRHui; case 32: return isStore ? AArch64::STRSui : AArch64::LDRSui; case 64: return isStore ? AArch64::STRDui : AArch64::LDRDui; } break; } return GenericOpc; } +static bool selectFP16CopyFromGPR32(MachineInstr &I, const TargetInstrInfo &TII, + MachineRegisterInfo &MRI, unsigned SrcReg) { + // Copies from gpr32 to fpr16 need to use a sub-register copy. 
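+ // In MIR terms the sequence built here is (vreg names illustrative):
+ //   %tmp:fpr32 = COPY %src:gpr32
+ //   %dst:fpr16 = COPY %tmp.hsub
+ // i.e. a full-width cross-bank copy followed by an hsub subregister copy.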
+ unsigned CopyReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass); + BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::COPY)) + .addDef(CopyReg) + .addUse(SrcReg); + unsigned SubRegCopy = MRI.createVirtualRegister(&AArch64::FPR16RegClass); + BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY)) + .addDef(SubRegCopy) + .addUse(CopyReg, 0, AArch64::hsub); + + MachineOperand &RegOp = I.getOperand(1); + RegOp.setReg(SubRegCopy); + return true; +} + static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI) { unsigned DstReg = I.getOperand(0).getReg(); + unsigned SrcReg = I.getOperand(1).getReg(); + if (TargetRegisterInfo::isPhysicalRegister(DstReg)) { + if (TRI.getRegClass(AArch64::FPR16RegClassID)->contains(DstReg) && + !TargetRegisterInfo::isPhysicalRegister(SrcReg)) { + const RegisterBank &RegBank = *RBI.getRegBank(SrcReg, MRI, TRI); + const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank( + MRI.getType(SrcReg), RegBank, RBI, /* GetAllRegSet */ true); + if (SrcRC == &AArch64::GPR32allRegClass) + return selectFP16CopyFromGPR32(I, TII, MRI, SrcReg); + } assert(I.isCopy() && "Generic operators do not allow physical registers"); return true; } const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI); const unsigned DstSize = MRI.getType(DstReg).getSizeInBits(); - unsigned SrcReg = I.getOperand(1).getReg(); + (void)DstSize; const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI); (void)SrcSize; assert((!TargetRegisterInfo::isPhysicalRegister(SrcReg) || I.isCopy()) && "No phys reg on generic operators"); assert( (DstSize == SrcSize || // Copies are a mean to setup initial types, the number of // bits may not exactly match. (TargetRegisterInfo::isPhysicalRegister(SrcReg) && DstSize <= RBI.getSizeInBits(SrcReg, MRI, TRI)) || // Copies are a mean to copy bits around, as long as we are // on the same register class, that's fine. Otherwise, that // means we need some SUBREG_TO_REG or AND & co. (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && "Copy with different width?!"); assert((DstSize <= 64 || RegBank.getID() == AArch64::FPRRegBankID) && "GPRs cannot get more than 64-bit width values"); - const TargetRegisterClass *RC = nullptr; - if (RegBank.getID() == AArch64::FPRRegBankID) { - if (DstSize <= 16) - RC = &AArch64::FPR16RegClass; - else if (DstSize <= 32) - RC = &AArch64::FPR32RegClass; - else if (DstSize <= 64) - RC = &AArch64::FPR64RegClass; - else if (DstSize <= 128) - RC = &AArch64::FPR128RegClass; - else { - DEBUG(dbgs() << "Unexpected bitcast size " << DstSize << '\n'); - return false; + const TargetRegisterClass *RC = getRegClassForTypeOnBank( + MRI.getType(DstReg), RegBank, RBI, /* GetAllRegSet */ true); + if (!RC) { + DEBUG(dbgs() << "Unexpected bitcast size " << DstSize << '\n'); + return false; + } + + if (!TargetRegisterInfo::isPhysicalRegister(SrcReg)) { + const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(SrcReg); + const TargetRegisterClass *SrcRC = + RegClassOrBank.dyn_cast(); + const RegisterBank *RB = nullptr; + if (!SrcRC) { + RB = RegClassOrBank.get(); + SrcRC = getRegClassForTypeOnBank(MRI.getType(SrcReg), *RB, RBI, true); } - } else { - assert(RegBank.getID() == AArch64::GPRRegBankID && - "Bitcast for the flags?"); - RC = - DstSize <= 32 ? &AArch64::GPR32allRegClass : &AArch64::GPR64allRegClass; + // Copies from fpr16 to gpr32 need to use SUBREG_TO_REG. 
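+ // Roughly (vreg names illustrative):
+ //   %tmp:fpr32 = SUBREG_TO_REG 0, %src:fpr16, hsub
+ // after which the original COPY is rewritten to read the widened fpr32
+ // value %tmp instead of %src.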
+ if (RC == &AArch64::GPR32allRegClass && SrcRC == &AArch64::FPR16RegClass) { + unsigned PromoteReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass); + BuildMI(*I.getParent(), I, I.getDebugLoc(), + TII.get(AArch64::SUBREG_TO_REG)) + .addDef(PromoteReg) + .addImm(0) + .addUse(SrcReg) + .addImm(AArch64::hsub); + MachineOperand &RegOp = I.getOperand(1); + RegOp.setReg(PromoteReg); + } else if (RC == &AArch64::FPR16RegClass && + SrcRC == &AArch64::GPR32allRegClass) { + selectFP16CopyFromGPR32(I, TII, MRI, SrcReg); + } } // No need to constrain SrcReg. It will get constrained when // we hit another of its use or its defs. // Copies do not have constraints. if (!RBI.constrainGenericRegister(DstReg, *RC, MRI)) { DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode()) << " operand\n"); return false; } I.setDesc(TII.get(AArch64::COPY)); return true; } static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) { if (!DstTy.isScalar() || !SrcTy.isScalar()) return GenericOpc; const unsigned DstSize = DstTy.getSizeInBits(); const unsigned SrcSize = SrcTy.getSizeInBits(); switch (DstSize) { case 32: switch (SrcSize) { case 32: switch (GenericOpc) { case TargetOpcode::G_SITOFP: return AArch64::SCVTFUWSri; case TargetOpcode::G_UITOFP: return AArch64::UCVTFUWSri; case TargetOpcode::G_FPTOSI: return AArch64::FCVTZSUWSr; case TargetOpcode::G_FPTOUI: return AArch64::FCVTZUUWSr; default: return GenericOpc; } case 64: switch (GenericOpc) { case TargetOpcode::G_SITOFP: return AArch64::SCVTFUXSri; case TargetOpcode::G_UITOFP: return AArch64::UCVTFUXSri; case TargetOpcode::G_FPTOSI: return AArch64::FCVTZSUWDr; case TargetOpcode::G_FPTOUI: return AArch64::FCVTZUUWDr; default: return GenericOpc; } default: return GenericOpc; } case 64: switch (SrcSize) { case 32: switch (GenericOpc) { case TargetOpcode::G_SITOFP: return AArch64::SCVTFUWDri; case TargetOpcode::G_UITOFP: return AArch64::UCVTFUWDri; case TargetOpcode::G_FPTOSI: return AArch64::FCVTZSUXSr; case TargetOpcode::G_FPTOUI: return AArch64::FCVTZUUXSr; default: return GenericOpc; } case 64: switch (GenericOpc) { case TargetOpcode::G_SITOFP: return AArch64::SCVTFUXDri; case TargetOpcode::G_UITOFP: return AArch64::UCVTFUXDri; case TargetOpcode::G_FPTOSI: return AArch64::FCVTZSUXDr; case TargetOpcode::G_FPTOUI: return AArch64::FCVTZUUXDr; default: return GenericOpc; } default: return GenericOpc; } default: return GenericOpc; }; return GenericOpc; } static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) { switch (P) { default: llvm_unreachable("Unknown condition code!"); case CmpInst::ICMP_NE: return AArch64CC::NE; case CmpInst::ICMP_EQ: return AArch64CC::EQ; case CmpInst::ICMP_SGT: return AArch64CC::GT; case CmpInst::ICMP_SGE: return AArch64CC::GE; case CmpInst::ICMP_SLT: return AArch64CC::LT; case CmpInst::ICMP_SLE: return AArch64CC::LE; case CmpInst::ICMP_UGT: return AArch64CC::HI; case CmpInst::ICMP_UGE: return AArch64CC::HS; case CmpInst::ICMP_ULT: return AArch64CC::LO; case CmpInst::ICMP_ULE: return AArch64CC::LS; } } static void changeFCMPPredToAArch64CC(CmpInst::Predicate P, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2) { CondCode2 = AArch64CC::AL; switch (P) { default: llvm_unreachable("Unknown FP condition!"); case CmpInst::FCMP_OEQ: CondCode = AArch64CC::EQ; break; case CmpInst::FCMP_OGT: CondCode = AArch64CC::GT; break; case CmpInst::FCMP_OGE: CondCode = AArch64CC::GE; break; case CmpInst::FCMP_OLT: CondCode = AArch64CC::MI; break; case CmpInst::FCMP_OLE: CondCode = AArch64CC::LS; 
break; case CmpInst::FCMP_ONE: CondCode = AArch64CC::MI; CondCode2 = AArch64CC::GT; break; case CmpInst::FCMP_ORD: CondCode = AArch64CC::VC; break; case CmpInst::FCMP_UNO: CondCode = AArch64CC::VS; break; case CmpInst::FCMP_UEQ: CondCode = AArch64CC::EQ; CondCode2 = AArch64CC::VS; break; case CmpInst::FCMP_UGT: CondCode = AArch64CC::HI; break; case CmpInst::FCMP_UGE: CondCode = AArch64CC::PL; break; case CmpInst::FCMP_ULT: CondCode = AArch64CC::LT; break; case CmpInst::FCMP_ULE: CondCode = AArch64CC::LE; break; case CmpInst::FCMP_UNE: CondCode = AArch64CC::NE; break; } } bool AArch64InstructionSelector::selectCompareBranch( MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const { const unsigned CondReg = I.getOperand(0).getReg(); MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); MachineInstr *CCMI = MRI.getVRegDef(CondReg); if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg()); if (CCMI->getOpcode() != TargetOpcode::G_ICMP) return false; unsigned LHS = CCMI->getOperand(2).getReg(); unsigned RHS = CCMI->getOperand(3).getReg(); if (!getConstantVRegVal(RHS, MRI)) std::swap(RHS, LHS); const auto RHSImm = getConstantVRegVal(RHS, MRI); if (!RHSImm || *RHSImm != 0) return false; const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI); if (RB.getID() != AArch64::GPRRegBankID) return false; const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate(); if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ) return false; const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits(); unsigned CBOpc = 0; if (CmpWidth <= 32) CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW); else if (CmpWidth == 64) CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX); else return false; auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc)) .addUse(LHS) .addMBB(DestMBB); constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI); I.eraseFromParent(); return true; } bool AArch64InstructionSelector::selectVaStartAAPCS( MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const { return false; } bool AArch64InstructionSelector::selectVaStartDarwin( MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const { AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); unsigned ListReg = I.getOperand(0).getReg(); unsigned ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri)) .addDef(ArgsAddrReg) .addFrameIndex(FuncInfo->getVarArgsStackIndex()) .addImm(0) .addImm(0); constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui)) .addUse(ArgsAddrReg) .addUse(ListReg) .addImm(0) .addMemOperand(*I.memoperands_begin()); constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); I.eraseFromParent(); return true; } bool AArch64InstructionSelector::select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const { assert(I.getParent() && "Instruction should be in a basic block!"); assert(I.getParent()->getParent() && "Instruction should be in a function!"); MachineBasicBlock &MBB = *I.getParent(); MachineFunction &MF = *MBB.getParent(); MachineRegisterInfo &MRI = MF.getRegInfo(); unsigned Opcode = I.getOpcode(); // G_PHI requires same handling as PHI if (!isPreISelGenericOpcode(Opcode) || Opcode == TargetOpcode::G_PHI) { // Certain non-generic instructions also need some special handling.
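// Illustrative sketch (editorial, not from the change itself): for
//   %c(s1) = G_ICMP intpred(ne), %x(s64), 0 ; G_BRCOND %c, %bb.dest
// selectCompareBranch above folds the compare and the branch into a single
//   CBNZX %x, %bb.dest
// instead of materializing %c and testing it with TBNZW.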
if (Opcode == TargetOpcode::LOAD_STACK_GUARD) return constrainSelectedInstRegOperands(I, TII, TRI, RBI); if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) { const unsigned DefReg = I.getOperand(0).getReg(); const LLT DefTy = MRI.getType(DefReg); const TargetRegisterClass *DefRC = nullptr; if (TargetRegisterInfo::isPhysicalRegister(DefReg)) { DefRC = TRI.getRegClass(DefReg); } else { const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(DefReg); DefRC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>(); if (!DefRC) { if (!DefTy.isValid()) { DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n"); return false; } const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>(); DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI); if (!DefRC) { DEBUG(dbgs() << "PHI operand has unexpected size/bank\n"); return false; } } } I.setDesc(TII.get(TargetOpcode::PHI)); return RBI.constrainGenericRegister(DefReg, *DefRC, MRI); } if (I.isCopy()) return selectCopy(I, TII, MRI, TRI, RBI); return true; } if (I.getNumOperands() != I.getNumExplicitOperands()) { DEBUG(dbgs() << "Generic instruction has unexpected implicit operands\n"); return false; } if (selectImpl(I, CoverageInfo)) return true; LLT Ty = I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{}; switch (Opcode) { case TargetOpcode::G_BRCOND: { if (Ty.getSizeInBits() > 32) { // We shouldn't need this on AArch64, but it would be implemented as an // EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if the // bit being tested is < 32. DEBUG(dbgs() << "G_BRCOND has type: " << Ty << ", expected at most 32-bits"); return false; } const unsigned CondReg = I.getOperand(0).getReg(); MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); if (selectCompareBranch(I, MF, MRI)) return true; auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW)) .addUse(CondReg) .addImm(/*bit offset=*/0) .addMBB(DestMBB); I.eraseFromParent(); return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI); } case TargetOpcode::G_BRINDIRECT: { I.setDesc(TII.get(AArch64::BR)); return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } case TargetOpcode::G_FCONSTANT: case TargetOpcode::G_CONSTANT: { const bool isFP = Opcode == TargetOpcode::G_FCONSTANT; const LLT s32 = LLT::scalar(32); const LLT s64 = LLT::scalar(64); const LLT p0 = LLT::pointer(0, 64); const unsigned DefReg = I.getOperand(0).getReg(); const LLT DefTy = MRI.getType(DefReg); const unsigned DefSize = DefTy.getSizeInBits(); const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); // FIXME: Redundant check, but even less readable when factored out. if (isFP) { if (Ty != s32 && Ty != s64) { DEBUG(dbgs() << "Unable to materialize FP " << Ty << " constant, expected: " << s32 << " or " << s64 << '\n'); return false; } if (RB.getID() != AArch64::FPRRegBankID) { DEBUG(dbgs() << "Unable to materialize FP " << Ty << " constant on bank: " << RB << ", expected: FPR\n"); return false; } // The case when we have 0.0 is covered by tablegen. Reject it here so we // can be sure tablegen works correctly and isn't rescued by this code. if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0)) return false; } else { // s32 and s64 are covered by tablegen.
if (Ty != p0) { DEBUG(dbgs() << "Unable to materialize integer " << Ty << " constant, expected: " << s32 << ", " << s64 << ", or " << p0 << '\n'); return false; } if (RB.getID() != AArch64::GPRRegBankID) { DEBUG(dbgs() << "Unable to materialize integer " << Ty << " constant on bank: " << RB << ", expected: GPR\n"); return false; } } const unsigned MovOpc = DefSize == 32 ? AArch64::MOVi32imm : AArch64::MOVi64imm; I.setDesc(TII.get(MovOpc)); if (isFP) { const TargetRegisterClass &GPRRC = DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass; const TargetRegisterClass &FPRRC = DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass; const unsigned DefGPRReg = MRI.createVirtualRegister(&GPRRC); MachineOperand &RegOp = I.getOperand(0); RegOp.setReg(DefGPRReg); BuildMI(MBB, std::next(I.getIterator()), I.getDebugLoc(), TII.get(AArch64::COPY)) .addDef(DefReg) .addUse(DefGPRReg); if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) { DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n"); return false; } MachineOperand &ImmOp = I.getOperand(1); // FIXME: Is going through int64_t always correct? ImmOp.ChangeToImmediate( ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue()); } else if (I.getOperand(1).isCImm()) { uint64_t Val = I.getOperand(1).getCImm()->getZExtValue(); I.getOperand(1).ChangeToImmediate(Val); } else if (I.getOperand(1).isImm()) { uint64_t Val = I.getOperand(1).getImm(); I.getOperand(1).ChangeToImmediate(Val); } constrainSelectedInstRegOperands(I, TII, TRI, RBI); return true; } case TargetOpcode::G_EXTRACT: { LLT SrcTy = MRI.getType(I.getOperand(1).getReg()); + LLT DstTy = MRI.getType(I.getOperand(0).getReg()); + unsigned SrcSize = SrcTy.getSizeInBits(); // Larger extracts are vectors, same-size extracts should be something else // by now (either split up or simplified to a COPY). if (SrcTy.getSizeInBits() > 64 || Ty.getSizeInBits() > 32) return false; - I.setDesc(TII.get(AArch64::UBFMXri)); + I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri)); MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() + Ty.getSizeInBits() - 1); + if (SrcSize < 64) { + assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 && + "unexpected G_EXTRACT types"); + return constrainSelectedInstRegOperands(I, TII, TRI, RBI); + } + unsigned DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64)); BuildMI(MBB, std::next(I.getIterator()), I.getDebugLoc(), TII.get(AArch64::COPY)) .addDef(I.getOperand(0).getReg()) .addUse(DstReg, 0, AArch64::sub_32); RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR32RegClass, MRI); I.getOperand(0).setReg(DstReg); return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } case TargetOpcode::G_INSERT: { LLT SrcTy = MRI.getType(I.getOperand(2).getReg()); + LLT DstTy = MRI.getType(I.getOperand(0).getReg()); + unsigned DstSize = DstTy.getSizeInBits(); + (void)DstSize; // Larger inserts are vectors, same-size ones should be something else by // now (split up or turned into COPYs). if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32) return false; - I.setDesc(TII.get(AArch64::BFMXri)); + I.setDesc(TII.get(DstSize == 64 ? 
AArch64::BFMXri : AArch64::BFMWri)); unsigned LSB = I.getOperand(3).getImm(); unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits(); - I.getOperand(3).setImm((64 - LSB) % 64); + I.getOperand(3).setImm((DstSize - LSB) % DstSize); MachineInstrBuilder(MF, I).addImm(Width - 1); + + if (DstSize < 64) { + assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 && + "unexpected G_INSERT types"); + return constrainSelectedInstRegOperands(I, TII, TRI, RBI); + } unsigned SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64)); BuildMI(MBB, I.getIterator(), I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG)) .addDef(SrcReg) .addImm(0) .addUse(I.getOperand(2).getReg()) .addImm(AArch64::sub_32); RBI.constrainGenericRegister(I.getOperand(2).getReg(), AArch64::GPR32RegClass, MRI); I.getOperand(2).setReg(SrcReg); return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } case TargetOpcode::G_FRAME_INDEX: { // allocas and G_FRAME_INDEX are only supported in addrspace(0). if (Ty != LLT::pointer(0, 64)) { DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty << ", expected: " << LLT::pointer(0, 64) << '\n'); return false; } I.setDesc(TII.get(AArch64::ADDXri)); // MOs for a #0 shifted immediate. I.addOperand(MachineOperand::CreateImm(0)); I.addOperand(MachineOperand::CreateImm(0)); return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } case TargetOpcode::G_GLOBAL_VALUE: { auto GV = I.getOperand(1).getGlobal(); if (GV->isThreadLocal()) { // FIXME: we don't support TLS yet. return false; } unsigned char OpFlags = STI.ClassifyGlobalReference(GV, TM); if (OpFlags & AArch64II::MO_GOT) { I.setDesc(TII.get(AArch64::LOADgot)); I.getOperand(1).setTargetFlags(OpFlags); } else if (TM.getCodeModel() == CodeModel::Large) { // Materialize the global using movz/movk instructions. unsigned MovZDstReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); auto InsertPt = std::next(I.getIterator()); auto MovZ = BuildMI(MBB, InsertPt, I.getDebugLoc(), TII.get(AArch64::MOVZXi)) .addDef(MovZDstReg); MovZ->addOperand(MF, I.getOperand(1)); MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 | AArch64II::MO_NC); MovZ->addOperand(MF, MachineOperand::CreateImm(0)); constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI); auto BuildMovK = [&](unsigned SrcReg, unsigned char Flags, unsigned Offset, unsigned ForceDstReg) { unsigned DstReg = ForceDstReg ? 
ForceDstReg : MRI.createVirtualRegister(&AArch64::GPR64RegClass); auto MovI = BuildMI(MBB, InsertPt, MovZ->getDebugLoc(), TII.get(AArch64::MOVKXi)) .addDef(DstReg) .addReg(SrcReg); MovI->addOperand(MF, MachineOperand::CreateGA( GV, MovZ->getOperand(1).getOffset(), Flags)); MovI->addOperand(MF, MachineOperand::CreateImm(Offset)); constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI); return DstReg; }; unsigned DstReg = BuildMovK(MovZ->getOperand(0).getReg(), AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0); DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0); BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg()); I.eraseFromParent(); return true; } else { I.setDesc(TII.get(AArch64::MOVaddr)); I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE); MachineInstrBuilder MIB(MF, I); MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(), OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); } return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } case TargetOpcode::G_LOAD: case TargetOpcode::G_STORE: { LLT MemTy = Ty; LLT PtrTy = MRI.getType(I.getOperand(1).getReg()); if (PtrTy != LLT::pointer(0, 64)) { DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy << ", expected: " << LLT::pointer(0, 64) << '\n'); return false; } auto &MemOp = **I.memoperands_begin(); if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) { DEBUG(dbgs() << "Atomic load/store not supported yet\n"); return false; } // FIXME: PR36018: Volatile loads in some cases are incorrectly selected by // folding with an extend. Until we have a G_SEXTLOAD solution bail out if // we hit one. if (Opcode == TargetOpcode::G_LOAD && MemOp.isVolatile()) return false; const unsigned PtrReg = I.getOperand(1).getReg(); #ifndef NDEBUG const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI); // Sanity-check the pointer register. assert(PtrRB.getID() == AArch64::GPRRegBankID && "Load/Store pointer operand isn't a GPR"); assert(MRI.getType(PtrReg).isPointer() && "Load/Store pointer operand isn't a pointer"); #endif const unsigned ValReg = I.getOperand(0).getReg(); const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI); const unsigned NewOpc = selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemTy.getSizeInBits()); if (NewOpc == I.getOpcode()) return false; I.setDesc(TII.get(NewOpc)); uint64_t Offset = 0; auto *PtrMI = MRI.getVRegDef(PtrReg); // Try to fold a GEP into our unsigned immediate addressing mode. if (PtrMI->getOpcode() == TargetOpcode::G_GEP) { if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) { int64_t Imm = *COff; const unsigned Size = MemTy.getSizeInBits() / 8; const unsigned Scale = Log2_32(Size); if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) { unsigned Ptr2Reg = PtrMI->getOperand(1).getReg(); I.getOperand(1).setReg(Ptr2Reg); PtrMI = MRI.getVRegDef(Ptr2Reg); Offset = Imm / Size; } } } // If we haven't folded anything into our addressing mode yet, try to fold // a frame index into the base+offset. if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX) I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex()); I.addOperand(MachineOperand::CreateImm(Offset)); // If we're storing a 0, use WZR/XZR. 
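// Worked example of the GEP fold above (editorial): for a 4-byte G_LOAD,
// Size = 4 and Scale = 2, so a G_GEP constant offset qualifies only if it is
// a non-negative multiple of 4 below 0x1000 << 2 = 16384; a byte offset of
// 64 is then encoded as the scaled immediate 64 / 4 = 16.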
if (auto CVal = getConstantVRegVal(ValReg, MRI)) { if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) { if (I.getOpcode() == AArch64::STRWui) I.getOperand(0).setReg(AArch64::WZR); else if (I.getOpcode() == AArch64::STRXui) I.getOperand(0).setReg(AArch64::XZR); } } return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } case TargetOpcode::G_SMULH: case TargetOpcode::G_UMULH: { // Reject the various things we don't support yet. if (unsupportedBinOp(I, RBI, MRI, TRI)) return false; const unsigned DefReg = I.getOperand(0).getReg(); const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); if (RB.getID() != AArch64::GPRRegBankID) { DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n"); return false; } if (Ty != LLT::scalar(64)) { DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty << ", expected: " << LLT::scalar(64) << '\n'); return false; } unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr : AArch64::UMULHrr; I.setDesc(TII.get(NewOpc)); // Now that we selected an opcode, we need to constrain the register // operands to use appropriate classes. return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } case TargetOpcode::G_FADD: case TargetOpcode::G_FSUB: case TargetOpcode::G_FMUL: case TargetOpcode::G_FDIV: case TargetOpcode::G_OR: case TargetOpcode::G_SHL: case TargetOpcode::G_LSHR: case TargetOpcode::G_ASHR: case TargetOpcode::G_GEP: { // Reject the various things we don't support yet. if (unsupportedBinOp(I, RBI, MRI, TRI)) return false; const unsigned OpSize = Ty.getSizeInBits(); const unsigned DefReg = I.getOperand(0).getReg(); const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize); if (NewOpc == I.getOpcode()) return false; I.setDesc(TII.get(NewOpc)); // FIXME: Should the type be always reset in setDesc? // Now that we selected an opcode, we need to constrain the register // operands to use appropriate classes. 
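// e.g. (editorial note): selectBinaryOp above maps the generic opcode plus
// register bank and size to a concrete instruction, such as G_OR on the GPR
// bank at 64 bits becoming ORRXrr; when no mapping exists it returns the
// generic opcode unchanged and we bail out.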
return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } case TargetOpcode::G_PTR_MASK: { uint64_t Align = I.getOperand(2).getImm(); if (Align >= 64 || Align == 0) return false; uint64_t Mask = ~((1ULL << Align) - 1); I.setDesc(TII.get(AArch64::ANDXri)); I.getOperand(2).setImm(AArch64_AM::encodeLogicalImmediate(Mask, 64)); return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } case TargetOpcode::G_PTRTOINT: case TargetOpcode::G_TRUNC: { const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); const LLT SrcTy = MRI.getType(I.getOperand(1).getReg()); const unsigned DstReg = I.getOperand(0).getReg(); const unsigned SrcReg = I.getOperand(1).getReg(); const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); if (DstRB.getID() != SrcRB.getID()) { DEBUG(dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n"); return false; } if (DstRB.getID() == AArch64::GPRRegBankID) { const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB, RBI); if (!DstRC) return false; const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB, RBI); if (!SrcRC) return false; if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n"); return false; } if (DstRC == SrcRC) { // Nothing to be done } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) && SrcTy == LLT::scalar(64)) { llvm_unreachable("TableGen can import this case"); return false; } else if (DstRC == &AArch64::GPR32RegClass && SrcRC == &AArch64::GPR64RegClass) { I.getOperand(1).setSubReg(AArch64::sub_32); } else { DEBUG(dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n"); return false; } I.setDesc(TII.get(TargetOpcode::COPY)); return true; } else if (DstRB.getID() == AArch64::FPRRegBankID) { if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) { I.setDesc(TII.get(AArch64::XTNv4i16)); constrainSelectedInstRegOperands(I, TII, TRI, RBI); return true; } } return false; } case TargetOpcode::G_ANYEXT: { const unsigned DstReg = I.getOperand(0).getReg(); const unsigned SrcReg = I.getOperand(1).getReg(); const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI); if (RBDst.getID() != AArch64::GPRRegBankID) { DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst << ", expected: GPR\n"); return false; } const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI); if (RBSrc.getID() != AArch64::GPRRegBankID) { DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc << ", expected: GPR\n"); return false; } const unsigned DstSize = MRI.getType(DstReg).getSizeInBits(); if (DstSize == 0) { DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n"); return false; } if (DstSize != 64 && DstSize > 32) { DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize << ", expected: 32 or 64\n"); return false; } // At this point G_ANYEXT is just like a plain COPY, but we need // to explicitly form the 64-bit value if any. 
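// Sketch (editorial, not from the change): for %dst(s64) = G_ANYEXT %src(s32)
// the block below first builds
//   %ext:gpr64all = SUBREG_TO_REG 0, %src, %subreg.sub_32
// and then lets selectCopy turn the G_ANYEXT itself into a plain COPY of
// %ext; the high 32 bits are allowed to remain undefined.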
if (DstSize > 32) { unsigned ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass); BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG)) .addDef(ExtSrc) .addImm(0) .addUse(SrcReg) .addImm(AArch64::sub_32); I.getOperand(1).setReg(ExtSrc); } return selectCopy(I, TII, MRI, TRI, RBI); } case TargetOpcode::G_ZEXT: case TargetOpcode::G_SEXT: { unsigned Opcode = I.getOpcode(); const LLT DstTy = MRI.getType(I.getOperand(0).getReg()), SrcTy = MRI.getType(I.getOperand(1).getReg()); const bool isSigned = Opcode == TargetOpcode::G_SEXT; const unsigned DefReg = I.getOperand(0).getReg(); const unsigned SrcReg = I.getOperand(1).getReg(); const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); if (RB.getID() != AArch64::GPRRegBankID) { DEBUG(dbgs() << TII.getName(I.getOpcode()) << " on bank: " << RB << ", expected: GPR\n"); return false; } MachineInstr *ExtI; if (DstTy == LLT::scalar(64)) { // FIXME: Can we avoid manually doing this? if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, MRI)) { DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode) << " operand\n"); return false; } const unsigned SrcXReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG)) .addDef(SrcXReg) .addImm(0) .addUse(SrcReg) .addImm(AArch64::sub_32); const unsigned NewOpc = isSigned ? AArch64::SBFMXri : AArch64::UBFMXri; ExtI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc)) .addDef(DefReg) .addUse(SrcXReg) .addImm(0) .addImm(SrcTy.getSizeInBits() - 1); } else if (DstTy.isScalar() && DstTy.getSizeInBits() <= 32) { const unsigned NewOpc = isSigned ? AArch64::SBFMWri : AArch64::UBFMWri; ExtI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc)) .addDef(DefReg) .addUse(SrcReg) .addImm(0) .addImm(SrcTy.getSizeInBits() - 1); } else { return false; } constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); I.eraseFromParent(); return true; } case TargetOpcode::G_SITOFP: case TargetOpcode::G_UITOFP: case TargetOpcode::G_FPTOSI: case TargetOpcode::G_FPTOUI: { const LLT DstTy = MRI.getType(I.getOperand(0).getReg()), SrcTy = MRI.getType(I.getOperand(1).getReg()); const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy); if (NewOpc == Opcode) return false; I.setDesc(TII.get(NewOpc)); constrainSelectedInstRegOperands(I, TII, TRI, RBI); return true; } case TargetOpcode::G_INTTOPTR: // The importer is currently unable to import pointer types since they // didn't exist in SelectionDAG. return selectCopy(I, TII, MRI, TRI, RBI); case TargetOpcode::G_BITCAST: // Imported SelectionDAG rules can handle every bitcast except those that // bitcast from a type to the same type. Ideally, these shouldn't occur // but we might not run an optimizer that deletes them. 
if (MRI.getType(I.getOperand(0).getReg()) == MRI.getType(I.getOperand(1).getReg())) return selectCopy(I, TII, MRI, TRI, RBI); return false; case TargetOpcode::G_SELECT: { if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) { DEBUG(dbgs() << "G_SELECT cond has type: " << Ty << ", expected: " << LLT::scalar(1) << '\n'); return false; } const unsigned CondReg = I.getOperand(1).getReg(); const unsigned TReg = I.getOperand(2).getReg(); const unsigned FReg = I.getOperand(3).getReg(); unsigned CSelOpc = 0; if (Ty == LLT::scalar(32)) { CSelOpc = AArch64::CSELWr; } else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) { CSelOpc = AArch64::CSELXr; } else { return false; } MachineInstr &TstMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri)) .addDef(AArch64::WZR) .addUse(CondReg) .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); MachineInstr &CSelMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CSelOpc)) .addDef(I.getOperand(0).getReg()) .addUse(TReg) .addUse(FReg) .addImm(AArch64CC::NE); constrainSelectedInstRegOperands(TstMI, TII, TRI, RBI); constrainSelectedInstRegOperands(CSelMI, TII, TRI, RBI); I.eraseFromParent(); return true; } case TargetOpcode::G_ICMP: { if (Ty != LLT::scalar(32)) { DEBUG(dbgs() << "G_ICMP result has type: " << Ty << ", expected: " << LLT::scalar(32) << '\n'); return false; } unsigned CmpOpc = 0; unsigned ZReg = 0; LLT CmpTy = MRI.getType(I.getOperand(2).getReg()); if (CmpTy == LLT::scalar(32)) { CmpOpc = AArch64::SUBSWrr; ZReg = AArch64::WZR; } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) { CmpOpc = AArch64::SUBSXrr; ZReg = AArch64::XZR; } else { return false; } // CSINC increments the result by one when the condition code is false. // Therefore, we have to invert the predicate to get an increment by 1 when // the predicate is true. 
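// Worked example (editorial): for %c = G_ICMP intpred(slt), %a(s32), %b(s32)
// the inverted predicate is GE, so the selection below is
//   SUBSWrr %wzr, %a, %b        ; NZCV := %a - %b
//   CSINCWr %c, %wzr, %wzr, ge  ; %c = ge ? 0 : 0 + 1, i.e. (%a < %b)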
const AArch64CC::CondCode invCC = changeICMPPredToAArch64CC(CmpInst::getInversePredicate( (CmpInst::Predicate)I.getOperand(1).getPredicate())); MachineInstr &CmpMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc)) .addDef(ZReg) .addUse(I.getOperand(2).getReg()) .addUse(I.getOperand(3).getReg()); MachineInstr &CSetMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr)) .addDef(I.getOperand(0).getReg()) .addUse(AArch64::WZR) .addUse(AArch64::WZR) .addImm(invCC); constrainSelectedInstRegOperands(CmpMI, TII, TRI, RBI); constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI); I.eraseFromParent(); return true; } case TargetOpcode::G_FCMP: { if (Ty != LLT::scalar(32)) { DEBUG(dbgs() << "G_FCMP result has type: " << Ty << ", expected: " << LLT::scalar(32) << '\n'); return false; } unsigned CmpOpc = 0; LLT CmpTy = MRI.getType(I.getOperand(2).getReg()); if (CmpTy == LLT::scalar(32)) { CmpOpc = AArch64::FCMPSrr; } else if (CmpTy == LLT::scalar(64)) { CmpOpc = AArch64::FCMPDrr; } else { return false; } // FIXME: regbank AArch64CC::CondCode CC1, CC2; changeFCMPPredToAArch64CC( (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2); MachineInstr &CmpMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc)) .addUse(I.getOperand(2).getReg()) .addUse(I.getOperand(3).getReg()); const unsigned DefReg = I.getOperand(0).getReg(); unsigned Def1Reg = DefReg; if (CC2 != AArch64CC::AL) Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); MachineInstr &CSetMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr)) .addDef(Def1Reg) .addUse(AArch64::WZR) .addUse(AArch64::WZR) .addImm(getInvertedCondCode(CC1)); if (CC2 != AArch64CC::AL) { unsigned Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); MachineInstr &CSet2MI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr)) .addDef(Def2Reg) .addUse(AArch64::WZR) .addUse(AArch64::WZR) .addImm(getInvertedCondCode(CC2)); MachineInstr &OrMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr)) .addDef(DefReg) .addUse(Def1Reg) .addUse(Def2Reg); constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI); constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI); } constrainSelectedInstRegOperands(CmpMI, TII, TRI, RBI); constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI); I.eraseFromParent(); return true; } case TargetOpcode::G_VASTART: return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI) : selectVaStartAAPCS(I, MF, MRI); case TargetOpcode::G_IMPLICIT_DEF: I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF)); return true; } return false; } /// SelectArithImmed - Select an immediate value that can be represented as /// a 12-bit value shifted left by either 0 or 12. If so, return true with /// Val set to the 12-bit value and Shift set to the shifter operand. InstructionSelector::ComplexRendererFns AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const { MachineInstr &MI = *Root.getParent(); MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); MachineRegisterInfo &MRI = MF.getRegInfo(); // This function is called from the addsub_shifted_imm ComplexPattern, // which lists [imm] as the list of opcode it's interested in, however // we still need to check whether the operand is actually an immediate // here because the ComplexPattern opcode list is only used in // root-level opcode matching. 
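// e.g. (editorial illustration of the matching below): 0x555 matches with
// ShiftAmt = 0; 0x555000 matches as Immed = 0x555 with ShiftAmt = 12;
// 0x555001 fits neither form and falls through to return None.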
uint64_t Immed; if (Root.isImm()) Immed = Root.getImm(); else if (Root.isCImm()) Immed = Root.getCImm()->getZExtValue(); else if (Root.isReg()) { MachineInstr *Def = MRI.getVRegDef(Root.getReg()); if (Def->getOpcode() != TargetOpcode::G_CONSTANT) return None; MachineOperand &Op1 = Def->getOperand(1); if (!Op1.isCImm() || Op1.getCImm()->getBitWidth() > 64) return None; Immed = Op1.getCImm()->getZExtValue(); } else return None; unsigned ShiftAmt; if (Immed >> 12 == 0) { ShiftAmt = 0; } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) { ShiftAmt = 12; Immed = Immed >> 12; } else return None; unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt); return {{ [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); }, [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); }, }}; } /// Select a "register plus unscaled signed 9-bit immediate" address. This /// should only match when there is an offset that is not valid for a scaled /// immediate addressing mode. The "Size" argument is the size in bytes of the /// memory reference, which is needed here to know what is valid for a scaled /// immediate. InstructionSelector::ComplexRendererFns AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root, unsigned Size) const { MachineRegisterInfo &MRI = Root.getParent()->getParent()->getParent()->getRegInfo(); if (!Root.isReg()) return None; if (!isBaseWithConstantOffset(Root, MRI)) return None; MachineInstr *RootDef = MRI.getVRegDef(Root.getReg()); if (!RootDef) return None; MachineOperand &OffImm = RootDef->getOperand(2); if (!OffImm.isReg()) return None; MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg()); if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT) return None; int64_t RHSC; MachineOperand &RHSOp1 = RHS->getOperand(1); if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64) return None; RHSC = RHSOp1.getCImm()->getSExtValue(); // If the offset is valid as a scaled immediate, don't match here. if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size))) return None; if (RHSC >= -256 && RHSC < 256) { MachineOperand &Base = RootDef->getOperand(1); return {{ [=](MachineInstrBuilder &MIB) { MIB.add(Base); }, [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); }, }}; } return None; } /// Select a "register plus scaled unsigned 12-bit immediate" address. The /// "Size" argument is the size in bytes of the memory reference, which /// determines the scale. 
InstructionSelector::ComplexRendererFns AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root, unsigned Size) const { MachineRegisterInfo &MRI = Root.getParent()->getParent()->getParent()->getRegInfo(); if (!Root.isReg()) return None; MachineInstr *RootDef = MRI.getVRegDef(Root.getReg()); if (!RootDef) return None; if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) { return {{ [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); }, [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, }}; } if (isBaseWithConstantOffset(Root, MRI)) { MachineOperand &LHS = RootDef->getOperand(1); MachineOperand &RHS = RootDef->getOperand(2); MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg()); MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg()); if (LHSDef && RHSDef) { int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue(); unsigned Scale = Log2_32(Size); if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) { if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) return {{ [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); }, [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); }, }}; return {{ [=](MachineInstrBuilder &MIB) { MIB.add(LHS); }, [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); }, }}; } } } // Before falling back to our general case, check if the unscaled // instructions can handle this. If so, that's preferable. if (selectAddrModeUnscaled(Root, Size).hasValue()) return None; return {{ [=](MachineInstrBuilder &MIB) { MIB.add(Root); }, [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, }}; } namespace llvm { InstructionSelector * createAArch64InstructionSelector(const AArch64TargetMachine &TM, AArch64Subtarget &Subtarget, AArch64RegisterBankInfo &RBI) { return new AArch64InstructionSelector(TM, Subtarget, RBI); } } Index: head/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- head/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (revision 329982) +++ head/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (revision 329983) @@ -1,4828 +1,4834 @@ //===- SIInstrInfo.cpp - SI Instruction Information ----------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // /// \file /// \brief SI Implementation of TargetInstrInfo. 
// //===----------------------------------------------------------------------===// #include "SIInstrInfo.h" #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "GCNHazardRecognizer.h" #include "SIDefines.h" #include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/LLVMContext.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetMachine.h" #include <cassert> #include <cstdint> #include <iterator> #include <utility> using namespace llvm; // Must be at least 4 to be able to branch over minimum unconditional branch // code. This is only for making it possible to write reasonably small tests for // long branches. static cl::opt<unsigned> BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), cl::desc("Restrict range of branch instructions (DEBUG)")); SIInstrInfo::SIInstrInfo(const SISubtarget &ST) : AMDGPUInstrInfo(ST), RI(ST), ST(ST) {} //===----------------------------------------------------------------------===// // TargetInstrInfo callbacks //===----------------------------------------------------------------------===// static unsigned getNumOperandsNoGlue(SDNode *Node) { unsigned N = Node->getNumOperands(); while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue) --N; return N; } static SDValue findChainOperand(SDNode *Load) { SDValue LastOp = Load->getOperand(getNumOperandsNoGlue(Load) - 1); assert(LastOp.getValueType() == MVT::Other && "Chain missing from load node"); return LastOp; } /// \brief Returns true if both nodes have the same value for the given /// operand \p Op, or if both nodes do not have this operand. static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) { unsigned Opc0 = N0->getMachineOpcode(); unsigned Opc1 = N1->getMachineOpcode(); int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName); int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName); if (Op0Idx == -1 && Op1Idx == -1) return true; if ((Op0Idx == -1 && Op1Idx != -1) || (Op1Idx == -1 && Op0Idx != -1)) return false; // getNamedOperandIdx returns the index for the MachineInstr's operands, // which includes the result as the first operand. We are indexing into the // MachineSDNode's operands, so we need to skip the result operand to get // the real index.
--Op0Idx; --Op1Idx; return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx); } bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, AliasAnalysis *AA) const { // TODO: The generic check fails for VALU instructions that should be // rematerializable due to implicit reads of exec. We really want all of the // generic logic for this except for this. switch (MI.getOpcode()) { case AMDGPU::V_MOV_B32_e32: case AMDGPU::V_MOV_B32_e64: case AMDGPU::V_MOV_B64_PSEUDO: return true; default: return false; } } bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const { if (!Load0->isMachineOpcode() || !Load1->isMachineOpcode()) return false; unsigned Opc0 = Load0->getMachineOpcode(); unsigned Opc1 = Load1->getMachineOpcode(); // Make sure both are actually loads. if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad()) return false; if (isDS(Opc0) && isDS(Opc1)) { // FIXME: Handle this case: if (getNumOperandsNoGlue(Load0) != getNumOperandsNoGlue(Load1)) return false; // Check base reg. if (Load0->getOperand(1) != Load1->getOperand(1)) return false; // Check chain. if (findChainOperand(Load0) != findChainOperand(Load1)) return false; // Skip read2 / write2 variants for simplicity. // TODO: We should report true if the used offsets are adjacent (excluded // st64 versions). if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::data1) != -1 || AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::data1) != -1) return false; Offset0 = cast<ConstantSDNode>(Load0->getOperand(2))->getZExtValue(); Offset1 = cast<ConstantSDNode>(Load1->getOperand(2))->getZExtValue(); return true; } if (isSMRD(Opc0) && isSMRD(Opc1)) { // Skip time and cache invalidation instructions. if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::sbase) == -1 || AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::sbase) == -1) return false; assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1)); // Check base reg. if (Load0->getOperand(0) != Load1->getOperand(0)) return false; const ConstantSDNode *Load0Offset = dyn_cast<ConstantSDNode>(Load0->getOperand(1)); const ConstantSDNode *Load1Offset = dyn_cast<ConstantSDNode>(Load1->getOperand(1)); if (!Load0Offset || !Load1Offset) return false; // Check chain. if (findChainOperand(Load0) != findChainOperand(Load1)) return false; Offset0 = Load0Offset->getZExtValue(); Offset1 = Load1Offset->getZExtValue(); return true; } // MUBUF and MTBUF can access the same addresses. if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1))) { // MUBUF and MTBUF have vaddr at different indices. if (!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::soffset) || findChainOperand(Load0) != findChainOperand(Load1) || !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::vaddr) || !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::srsrc)) return false; int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset); int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset); if (OffIdx0 == -1 || OffIdx1 == -1) return false; // getNamedOperandIdx returns the index for MachineInstrs. Since they // include the output in the operand list, but SDNodes don't, we need to // subtract the index by one. --OffIdx0; --OffIdx1; SDValue Off0 = Load0->getOperand(OffIdx0); SDValue Off1 = Load1->getOperand(OffIdx1); // The offset might be a FrameIndexSDNode.
if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1)) return false; Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue(); Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue(); return true; } return false; } static bool isStride64(unsigned Opc) { switch (Opc) { case AMDGPU::DS_READ2ST64_B32: case AMDGPU::DS_READ2ST64_B64: case AMDGPU::DS_WRITE2ST64_B32: case AMDGPU::DS_WRITE2ST64_B64: return true; default: return false; } } bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, const TargetRegisterInfo *TRI) const { unsigned Opc = LdSt.getOpcode(); if (isDS(LdSt)) { const MachineOperand *OffsetImm = getNamedOperand(LdSt, AMDGPU::OpName::offset); if (OffsetImm) { // Normal, single offset LDS instruction. const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::addr); BaseReg = AddrReg->getReg(); Offset = OffsetImm->getImm(); return true; } // The 2 offset instructions use offset0 and offset1 instead. We can treat // these as a load with a single offset if the 2 offsets are consecutive. We // will use this for some partially aligned loads. const MachineOperand *Offset0Imm = getNamedOperand(LdSt, AMDGPU::OpName::offset0); const MachineOperand *Offset1Imm = getNamedOperand(LdSt, AMDGPU::OpName::offset1); uint8_t Offset0 = Offset0Imm->getImm(); uint8_t Offset1 = Offset1Imm->getImm(); if (Offset1 > Offset0 && Offset1 - Offset0 == 1) { // Each of these offsets is in element sized units, so we need to convert // to bytes of the individual reads. unsigned EltSize; if (LdSt.mayLoad()) EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, 0)) / 16; else { assert(LdSt.mayStore()); int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0); EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, Data0Idx)) / 8; } if (isStride64(Opc)) EltSize *= 64; const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::addr); BaseReg = AddrReg->getReg(); Offset = EltSize * Offset0; return true; } return false; } if (isMUBUF(LdSt) || isMTBUF(LdSt)) { const MachineOperand *SOffset = getNamedOperand(LdSt, AMDGPU::OpName::soffset); if (SOffset && SOffset->isReg()) return false; const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr); if (!AddrReg) return false; const MachineOperand *OffsetImm = getNamedOperand(LdSt, AMDGPU::OpName::offset); BaseReg = AddrReg->getReg(); Offset = OffsetImm->getImm(); if (SOffset) // soffset can be an inline immediate. Offset += SOffset->getImm(); return true; } if (isSMRD(LdSt)) { const MachineOperand *OffsetImm = getNamedOperand(LdSt, AMDGPU::OpName::offset); if (!OffsetImm) return false; const MachineOperand *SBaseReg = getNamedOperand(LdSt, AMDGPU::OpName::sbase); BaseReg = SBaseReg->getReg(); Offset = OffsetImm->getImm(); return true; } if (isFLAT(LdSt)) { const MachineOperand *VAddr = getNamedOperand(LdSt, AMDGPU::OpName::vaddr); if (VAddr) { // Can't analyze 2 offsets. if (getNamedOperand(LdSt, AMDGPU::OpName::saddr)) return false; BaseReg = VAddr->getReg(); } else { // scratch instructions have either vaddr or saddr.
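// Worked example for the DS two-offset case above (editorial): a
// DS_READ2_B32 with offset0 = 4 and offset1 = 5 reads two consecutive
// 4-byte elements, so it is reported as a single access with
// Offset = EltSize * offset0 = 16 bytes from the base register.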
BaseReg = getNamedOperand(LdSt, AMDGPU::OpName::saddr)->getReg(); } Offset = getNamedOperand(LdSt, AMDGPU::OpName::offset)->getImm(); return true; } return false; } static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, unsigned BaseReg1, const MachineInstr &MI2, unsigned BaseReg2) { if (BaseReg1 == BaseReg2) return true; if (!MI1.hasOneMemOperand() || !MI2.hasOneMemOperand()) return false; auto MO1 = *MI1.memoperands_begin(); auto MO2 = *MI2.memoperands_begin(); if (MO1->getAddrSpace() != MO2->getAddrSpace()) return false; auto Base1 = MO1->getValue(); auto Base2 = MO2->getValue(); if (!Base1 || !Base2) return false; const MachineFunction &MF = *MI1.getParent()->getParent(); const DataLayout &DL = MF.getFunction().getParent()->getDataLayout(); Base1 = GetUnderlyingObject(Base1, DL); Base2 = GetUnderlyingObject(Base2, DL); if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2)) return false; return Base1 == Base2; } bool SIInstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt, unsigned BaseReg1, MachineInstr &SecondLdSt, unsigned BaseReg2, unsigned NumLoads) const { if (!memOpsHaveSameBasePtr(FirstLdSt, BaseReg1, SecondLdSt, BaseReg2)) return false; const MachineOperand *FirstDst = nullptr; const MachineOperand *SecondDst = nullptr; if ((isMUBUF(FirstLdSt) && isMUBUF(SecondLdSt)) || (isMTBUF(FirstLdSt) && isMTBUF(SecondLdSt)) || (isFLAT(FirstLdSt) && isFLAT(SecondLdSt))) { const unsigned MaxGlobalLoadCluster = 6; if (NumLoads > MaxGlobalLoadCluster) return false; FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdata); if (!FirstDst) FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdst); SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdata); if (!SecondDst) SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdst); } else if (isSMRD(FirstLdSt) && isSMRD(SecondLdSt)) { FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::sdst); SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::sdst); } else if (isDS(FirstLdSt) && isDS(SecondLdSt)) { FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdst); SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdst); } if (!FirstDst || !SecondDst) return false; // Try to limit clustering based on the total number of bytes loaded // rather than the number of instructions. This is done to help reduce // register pressure. The method used is somewhat inexact, though, // because it assumes that all loads in the cluster will load the // same number of bytes as FirstLdSt. // The unit of this value is bytes. // FIXME: This needs finer tuning.
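// e.g. (editorial): four VGPR_32 loads total 4 * 4 = 16 bytes and stay
// within the 16-byte threshold defined below, while two VReg_128 loads
// total 2 * 16 = 32 bytes and are not clustered.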
unsigned LoadClusterThreshold = 16; const MachineRegisterInfo &MRI = FirstLdSt.getParent()->getParent()->getRegInfo(); const TargetRegisterClass *DstRC = MRI.getRegClass(FirstDst->getReg()); return (NumLoads * (RI.getRegSizeInBits(*DstRC) / 8)) <= LoadClusterThreshold; } static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) { MachineFunction *MF = MBB.getParent(); DiagnosticInfoUnsupported IllegalCopy(MF->getFunction(), "illegal SGPR to VGPR copy", DL, DS_Error); LLVMContext &C = MF->getFunction().getContext(); C.diagnose(IllegalCopy); BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_ILLEGAL_COPY), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); } void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { const TargetRegisterClass *RC = RI.getPhysRegClass(DestReg); if (RC == &AMDGPU::VGPR_32RegClass) { assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) || AMDGPU::SReg_32RegClass.contains(SrcReg)); BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); return; } if (RC == &AMDGPU::SReg_32_XM0RegClass || RC == &AMDGPU::SReg_32RegClass) { if (SrcReg == AMDGPU::SCC) { BuildMI(MBB, MI, DL, get(AMDGPU::S_CSELECT_B32), DestReg) .addImm(-1) .addImm(0); return; } if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) { reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc); return; } BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); return; } if (RC == &AMDGPU::SReg_64RegClass) { if (DestReg == AMDGPU::VCC) { if (AMDGPU::SReg_64RegClass.contains(SrcReg)) { BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC) .addReg(SrcReg, getKillRegState(KillSrc)); } else { // FIXME: Hack until VReg_1 removed. 
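// i.e. (editorial note): copying a VGPR-held boolean %v into VCC is emitted
// below as V_CMP_NE_U32_e32 0, %v, which writes VCC implicitly and
// re-materializes the condition as a 64-bit lane mask rather than moving
// the register directly.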
assert(AMDGPU::VGPR_32RegClass.contains(SrcReg)); BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32)) .addImm(0) .addReg(SrcReg, getKillRegState(KillSrc)); } return; } if (!AMDGPU::SReg_64RegClass.contains(SrcReg)) { reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc); return; } BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); return; } if (DestReg == AMDGPU::SCC) { assert(AMDGPU::SReg_32RegClass.contains(SrcReg)); BuildMI(MBB, MI, DL, get(AMDGPU::S_CMP_LG_U32)) .addReg(SrcReg, getKillRegState(KillSrc)) .addImm(0); return; } unsigned EltSize = 4; unsigned Opcode = AMDGPU::V_MOV_B32_e32; if (RI.isSGPRClass(RC)) { if (RI.getRegSizeInBits(*RC) > 32) { Opcode = AMDGPU::S_MOV_B64; EltSize = 8; } else { Opcode = AMDGPU::S_MOV_B32; EltSize = 4; } if (!RI.isSGPRClass(RI.getPhysRegClass(SrcReg))) { reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc); return; } } ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RC, EltSize); bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg); for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) { unsigned SubIdx; if (Forward) SubIdx = SubIndices[Idx]; else SubIdx = SubIndices[SubIndices.size() - Idx - 1]; MachineInstrBuilder Builder = BuildMI(MBB, MI, DL, get(Opcode), RI.getSubReg(DestReg, SubIdx)); Builder.addReg(RI.getSubReg(SrcReg, SubIdx)); if (Idx == 0) Builder.addReg(DestReg, RegState::Define | RegState::Implicit); bool UseKill = KillSrc && Idx == SubIndices.size() - 1; Builder.addReg(SrcReg, getKillRegState(UseKill) | RegState::Implicit); } } int SIInstrInfo::commuteOpcode(unsigned Opcode) const { int NewOpc; // Try to map original to commuted opcode NewOpc = AMDGPU::getCommuteRev(Opcode); if (NewOpc != -1) // Check if the commuted (REV) opcode exists on the target. return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1; // Try to map commuted to original opcode NewOpc = AMDGPU::getCommuteOrig(Opcode); if (NewOpc != -1) // Check if the original (non-REV) opcode exists on the target. return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1; return Opcode; } void SIInstrInfo::materializeImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned DestReg, int64_t Value) const { MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); const TargetRegisterClass *RegClass = MRI.getRegClass(DestReg); if (RegClass == &AMDGPU::SReg_32RegClass || RegClass == &AMDGPU::SGPR_32RegClass || RegClass == &AMDGPU::SReg_32_XM0RegClass || RegClass == &AMDGPU::SReg_32_XM0_XEXECRegClass) { BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg) .addImm(Value); return; } if (RegClass == &AMDGPU::SReg_64RegClass || RegClass == &AMDGPU::SGPR_64RegClass || RegClass == &AMDGPU::SReg_64_XEXECRegClass) { BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg) .addImm(Value); return; } if (RegClass == &AMDGPU::VGPR_32RegClass) { BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg) .addImm(Value); return; } if (RegClass == &AMDGPU::VReg_64RegClass) { BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO), DestReg) .addImm(Value); return; } unsigned EltSize = 4; unsigned Opcode = AMDGPU::V_MOV_B32_e32; if (RI.isSGPRClass(RegClass)) { if (RI.getRegSizeInBits(*RegClass) > 32) { Opcode = AMDGPU::S_MOV_B64; EltSize = 8; } else { Opcode = AMDGPU::S_MOV_B32; EltSize = 4; } } ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RegClass, EltSize); for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) { int64_t IdxValue = Idx == 0 ?
Value : 0; MachineInstrBuilder Builder = BuildMI(MBB, MI, DL, get(Opcode), RI.getSubReg(DestReg, Idx)); Builder.addImm(IdxValue); } } const TargetRegisterClass * SIInstrInfo::getPreferredSelectRegClass(unsigned Size) const { return &AMDGPU::VGPR_32RegClass; } void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, unsigned DstReg, ArrayRef<MachineOperand> Cond, unsigned TrueReg, unsigned FalseReg) const { MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass && "Not a VGPR32 reg"); if (Cond.size() == 1) { unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg) .add(Cond[0]); BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) .addReg(FalseReg) .addReg(TrueReg) .addReg(SReg); } else if (Cond.size() == 2) { assert(Cond[0].isImm() && "Cond[0] is not an immediate"); switch (Cond[0].getImm()) { case SIInstrInfo::SCC_TRUE: { unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg) .addImm(-1) .addImm(0); BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) .addReg(FalseReg) .addReg(TrueReg) .addReg(SReg); break; } case SIInstrInfo::SCC_FALSE: { unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg) .addImm(0) .addImm(-1); BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) .addReg(FalseReg) .addReg(TrueReg) .addReg(SReg); break; } case SIInstrInfo::VCCNZ: { MachineOperand RegOp = Cond[1]; RegOp.setImplicit(false); unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg) .add(RegOp); BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) .addReg(FalseReg) .addReg(TrueReg) .addReg(SReg); break; } case SIInstrInfo::VCCZ: { MachineOperand RegOp = Cond[1]; RegOp.setImplicit(false); unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg) .add(RegOp); BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) .addReg(TrueReg) .addReg(FalseReg) .addReg(SReg); break; } case SIInstrInfo::EXECNZ: { unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2) .addImm(0); BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg) .addImm(-1) .addImm(0); BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) .addReg(FalseReg) .addReg(TrueReg) .addReg(SReg); break; } case SIInstrInfo::EXECZ: { unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2) .addImm(0); BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg) .addImm(0) .addImm(-1); BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) .addReg(FalseReg) .addReg(TrueReg) .addReg(SReg); llvm_unreachable("Unhandled branch predicate EXECZ"); break; } default: llvm_unreachable("invalid branch predicate"); } } else { llvm_unreachable("Can only handle Cond size 1 or 2"); } } unsigned SIInstrInfo::insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, unsigned SrcReg, int Value) const { MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); unsigned Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_EQ_I32_e64), Reg) .addImm(Value) .addReg(SrcReg); return Reg; } unsigned SIInstrInfo::insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, unsigned SrcReg, int Value) const { MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_NE_I32_e64), Reg) .addImm(Value) .addReg(SrcReg); return Reg; } unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const { if (RI.getRegSizeInBits(*DstRC) == 32) { return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32; } else if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC)) { return AMDGPU::S_MOV_B64; } else if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC)) { return AMDGPU::V_MOV_B64_PSEUDO; } return AMDGPU::COPY; } static unsigned getSGPRSpillSaveOpcode(unsigned Size) { switch (Size) { case 4: return AMDGPU::SI_SPILL_S32_SAVE; case 8: return AMDGPU::SI_SPILL_S64_SAVE; case 16: return AMDGPU::SI_SPILL_S128_SAVE; case 32: return AMDGPU::SI_SPILL_S256_SAVE; case 64: return AMDGPU::SI_SPILL_S512_SAVE; default: llvm_unreachable("unknown register size"); } } static unsigned getVGPRSpillSaveOpcode(unsigned Size) { switch (Size) { case 4: return AMDGPU::SI_SPILL_V32_SAVE; case 8: return AMDGPU::SI_SPILL_V64_SAVE; case 12: return AMDGPU::SI_SPILL_V96_SAVE; case 16: return AMDGPU::SI_SPILL_V128_SAVE; case 32: return AMDGPU::SI_SPILL_V256_SAVE; case 64: return AMDGPU::SI_SPILL_V512_SAVE; default: llvm_unreachable("unknown register size"); } } void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { MachineFunction *MF = MBB.getParent(); SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); MachineFrameInfo &FrameInfo = MF->getFrameInfo(); DebugLoc DL = MBB.findDebugLoc(MI); assert(SrcReg != MFI->getStackPtrOffsetReg() && SrcReg != MFI->getFrameOffsetReg() && SrcReg != MFI->getScratchWaveOffsetReg()); unsigned Size = FrameInfo.getObjectSize(FrameIndex); unsigned Align = FrameInfo.getObjectAlignment(FrameIndex); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(*MF, FrameIndex); MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, Size, Align); unsigned SpillSize = TRI->getSpillSize(*RC); if (RI.isSGPRClass(RC)) { MFI->setHasSpilledSGPRs(); // We are only allowed to create one new instruction when spilling // registers, so we need to use pseudo instruction for spilling SGPRs. const MCInstrDesc &OpDesc = get(getSGPRSpillSaveOpcode(SpillSize)); // The SGPR spill/restore instructions only work on numbered SGPRs, so we need // to make sure we are using the correct register class. if (TargetRegisterInfo::isVirtualRegister(SrcReg) && SpillSize == 4) { MachineRegisterInfo &MRI = MF->getRegInfo(); MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass); } MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc) .addReg(SrcReg, getKillRegState(isKill)) // data .addFrameIndex(FrameIndex) // addr .addMemOperand(MMO) .addReg(MFI->getScratchRSrcReg(), RegState::Implicit) .addReg(MFI->getFrameOffsetReg(), RegState::Implicit); // Add the scratch resource registers as implicit uses because we may end up // needing them, and need to ensure that the reserved registers are // correctly handled.
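// e.g. (editorial): spilling a 16-byte SReg_128 picks SI_SPILL_S128_SAVE
// from getSGPRSpillSaveOpcode above; the pseudo is expanded later, which is
// why the scratch rsrc and frame offset registers appear only as implicit
// uses here.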
FrameInfo.setStackID(FrameIndex, 1); if (ST.hasScalarStores()) { // m0 is used for offset to scalar stores if used to spill. Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine | RegState::Dead); } return; } if (!ST.isVGPRSpillingEnabled(MF->getFunction())) { LLVMContext &Ctx = MF->getFunction().getContext(); Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to" " spill register"); BuildMI(MBB, MI, DL, get(AMDGPU::KILL)) .addReg(SrcReg); return; } assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected"); unsigned Opcode = getVGPRSpillSaveOpcode(SpillSize); MFI->setHasSpilledVGPRs(); BuildMI(MBB, MI, DL, get(Opcode)) .addReg(SrcReg, getKillRegState(isKill)) // data .addFrameIndex(FrameIndex) // addr .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc .addReg(MFI->getFrameOffsetReg()) // scratch_offset .addImm(0) // offset .addMemOperand(MMO); } static unsigned getSGPRSpillRestoreOpcode(unsigned Size) { switch (Size) { case 4: return AMDGPU::SI_SPILL_S32_RESTORE; case 8: return AMDGPU::SI_SPILL_S64_RESTORE; case 16: return AMDGPU::SI_SPILL_S128_RESTORE; case 32: return AMDGPU::SI_SPILL_S256_RESTORE; case 64: return AMDGPU::SI_SPILL_S512_RESTORE; default: llvm_unreachable("unknown register size"); } } static unsigned getVGPRSpillRestoreOpcode(unsigned Size) { switch (Size) { case 4: return AMDGPU::SI_SPILL_V32_RESTORE; case 8: return AMDGPU::SI_SPILL_V64_RESTORE; case 12: return AMDGPU::SI_SPILL_V96_RESTORE; case 16: return AMDGPU::SI_SPILL_V128_RESTORE; case 32: return AMDGPU::SI_SPILL_V256_RESTORE; case 64: return AMDGPU::SI_SPILL_V512_RESTORE; default: llvm_unreachable("unknown register size"); } } void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { MachineFunction *MF = MBB.getParent(); const SIMachineFunctionInfo *MFI = MF->getInfo(); MachineFrameInfo &FrameInfo = MF->getFrameInfo(); DebugLoc DL = MBB.findDebugLoc(MI); unsigned Align = FrameInfo.getObjectAlignment(FrameIndex); unsigned Size = FrameInfo.getObjectSize(FrameIndex); unsigned SpillSize = TRI->getSpillSize(*RC); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(*MF, FrameIndex); MachineMemOperand *MMO = MF->getMachineMemOperand( PtrInfo, MachineMemOperand::MOLoad, Size, Align); if (RI.isSGPRClass(RC)) { // FIXME: Maybe this should not include a memoperand because it will be // lowered to non-memory instructions. const MCInstrDesc &OpDesc = get(getSGPRSpillRestoreOpcode(SpillSize)); if (TargetRegisterInfo::isVirtualRegister(DestReg) && SpillSize == 4) { MachineRegisterInfo &MRI = MF->getRegInfo(); MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0RegClass); } FrameInfo.setStackID(FrameIndex, 1); MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc, DestReg) .addFrameIndex(FrameIndex) // addr .addMemOperand(MMO) .addReg(MFI->getScratchRSrcReg(), RegState::Implicit) .addReg(MFI->getFrameOffsetReg(), RegState::Implicit); if (ST.hasScalarStores()) { // m0 is used for offset to scalar stores if used to spill. 
Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine | RegState::Dead); } return; } if (!ST.isVGPRSpillingEnabled(MF->getFunction())) { LLVMContext &Ctx = MF->getFunction().getContext(); Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to" " restore register"); BuildMI(MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), DestReg); return; } assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected"); unsigned Opcode = getVGPRSpillRestoreOpcode(SpillSize); BuildMI(MBB, MI, DL, get(Opcode), DestReg) .addFrameIndex(FrameIndex) // vaddr .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc .addReg(MFI->getFrameOffsetReg()) // scratch_offset .addImm(0) // offset .addMemOperand(MMO); } /// \param Offset Offset in bytes of the FrameIndex being spilled unsigned SIInstrInfo::calculateLDSSpillAddress( MachineBasicBlock &MBB, MachineInstr &MI, RegScavenger *RS, unsigned TmpReg, unsigned FrameOffset, unsigned Size) const { MachineFunction *MF = MBB.getParent(); SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); const SISubtarget &ST = MF->getSubtarget<SISubtarget>(); DebugLoc DL = MBB.findDebugLoc(MI); unsigned WorkGroupSize = MFI->getMaxFlatWorkGroupSize(); unsigned WavefrontSize = ST.getWavefrontSize(); unsigned TIDReg = MFI->getTIDReg(); if (!MFI->hasCalculatedTID()) { MachineBasicBlock &Entry = MBB.getParent()->front(); MachineBasicBlock::iterator Insert = Entry.front(); DebugLoc DL = Insert->getDebugLoc(); TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass, *MF); if (TIDReg == AMDGPU::NoRegister) return TIDReg; if (!AMDGPU::isShader(MF->getFunction().getCallingConv()) && WorkGroupSize > WavefrontSize) { unsigned TIDIGXReg = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_X); unsigned TIDIGYReg = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Y); unsigned TIDIGZReg = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Z); unsigned InputPtrReg = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR); for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) { if (!Entry.isLiveIn(Reg)) Entry.addLiveIn(Reg); } RS->enterBasicBlock(Entry); // FIXME: Can we scavenge an SReg_64 and access the subregs?
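// Taken together with the step-by-step comments below, the sequence computes
// (a sketch of the arithmetic only, using the names from those comments):
//   TID = (NGROUPS.X * NGROUPS.Y) * TIDIG.X + NGROUPS.Z * TIDIG.Y + TIDIG.Z
// and the final V_LSHLREV_B32 scales TID by 4 to turn a lane index into a
// byte offset.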
unsigned STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0); unsigned STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0); BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0) .addReg(InputPtrReg) .addImm(SI::KernelInputOffsets::NGROUPS_Z); BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp1) .addReg(InputPtrReg) .addImm(SI::KernelInputOffsets::NGROUPS_Y); // NGROUPS.X * NGROUPS.Y BuildMI(Entry, Insert, DL, get(AMDGPU::S_MUL_I32), STmp1) .addReg(STmp1) .addReg(STmp0); // (NGROUPS.X * NGROUPS.Y) * TIDIG.X BuildMI(Entry, Insert, DL, get(AMDGPU::V_MUL_U32_U24_e32), TIDReg) .addReg(STmp1) .addReg(TIDIGXReg); // NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X) BuildMI(Entry, Insert, DL, get(AMDGPU::V_MAD_U32_U24), TIDReg) .addReg(STmp0) .addReg(TIDIGYReg) .addReg(TIDReg); // (NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)) + TIDIG.Z getAddNoCarry(Entry, Insert, DL, TIDReg) .addReg(TIDReg) .addReg(TIDIGZReg); } else { // Get the wave id BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_LO_U32_B32_e64), TIDReg) .addImm(-1) .addImm(0); BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e64), TIDReg) .addImm(-1) .addReg(TIDReg); } BuildMI(Entry, Insert, DL, get(AMDGPU::V_LSHLREV_B32_e32), TIDReg) .addImm(2) .addReg(TIDReg); MFI->setTIDReg(TIDReg); } // Add FrameIndex to LDS offset unsigned LDSOffset = MFI->getLDSSize() + (FrameOffset * WorkGroupSize); getAddNoCarry(MBB, MI, DL, TmpReg) .addImm(LDSOffset) .addReg(TIDReg); return TmpReg; } void SIInstrInfo::insertWaitStates(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, int Count) const { DebugLoc DL = MBB.findDebugLoc(MI); while (Count > 0) { int Arg; if (Count >= 8) Arg = 7; else Arg = Count - 1; Count -= 8; BuildMI(MBB, MI, DL, get(AMDGPU::S_NOP)) .addImm(Arg); } } void SIInstrInfo::insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const { insertWaitStates(MBB, MI, 1); } void SIInstrInfo::insertReturn(MachineBasicBlock &MBB) const { auto MF = MBB.getParent(); SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>(); assert(Info->isEntryFunction()); if (MBB.succ_empty()) { bool HasNoTerminator = MBB.getFirstTerminator() == MBB.end(); if (HasNoTerminator) BuildMI(MBB, MBB.end(), DebugLoc(), get(Info->returnsVoid() ? AMDGPU::S_ENDPGM : AMDGPU::SI_RETURN_TO_EPILOG)); } } unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) const { switch (MI.getOpcode()) { default: return 1; // FIXME: Do wait states equal cycles? case AMDGPU::S_NOP: return MI.getOperand(0).getImm() + 1; } } bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = MBB.findDebugLoc(MI); switch (MI.getOpcode()) { default: return AMDGPUInstrInfo::expandPostRAPseudo(MI); case AMDGPU::S_MOV_B64_term: // This is only a terminator to get the correct spill code placement during // register allocation. MI.setDesc(get(AMDGPU::S_MOV_B64)); break; case AMDGPU::S_XOR_B64_term: // This is only a terminator to get the correct spill code placement during // register allocation. MI.setDesc(get(AMDGPU::S_XOR_B64)); break; case AMDGPU::S_ANDN2_B64_term: // This is only a terminator to get the correct spill code placement during // register allocation.
MI.setDesc(get(AMDGPU::S_ANDN2_B64)); break; case AMDGPU::V_MOV_B64_PSEUDO: { unsigned Dst = MI.getOperand(0).getReg(); unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0); unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1); const MachineOperand &SrcOp = MI.getOperand(1); // FIXME: Will this work for 64-bit floating point immediates? assert(!SrcOp.isFPImm()); if (SrcOp.isImm()) { APInt Imm(64, SrcOp.getImm()); BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo) .addImm(Imm.getLoBits(32).getZExtValue()) .addReg(Dst, RegState::Implicit | RegState::Define); BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi) .addImm(Imm.getHiBits(32).getZExtValue()) .addReg(Dst, RegState::Implicit | RegState::Define); } else { assert(SrcOp.isReg()); BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo) .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub0)) .addReg(Dst, RegState::Implicit | RegState::Define); BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi) .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1)) .addReg(Dst, RegState::Implicit | RegState::Define); } MI.eraseFromParent(); break; } case AMDGPU::V_SET_INACTIVE_B32: { BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC) .addReg(AMDGPU::EXEC); BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), MI.getOperand(0).getReg()) .add(MI.getOperand(2)); BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC) .addReg(AMDGPU::EXEC); MI.eraseFromParent(); break; } case AMDGPU::V_SET_INACTIVE_B64: { BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC) .addReg(AMDGPU::EXEC); MachineInstr *Copy = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO), MI.getOperand(0).getReg()) .add(MI.getOperand(2)); expandPostRAPseudo(*Copy); BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC) .addReg(AMDGPU::EXEC); MI.eraseFromParent(); break; } case AMDGPU::V_MOVRELD_B32_V1: case AMDGPU::V_MOVRELD_B32_V2: case AMDGPU::V_MOVRELD_B32_V4: case AMDGPU::V_MOVRELD_B32_V8: case AMDGPU::V_MOVRELD_B32_V16: { const MCInstrDesc &MovRelDesc = get(AMDGPU::V_MOVRELD_B32_e32); unsigned VecReg = MI.getOperand(0).getReg(); bool IsUndef = MI.getOperand(1).isUndef(); unsigned SubReg = AMDGPU::sub0 + MI.getOperand(3).getImm(); assert(VecReg == MI.getOperand(1).getReg()); MachineInstr *MovRel = BuildMI(MBB, MI, DL, MovRelDesc) .addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef) .add(MI.getOperand(2)) .addReg(VecReg, RegState::ImplicitDefine) .addReg(VecReg, RegState::Implicit | (IsUndef ? RegState::Undef : 0)); const int ImpDefIdx = MovRelDesc.getNumOperands() + MovRelDesc.getNumImplicitUses(); const int ImpUseIdx = ImpDefIdx + 1; MovRel->tieOperands(ImpDefIdx, ImpUseIdx); MI.eraseFromParent(); break; } case AMDGPU::SI_PC_ADD_REL_OFFSET: { MachineFunction &MF = *MBB.getParent(); unsigned Reg = MI.getOperand(0).getReg(); unsigned RegLo = RI.getSubReg(Reg, AMDGPU::sub0); unsigned RegHi = RI.getSubReg(Reg, AMDGPU::sub1); // Create a bundle so these instructions won't be re-ordered by the // post-RA scheduler. MIBundleBuilder Bundler(MBB, MI); Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_GETPC_B64), Reg)); // Add 32-bit offset from this instruction to the start of the // constant data. 
Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo) .addReg(RegLo) .add(MI.getOperand(1))); MachineInstrBuilder MIB = BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi) .addReg(RegHi); if (MI.getOperand(2).getTargetFlags() == SIInstrInfo::MO_NONE) MIB.addImm(0); else MIB.add(MI.getOperand(2)); Bundler.append(MIB); finalizeBundle(MBB, Bundler.begin()); MI.eraseFromParent(); break; } case AMDGPU::EXIT_WWM: { // This only gets its own opcode so that SIFixWWMLiveness can tell when WWM // is exited. MI.setDesc(get(AMDGPU::S_MOV_B64)); break; } } return true; } bool SIInstrInfo::swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, unsigned Src0OpName, MachineOperand &Src1, unsigned Src1OpName) const { MachineOperand *Src0Mods = getNamedOperand(MI, Src0OpName); if (!Src0Mods) return false; MachineOperand *Src1Mods = getNamedOperand(MI, Src1OpName); assert(Src1Mods && "All commutable instructions have both src0 and src1 modifiers"); int Src0ModsVal = Src0Mods->getImm(); int Src1ModsVal = Src1Mods->getImm(); Src1Mods->setImm(Src0ModsVal); Src0Mods->setImm(Src1ModsVal); return true; } static MachineInstr *swapRegAndNonRegOperand(MachineInstr &MI, MachineOperand &RegOp, MachineOperand &NonRegOp) { unsigned Reg = RegOp.getReg(); unsigned SubReg = RegOp.getSubReg(); bool IsKill = RegOp.isKill(); bool IsDead = RegOp.isDead(); bool IsUndef = RegOp.isUndef(); bool IsDebug = RegOp.isDebug(); if (NonRegOp.isImm()) RegOp.ChangeToImmediate(NonRegOp.getImm()); else if (NonRegOp.isFI()) RegOp.ChangeToFrameIndex(NonRegOp.getIndex()); else return nullptr; NonRegOp.ChangeToRegister(Reg, false, false, IsKill, IsDead, IsUndef, IsDebug); NonRegOp.setSubReg(SubReg); return &MI; } MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned Src0Idx, unsigned Src1Idx) const { assert(!NewMI && "this should never be used"); unsigned Opc = MI.getOpcode(); int CommutedOpcode = commuteOpcode(Opc); if (CommutedOpcode == -1) return nullptr; assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) == static_cast(Src0Idx) && AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) == static_cast(Src1Idx) && "inconsistency with findCommutedOpIndices"); MachineOperand &Src0 = MI.getOperand(Src0Idx); MachineOperand &Src1 = MI.getOperand(Src1Idx); MachineInstr *CommutedMI = nullptr; if (Src0.isReg() && Src1.isReg()) { if (isOperandLegal(MI, Src1Idx, &Src0)) { // Be sure to copy the source modifiers to the right place. CommutedMI = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, Src0Idx, Src1Idx); } } else if (Src0.isReg() && !Src1.isReg()) { // src0 should always be able to support any operand type, so no need to // check operand legality. CommutedMI = swapRegAndNonRegOperand(MI, Src0, Src1); } else if (!Src0.isReg() && Src1.isReg()) { if (isOperandLegal(MI, Src1Idx, &Src0)) CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0); } else { // FIXME: Found two non registers to commute. This does happen. return nullptr; } if (CommutedMI) { swapSourceModifiers(MI, Src0, AMDGPU::OpName::src0_modifiers, Src1, AMDGPU::OpName::src1_modifiers); CommutedMI->setDesc(get(CommutedOpcode)); } return CommutedMI; } // This needs to be implemented because the source modifiers may be inserted // between the true commutable operands, and the base // TargetInstrInfo::commuteInstruction uses it. 
bool SIInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const { if (!MI.isCommutable()) return false; unsigned Opc = MI.getOpcode(); int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); if (Src0Idx == -1) return false; int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); if (Src1Idx == -1) return false; return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx); } bool SIInstrInfo::isBranchOffsetInRange(unsigned BranchOp, int64_t BrOffset) const { // BranchRelaxation should never have to check s_setpc_b64 because its dest // block is unanalyzable. assert(BranchOp != AMDGPU::S_SETPC_B64); // Convert to dwords. BrOffset /= 4; // The branch instructions do PC += signext(SIMM16 * 4) + 4, so the offset is // from the next instruction. BrOffset -= 1; return isIntN(BranchOffsetBits, BrOffset); } MachineBasicBlock *SIInstrInfo::getBranchDestBlock( const MachineInstr &MI) const { if (MI.getOpcode() == AMDGPU::S_SETPC_B64) { // This would be a difficult analysis to perform, but can always be legal so // there's no need to analyze it. return nullptr; } return MI.getOperand(0).getMBB(); } unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &DestBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const { assert(RS && "RegScavenger required for long branching"); assert(MBB.empty() && "new block should be inserted for expanding unconditional branch"); assert(MBB.pred_size() == 1); MachineFunction *MF = MBB.getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); // FIXME: Virtual register workaround for RegScavenger not working with empty // blocks. unsigned PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); auto I = MBB.end(); // We need to compute the offset relative to the instruction immediately after // s_getpc_b64. Insert pc arithmetic code before last terminator. MachineInstr *GetPC = BuildMI(MBB, I, DL, get(AMDGPU::S_GETPC_B64), PCReg); // TODO: Handle > 32-bit block address. if (BrOffset >= 0) { BuildMI(MBB, I, DL, get(AMDGPU::S_ADD_U32)) .addReg(PCReg, RegState::Define, AMDGPU::sub0) .addReg(PCReg, 0, AMDGPU::sub0) .addMBB(&DestBB, AMDGPU::TF_LONG_BRANCH_FORWARD); BuildMI(MBB, I, DL, get(AMDGPU::S_ADDC_U32)) .addReg(PCReg, RegState::Define, AMDGPU::sub1) .addReg(PCReg, 0, AMDGPU::sub1) .addImm(0); } else { // Backwards branch. BuildMI(MBB, I, DL, get(AMDGPU::S_SUB_U32)) .addReg(PCReg, RegState::Define, AMDGPU::sub0) .addReg(PCReg, 0, AMDGPU::sub0) .addMBB(&DestBB, AMDGPU::TF_LONG_BRANCH_BACKWARD); BuildMI(MBB, I, DL, get(AMDGPU::S_SUBB_U32)) .addReg(PCReg, RegState::Define, AMDGPU::sub1) .addReg(PCReg, 0, AMDGPU::sub1) .addImm(0); } // Insert the indirect branch after the other terminator. BuildMI(&MBB, DL, get(AMDGPU::S_SETPC_B64)) .addReg(PCReg); // FIXME: If spilling is necessary, this will fail because this scavenger has // no emergency stack slots. It is non-trivial to spill in this situation, // because the restore code needs to be specially placed after the // jump. BranchRelaxation then needs to be made aware of the newly inserted // block. // // If a spill is needed for the pc register pair, we need to insert a spill // restore block right before the destination block, and insert a short branch // into the old destination block's fallthrough predecessor. 
// e.g.: // // s_cbranch_scc0 skip_long_branch: // // long_branch_bb: // spill s[8:9] // s_getpc_b64 s[8:9] // s_add_u32 s8, s8, restore_bb // s_addc_u32 s9, s9, 0 // s_setpc_b64 s[8:9] // // skip_long_branch: // foo; // // ..... // // dest_bb_fallthrough_predecessor: // bar; // s_branch dest_bb // // restore_bb: // restore s[8:9] // fallthrough dest_bb /// // dest_bb: // buzz; RS->enterBasicBlockEnd(MBB); unsigned Scav = RS->scavengeRegister(&AMDGPU::SReg_64RegClass, MachineBasicBlock::iterator(GetPC), 0); MRI.replaceRegWith(PCReg, Scav); MRI.clearVirtRegs(); RS->setRegUsed(Scav); return 4 + 8 + 4 + 4; } unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) { switch (Cond) { case SIInstrInfo::SCC_TRUE: return AMDGPU::S_CBRANCH_SCC1; case SIInstrInfo::SCC_FALSE: return AMDGPU::S_CBRANCH_SCC0; case SIInstrInfo::VCCNZ: return AMDGPU::S_CBRANCH_VCCNZ; case SIInstrInfo::VCCZ: return AMDGPU::S_CBRANCH_VCCZ; case SIInstrInfo::EXECNZ: return AMDGPU::S_CBRANCH_EXECNZ; case SIInstrInfo::EXECZ: return AMDGPU::S_CBRANCH_EXECZ; default: llvm_unreachable("invalid branch predicate"); } } SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) { switch (Opcode) { case AMDGPU::S_CBRANCH_SCC0: return SCC_FALSE; case AMDGPU::S_CBRANCH_SCC1: return SCC_TRUE; case AMDGPU::S_CBRANCH_VCCNZ: return VCCNZ; case AMDGPU::S_CBRANCH_VCCZ: return VCCZ; case AMDGPU::S_CBRANCH_EXECNZ: return EXECNZ; case AMDGPU::S_CBRANCH_EXECZ: return EXECZ; default: return INVALID_BR; } } bool SIInstrInfo::analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl &Cond, bool AllowModify) const { if (I->getOpcode() == AMDGPU::S_BRANCH) { // Unconditional Branch TBB = I->getOperand(0).getMBB(); return false; } MachineBasicBlock *CondBB = nullptr; if (I->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) { CondBB = I->getOperand(1).getMBB(); Cond.push_back(I->getOperand(0)); } else { BranchPredicate Pred = getBranchPredicate(I->getOpcode()); if (Pred == INVALID_BR) return true; CondBB = I->getOperand(0).getMBB(); Cond.push_back(MachineOperand::CreateImm(Pred)); Cond.push_back(I->getOperand(1)); // Save the branch register. } ++I; if (I == MBB.end()) { // Conditional branch followed by fall-through. TBB = CondBB; return false; } if (I->getOpcode() == AMDGPU::S_BRANCH) { TBB = CondBB; FBB = I->getOperand(0).getMBB(); return false; } return true; } bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl &Cond, bool AllowModify) const { MachineBasicBlock::iterator I = MBB.getFirstTerminator(); if (I == MBB.end()) return false; if (I->getOpcode() != AMDGPU::SI_MASK_BRANCH) return analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify); ++I; // TODO: Should be able to treat as fallthrough? if (I == MBB.end()) return true; if (analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify)) return true; MachineBasicBlock *MaskBrDest = I->getOperand(0).getMBB(); // Specifically handle the case where the conditional branch is to the same // destination as the mask branch. e.g. // // si_mask_branch BB8 // s_cbranch_execz BB8 // s_cbranch BB9 // // This is required to understand divergent loops which may need the branches // to be relaxed. 
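// Put differently: the pair is treated as analyzable only when the
// conditional branch tests the exec mask and targets the same block as the
// si_mask_branch, i.e. TBB == MaskBrDest and the predicate is EXECZ or
// EXECNZ; every other combination is rejected below.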
if (TBB != MaskBrDest || Cond.empty()) return true; auto Pred = Cond[0].getImm(); return (Pred != EXECZ && Pred != EXECNZ); } unsigned SIInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved) const { MachineBasicBlock::iterator I = MBB.getFirstTerminator(); unsigned Count = 0; unsigned RemovedSize = 0; while (I != MBB.end()) { MachineBasicBlock::iterator Next = std::next(I); if (I->getOpcode() == AMDGPU::SI_MASK_BRANCH) { I = Next; continue; } RemovedSize += getInstSizeInBytes(*I); I->eraseFromParent(); ++Count; I = Next; } if (BytesRemoved) *BytesRemoved = RemovedSize; return Count; } // Copy the flags onto the implicit condition register operand. static void preserveCondRegFlags(MachineOperand &CondReg, const MachineOperand &OrigCond) { CondReg.setIsUndef(OrigCond.isUndef()); CondReg.setIsKill(OrigCond.isKill()); } unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const { if (!FBB && Cond.empty()) { BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH)) .addMBB(TBB); if (BytesAdded) *BytesAdded = 4; return 1; } if (Cond.size() == 1 && Cond[0].isReg()) { BuildMI(&MBB, DL, get(AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO)) .add(Cond[0]) .addMBB(TBB); return 1; } assert(TBB && Cond[0].isImm()); unsigned Opcode = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm())); if (!FBB) { MachineInstr *CondBr = BuildMI(&MBB, DL, get(Opcode)) .addMBB(TBB); // Copy the flags onto the implicit condition register operand. preserveCondRegFlags(CondBr->getOperand(1), Cond[1]); if (BytesAdded) *BytesAdded = 4; return 1; } assert(TBB && FBB); MachineInstr *CondBr = BuildMI(&MBB, DL, get(Opcode)) .addMBB(TBB); BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH)) .addMBB(FBB); MachineOperand &CondReg = CondBr->getOperand(1); CondReg.setIsUndef(Cond[1].isUndef()); CondReg.setIsKill(Cond[1].isKill()); if (BytesAdded) *BytesAdded = 8; return 2; } bool SIInstrInfo::reverseBranchCondition( SmallVectorImpl<MachineOperand> &Cond) const { if (Cond.size() != 2) { return true; } if (Cond[0].isImm()) { Cond[0].setImm(-Cond[0].getImm()); return false; } return true; } bool SIInstrInfo::canInsertSelect(const MachineBasicBlock &MBB, ArrayRef<MachineOperand> Cond, unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const { switch (Cond[0].getImm()) { case VCCNZ: case VCCZ: { const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); const TargetRegisterClass *RC = MRI.getRegClass(TrueReg); assert(MRI.getRegClass(FalseReg) == RC); int NumInsts = AMDGPU::getRegBitWidth(RC->getID()) / 32; CondCycles = TrueCycles = FalseCycles = NumInsts; // ??? // Limit to equal cost for branch vs. N v_cndmask_b32s. return !RI.isSGPRClass(RC) && NumInsts <= 6; } case SCC_TRUE: case SCC_FALSE: { // FIXME: We could insert for VGPRs if we could replace the original compare // with a vector one. const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); const TargetRegisterClass *RC = MRI.getRegClass(TrueReg); assert(MRI.getRegClass(FalseReg) == RC); int NumInsts = AMDGPU::getRegBitWidth(RC->getID()) / 32; // Multiples of 8 can do s_cselect_b64 if (NumInsts % 2 == 0) NumInsts /= 2; CondCycles = TrueCycles = FalseCycles = NumInsts; // ???
return RI.isSGPRClass(RC); } default: return false; } } void SIInstrInfo::insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, unsigned DstReg, ArrayRef<MachineOperand> Cond, unsigned TrueReg, unsigned FalseReg) const { BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm()); if (Pred == VCCZ || Pred == SCC_FALSE) { Pred = static_cast<BranchPredicate>(-Pred); std::swap(TrueReg, FalseReg); } MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg); unsigned DstSize = RI.getRegSizeInBits(*DstRC); if (DstSize == 32) { unsigned SelOp = Pred == SCC_TRUE ? AMDGPU::S_CSELECT_B32 : AMDGPU::V_CNDMASK_B32_e32; // Instruction's operands are backwards from what is expected. MachineInstr *Select = BuildMI(MBB, I, DL, get(SelOp), DstReg) .addReg(FalseReg) .addReg(TrueReg); preserveCondRegFlags(Select->getOperand(3), Cond[1]); return; } if (DstSize == 64 && Pred == SCC_TRUE) { MachineInstr *Select = BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), DstReg) .addReg(FalseReg) .addReg(TrueReg); preserveCondRegFlags(Select->getOperand(3), Cond[1]); return; } static const int16_t Sub0_15[] = { AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11, AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, }; static const int16_t Sub0_15_64[] = { AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, AMDGPU::sub4_sub5, AMDGPU::sub6_sub7, AMDGPU::sub8_sub9, AMDGPU::sub10_sub11, AMDGPU::sub12_sub13, AMDGPU::sub14_sub15, }; unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32; const TargetRegisterClass *EltRC = &AMDGPU::VGPR_32RegClass; const int16_t *SubIndices = Sub0_15; int NElts = DstSize / 32; // 64-bit select is only available for SALU. if (Pred == SCC_TRUE) { SelOp = AMDGPU::S_CSELECT_B64; EltRC = &AMDGPU::SGPR_64RegClass; SubIndices = Sub0_15_64; assert(NElts % 2 == 0); NElts /= 2; } MachineInstrBuilder MIB = BuildMI( MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg); I = MIB->getIterator(); SmallVector<unsigned, 8> Regs; for (int Idx = 0; Idx != NElts; ++Idx) { unsigned DstElt = MRI.createVirtualRegister(EltRC); Regs.push_back(DstElt); unsigned SubIdx = SubIndices[Idx]; MachineInstr *Select = BuildMI(MBB, I, DL, get(SelOp), DstElt) .addReg(FalseReg, 0, SubIdx) .addReg(TrueReg, 0, SubIdx); preserveCondRegFlags(Select->getOperand(3), Cond[1]); MIB.addReg(DstElt) .addImm(SubIdx); } } bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) const { switch (MI.getOpcode()) { case AMDGPU::V_MOV_B32_e32: case AMDGPU::V_MOV_B32_e64: case AMDGPU::V_MOV_B64_PSEUDO: { // If there are additional implicit register operands, this may be used for // register indexing so the source register operand isn't simply copied.
unsigned NumOps = MI.getDesc().getNumOperands() + MI.getDesc().getNumImplicitUses(); return MI.getNumOperands() == NumOps; } case AMDGPU::S_MOV_B32: case AMDGPU::S_MOV_B64: case AMDGPU::COPY: return true; default: return false; } } unsigned SIInstrInfo::getAddressSpaceForPseudoSourceKind( PseudoSourceValue::PSVKind Kind) const { switch (Kind) { case PseudoSourceValue::Stack: case PseudoSourceValue::FixedStack: return AMDGPUASI.PRIVATE_ADDRESS; case PseudoSourceValue::ConstantPool: case PseudoSourceValue::GOT: case PseudoSourceValue::JumpTable: case PseudoSourceValue::GlobalValueCallEntry: case PseudoSourceValue::ExternalSymbolCallEntry: case PseudoSourceValue::TargetCustom: return AMDGPUASI.CONSTANT_ADDRESS; } return AMDGPUASI.FLAT_ADDRESS; } static void removeModOperands(MachineInstr &MI) { unsigned Opc = MI.getOpcode(); int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers); int Src2ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers); MI.RemoveOperand(Src2ModIdx); MI.RemoveOperand(Src1ModIdx); MI.RemoveOperand(Src0ModIdx); } bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg, MachineRegisterInfo *MRI) const { if (!MRI->hasOneNonDBGUse(Reg)) return false; switch (DefMI.getOpcode()) { default: return false; case AMDGPU::S_MOV_B64: // TODO: We could fold 64-bit immediates, but this gets complicated // when there are sub-registers. return false; case AMDGPU::V_MOV_B32_e32: case AMDGPU::S_MOV_B32: break; } const MachineOperand *ImmOp = getNamedOperand(DefMI, AMDGPU::OpName::src0); assert(ImmOp); // FIXME: We could handle FrameIndex values here. if (!ImmOp->isImm()) return false; unsigned Opc = UseMI.getOpcode(); if (Opc == AMDGPU::COPY) { bool isVGPRCopy = RI.isVGPR(*MRI, UseMI.getOperand(0).getReg()); unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32; UseMI.setDesc(get(NewOpc)); UseMI.getOperand(1).ChangeToImmediate(ImmOp->getImm()); UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent()); return true; } if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAD_F16 || Opc == AMDGPU::V_MAC_F16_e64) { // Don't fold if we are using source or output modifiers. The new VOP2 // instructions don't have them. if (hasAnyModifiersSet(UseMI)) return false; // If this is a free constant, there's no reason to do this. // TODO: We could fold this here instead of letting SIFoldOperands do it // later. MachineOperand *Src0 = getNamedOperand(UseMI, AMDGPU::OpName::src0); // Any src operand can be used for the legality check. if (isInlineConstant(UseMI, *Src0, *ImmOp)) return false; bool IsF32 = Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64; MachineOperand *Src1 = getNamedOperand(UseMI, AMDGPU::OpName::src1); MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2); // Multiplied part is the constant: Use v_madmk_{f16, f32}. // We should only expect these to be on src0 due to canonicalizations. if (Src0->isReg() && Src0->getReg() == Reg) { if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))) return false; if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg()))) return false; // We need to swap operands 0 and 1 since madmk constant is at operand 1. const int64_t Imm = ImmOp->getImm(); // FIXME: This would be a lot easier if we could return a new instruction // instead of having to modify in place.
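// A sketch of the rewrite performed in this block (illustrative virtual
// registers, f32 case):
//   %imm = V_MOV_B32_e32 <literal>    ; DefMI
//   %dst = V_MAD_F32 %imm, %b, %c     ; UseMI, constant in src0
// becomes
//   %dst = V_MADMK_F32 %b, <literal>, %c
// i.e. src0 takes over src1's register and the literal moves into the madmk
// immediate slot.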
// Remove these first since they are at the end. UseMI.RemoveOperand( AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod)); UseMI.RemoveOperand( AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp)); unsigned Src1Reg = Src1->getReg(); unsigned Src1SubReg = Src1->getSubReg(); Src0->setReg(Src1Reg); Src0->setSubReg(Src1SubReg); Src0->setIsKill(Src1->isKill()); if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64) UseMI.untieRegOperand( AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); Src1->ChangeToImmediate(Imm); removeModOperands(UseMI); UseMI.setDesc(get(IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16)); bool DeleteDef = MRI->hasOneNonDBGUse(Reg); if (DeleteDef) DefMI.eraseFromParent(); return true; } // Added part is the constant: Use v_madak_{f16, f32}. if (Src2->isReg() && Src2->getReg() == Reg) { // Not allowed to use constant bus for another operand. // We can however allow an inline immediate as src0. if (!Src0->isImm() && (Src0->isReg() && RI.isSGPRClass(MRI->getRegClass(Src0->getReg())))) return false; if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))) return false; const int64_t Imm = ImmOp->getImm(); // FIXME: This would be a lot easier if we could return a new instruction // instead of having to modify in place. // Remove these first since they are at the end. UseMI.RemoveOperand( AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod)); UseMI.RemoveOperand( AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp)); if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64) UseMI.untieRegOperand( AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); // ChangingToImmediate adds Src2 back to the instruction. Src2->ChangeToImmediate(Imm); // These come before src2. removeModOperands(UseMI); UseMI.setDesc(get(IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16)); bool DeleteDef = MRI->hasOneNonDBGUse(Reg); if (DeleteDef) DefMI.eraseFromParent(); return true; } } return false; } static bool offsetsDoNotOverlap(int WidthA, int OffsetA, int WidthB, int OffsetB) { int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB; int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA; int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB; return LowOffset + LowWidth <= HighOffset; } bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr &MIa, MachineInstr &MIb) const { unsigned BaseReg0, BaseReg1; int64_t Offset0, Offset1; if (getMemOpBaseRegImmOfs(MIa, BaseReg0, Offset0, &RI) && getMemOpBaseRegImmOfs(MIb, BaseReg1, Offset1, &RI)) { if (!MIa.hasOneMemOperand() || !MIb.hasOneMemOperand()) { // FIXME: Handle ds_read2 / ds_write2. return false; } unsigned Width0 = (*MIa.memoperands_begin())->getSize(); unsigned Width1 = (*MIb.memoperands_begin())->getSize(); if (BaseReg0 == BaseReg1 && offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1)) { return true; } } return false; } bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const { assert((MIa.mayLoad() || MIa.mayStore()) && "MIa must load from or modify a memory location"); assert((MIb.mayLoad() || MIb.mayStore()) && "MIb must load from or modify a memory location"); if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects()) return false; // XXX - Can we relax this between address spaces? 
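// A worked example for the offset-based disjointness check used below: two
// 4-byte accesses off the same base at offsets 0 and 4 are disjoint
// (0 + 4 <= 4), while 4-byte accesses at offsets 0 and 2 are not (0 + 4 > 2).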
if (MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef()) return false; if (AA && MIa.hasOneMemOperand() && MIb.hasOneMemOperand()) { const MachineMemOperand *MMOa = *MIa.memoperands_begin(); const MachineMemOperand *MMOb = *MIb.memoperands_begin(); if (MMOa->getValue() && MMOb->getValue()) { MemoryLocation LocA(MMOa->getValue(), MMOa->getSize(), MMOa->getAAInfo()); MemoryLocation LocB(MMOb->getValue(), MMOb->getSize(), MMOb->getAAInfo()); if (!AA->alias(LocA, LocB)) return true; } } // TODO: Should we check the address space from the MachineMemOperand? That // would allow us to distinguish objects we know don't alias based on the // underlying address space, even if it was lowered to a different one, // e.g. private accesses lowered to use MUBUF instructions on a scratch // buffer. if (isDS(MIa)) { if (isDS(MIb)) return checkInstOffsetsDoNotOverlap(MIa, MIb); return !isFLAT(MIb) || isSegmentSpecificFLAT(MIb); } if (isMUBUF(MIa) || isMTBUF(MIa)) { if (isMUBUF(MIb) || isMTBUF(MIb)) return checkInstOffsetsDoNotOverlap(MIa, MIb); return !isFLAT(MIb) && !isSMRD(MIb); } if (isSMRD(MIa)) { if (isSMRD(MIb)) return checkInstOffsetsDoNotOverlap(MIa, MIb); return !isFLAT(MIb) && !isMUBUF(MIa) && !isMTBUF(MIa); } if (isFLAT(MIa)) { if (isFLAT(MIb)) return checkInstOffsetsDoNotOverlap(MIa, MIb); return false; } return false; } static int64_t getFoldableImm(const MachineOperand* MO) { if (!MO->isReg()) return false; const MachineFunction *MF = MO->getParent()->getParent()->getParent(); const MachineRegisterInfo &MRI = MF->getRegInfo(); auto Def = MRI.getUniqueVRegDef(MO->getReg()); if (Def && Def->getOpcode() == AMDGPU::V_MOV_B32_e32 && Def->getOperand(1).isImm()) return Def->getOperand(1).getImm(); return AMDGPU::NoRegister; } MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB, MachineInstr &MI, LiveVariables *LV) const { bool IsF16 = false; switch (MI.getOpcode()) { default: return nullptr; case AMDGPU::V_MAC_F16_e64: IsF16 = true; LLVM_FALLTHROUGH; case AMDGPU::V_MAC_F32_e64: break; case AMDGPU::V_MAC_F16_e32: IsF16 = true; LLVM_FALLTHROUGH; case AMDGPU::V_MAC_F32_e32: { int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0); const MachineOperand *Src0 = &MI.getOperand(Src0Idx); if (!Src0->isReg() && !Src0->isImm()) return nullptr; if (Src0->isImm() && !isInlineConstant(MI, Src0Idx, *Src0)) return nullptr; break; } } const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst); const MachineOperand *Src0 = getNamedOperand(MI, AMDGPU::OpName::src0); const MachineOperand *Src0Mods = getNamedOperand(MI, AMDGPU::OpName::src0_modifiers); const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1); const MachineOperand *Src1Mods = getNamedOperand(MI, AMDGPU::OpName::src1_modifiers); const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2); const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp); const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod); if (!Src0Mods && !Src1Mods && !Clamp && !Omod && // If we have an SGPR input, we will violate the constant bus restriction. (!Src0->isReg() || !RI.isSGPRReg(MBB->getParent()->getRegInfo(), Src0->getReg()))) { if (auto Imm = getFoldableImm(Src2)) { return BuildMI(*MBB, MI, MI.getDebugLoc(), get(IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32)) .add(*Dst) .add(*Src0) .add(*Src1) .addImm(Imm); } if (auto Imm = getFoldableImm(Src1)) { return BuildMI(*MBB, MI, MI.getDebugLoc(), get(IsF16 ? 
AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32)) .add(*Dst) .add(*Src0) .addImm(Imm) .add(*Src2); } if (auto Imm = getFoldableImm(Src0)) { if (isOperandLegal(MI, AMDGPU::getNamedOperandIdx(AMDGPU::V_MADMK_F32, AMDGPU::OpName::src0), Src1)) return BuildMI(*MBB, MI, MI.getDebugLoc(), get(IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32)) .add(*Dst) .add(*Src1) .addImm(Imm) .add(*Src2); } } return BuildMI(*MBB, MI, MI.getDebugLoc(), get(IsF16 ? AMDGPU::V_MAD_F16 : AMDGPU::V_MAD_F32)) .add(*Dst) .addImm(Src0Mods ? Src0Mods->getImm() : 0) .add(*Src0) .addImm(Src1Mods ? Src1Mods->getImm() : 0) .add(*Src1) .addImm(0) // Src mods .add(*Src2) .addImm(Clamp ? Clamp->getImm() : 0) .addImm(Omod ? Omod->getImm() : 0); } // It's not generally safe to move VALU instructions across these since it will // start using the register as a base index rather than directly. // XXX - Why isn't hasSideEffects sufficient for these? static bool changesVGPRIndexingMode(const MachineInstr &MI) { switch (MI.getOpcode()) { case AMDGPU::S_SET_GPR_IDX_ON: case AMDGPU::S_SET_GPR_IDX_MODE: case AMDGPU::S_SET_GPR_IDX_OFF: return true; default: return false; } } bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const { // XXX - Do we want the SP check in the base implementation? // Target-independent instructions do not have an implicit-use of EXEC, even // when they operate on VGPRs. Treating EXEC modifications as scheduling // boundaries prevents incorrect movements of such instructions. return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF) || MI.modifiesRegister(AMDGPU::EXEC, &RI) || MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 || MI.getOpcode() == AMDGPU::S_SETREG_B32 || changesVGPRIndexingMode(MI); } bool SIInstrInfo::isInlineConstant(const APInt &Imm) const { switch (Imm.getBitWidth()) { case 32: return AMDGPU::isInlinableLiteral32(Imm.getSExtValue(), ST.hasInv2PiInlineImm()); case 64: return AMDGPU::isInlinableLiteral64(Imm.getSExtValue(), ST.hasInv2PiInlineImm()); case 16: return ST.has16BitInsts() && AMDGPU::isInlinableLiteral16(Imm.getSExtValue(), ST.hasInv2PiInlineImm()); default: llvm_unreachable("invalid bitwidth"); } } bool SIInstrInfo::isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const { if (!MO.isImm() || OperandType < AMDGPU::OPERAND_SRC_FIRST || OperandType > AMDGPU::OPERAND_SRC_LAST) return false; // MachineOperand provides no way to tell the true operand size, since it only // records a 64-bit value. We need to know the size to determine if a 32-bit // floating point immediate bit pattern is legal for an integer immediate. It // would be for any 32-bit integer operand, but would not be for a 64-bit one. 
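// For example, the 32-bit pattern 0x3f800000 (1.0f) is an inline constant for
// a 32-bit operand, but the same value seen as a 64-bit immediate is not,
// since 64-bit 1.0 is the different pattern 0x3ff0000000000000.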
int64_t Imm = MO.getImm(); switch (OperandType) { case AMDGPU::OPERAND_REG_IMM_INT32: case AMDGPU::OPERAND_REG_IMM_FP32: case AMDGPU::OPERAND_REG_INLINE_C_INT32: case AMDGPU::OPERAND_REG_INLINE_C_FP32: { int32_t Trunc = static_cast<int32_t>(Imm); return Trunc == Imm && AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm()); } case AMDGPU::OPERAND_REG_IMM_INT64: case AMDGPU::OPERAND_REG_IMM_FP64: case AMDGPU::OPERAND_REG_INLINE_C_INT64: case AMDGPU::OPERAND_REG_INLINE_C_FP64: return AMDGPU::isInlinableLiteral64(MO.getImm(), ST.hasInv2PiInlineImm()); case AMDGPU::OPERAND_REG_IMM_INT16: case AMDGPU::OPERAND_REG_IMM_FP16: case AMDGPU::OPERAND_REG_INLINE_C_INT16: case AMDGPU::OPERAND_REG_INLINE_C_FP16: { if (isInt<16>(Imm) || isUInt<16>(Imm)) { // A few special case instructions have 16-bit operands on subtargets // where 16-bit instructions are not legal. // TODO: Do the 32-bit immediates work? We shouldn't really need to handle // constants in these cases int16_t Trunc = static_cast<int16_t>(Imm); return ST.has16BitInsts() && AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm()); } return false; } case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: { uint32_t Trunc = static_cast<uint32_t>(Imm); return AMDGPU::isInlinableLiteralV216(Trunc, ST.hasInv2PiInlineImm()); } default: llvm_unreachable("invalid bitwidth"); } } bool SIInstrInfo::isLiteralConstantLike(const MachineOperand &MO, const MCOperandInfo &OpInfo) const { switch (MO.getType()) { case MachineOperand::MO_Register: return false; case MachineOperand::MO_Immediate: return !isInlineConstant(MO, OpInfo); case MachineOperand::MO_FrameIndex: case MachineOperand::MO_MachineBasicBlock: case MachineOperand::MO_ExternalSymbol: case MachineOperand::MO_GlobalAddress: case MachineOperand::MO_MCSymbol: return true; default: llvm_unreachable("unexpected operand type"); } } static bool compareMachineOp(const MachineOperand &Op0, const MachineOperand &Op1) { if (Op0.getType() != Op1.getType()) return false; switch (Op0.getType()) { case MachineOperand::MO_Register: return Op0.getReg() == Op1.getReg(); case MachineOperand::MO_Immediate: return Op0.getImm() == Op1.getImm(); default: llvm_unreachable("Didn't expect to be comparing these operand types"); } } bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo, const MachineOperand &MO) const { const MCOperandInfo &OpInfo = get(MI.getOpcode()).OpInfo[OpNo]; assert(MO.isImm() || MO.isTargetIndex() || MO.isFI()); if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE) return true; if (OpInfo.RegClass < 0) return false; if (MO.isImm() && isInlineConstant(MO, OpInfo)) return RI.opCanUseInlineConstant(OpInfo.OperandType); return RI.opCanUseLiteralConstant(OpInfo.OperandType); } bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const { int Op32 = AMDGPU::getVOPe32(Opcode); if (Op32 == -1) return false; return pseudoToMCOpcode(Op32) != -1; } bool SIInstrInfo::hasModifiers(unsigned Opcode) const { // The src0_modifier operand is present on all instructions // that have modifiers.
return AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0_modifiers) != -1; } bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI, unsigned OpName) const { const MachineOperand *Mods = getNamedOperand(MI, OpName); return Mods && Mods->getImm(); } bool SIInstrInfo::hasAnyModifiersSet(const MachineInstr &MI) const { return hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) || hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) || hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers) || hasModifiersSet(MI, AMDGPU::OpName::clamp) || hasModifiersSet(MI, AMDGPU::OpName::omod); } bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const { // Literal constants use the constant bus. //if (isLiteralConstantLike(MO, OpInfo)) // return true; if (MO.isImm()) return !isInlineConstant(MO, OpInfo); if (!MO.isReg()) return true; // Misc other operands like FrameIndex if (!MO.isUse()) return false; if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) return RI.isSGPRClass(MRI.getRegClass(MO.getReg())); // FLAT_SCR is just an SGPR pair. if (!MO.isImplicit() && (MO.getReg() == AMDGPU::FLAT_SCR)) return true; // EXEC register uses the constant bus. if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC) return true; // SGPRs use the constant bus return (MO.getReg() == AMDGPU::VCC || MO.getReg() == AMDGPU::M0 || (!MO.isImplicit() && (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) || AMDGPU::SGPR_64RegClass.contains(MO.getReg())))); } static unsigned findImplicitSGPRRead(const MachineInstr &MI) { for (const MachineOperand &MO : MI.implicit_operands()) { // We only care about reads. if (MO.isDef()) continue; switch (MO.getReg()) { case AMDGPU::VCC: case AMDGPU::M0: case AMDGPU::FLAT_SCR: return MO.getReg(); default: break; } } return AMDGPU::NoRegister; } static bool shouldReadExec(const MachineInstr &MI) { if (SIInstrInfo::isVALU(MI)) { switch (MI.getOpcode()) { case AMDGPU::V_READLANE_B32: case AMDGPU::V_READLANE_B32_si: case AMDGPU::V_READLANE_B32_vi: case AMDGPU::V_WRITELANE_B32: case AMDGPU::V_WRITELANE_B32_si: case AMDGPU::V_WRITELANE_B32_vi: return false; } return true; } if (SIInstrInfo::isGenericOpcode(MI.getOpcode()) || SIInstrInfo::isSALU(MI) || SIInstrInfo::isSMRD(MI)) return false; return true; } static bool isSubRegOf(const SIRegisterInfo &TRI, const MachineOperand &SuperVec, const MachineOperand &SubReg) { if (TargetRegisterInfo::isPhysicalRegister(SubReg.getReg())) return TRI.isSubRegister(SuperVec.getReg(), SubReg.getReg()); return SubReg.getSubReg() != AMDGPU::NoSubRegister && SubReg.getReg() == SuperVec.getReg(); } bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const { uint16_t Opcode = MI.getOpcode(); if (SIInstrInfo::isGenericOpcode(MI.getOpcode())) return true; const MachineFunction *MF = MI.getParent()->getParent(); const MachineRegisterInfo &MRI = MF->getRegInfo(); int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); // Make sure the number of operands is correct. const MCInstrDesc &Desc = get(Opcode); if (!Desc.isVariadic() && Desc.getNumOperands() != MI.getNumExplicitOperands()) { ErrInfo = "Instruction has wrong number of operands."; return false; } if (MI.isInlineAsm()) { // Verify register classes for inlineasm constraints. 
for (unsigned I = InlineAsm::MIOp_FirstOperand, E = MI.getNumOperands(); I != E; ++I) { const TargetRegisterClass *RC = MI.getRegClassConstraint(I, this, &RI); if (!RC) continue; const MachineOperand &Op = MI.getOperand(I); if (!Op.isReg()) continue; unsigned Reg = Op.getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg) && !RC->contains(Reg)) { ErrInfo = "inlineasm operand has incorrect register class."; return false; } } return true; } // Make sure the register classes are correct. for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) { if (MI.getOperand(i).isFPImm()) { ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast " "all fp values to integers."; return false; } int RegClass = Desc.OpInfo[i].RegClass; switch (Desc.OpInfo[i].OperandType) { case MCOI::OPERAND_REGISTER: if (MI.getOperand(i).isImm()) { ErrInfo = "Illegal immediate value for operand."; return false; } break; case AMDGPU::OPERAND_REG_IMM_INT32: case AMDGPU::OPERAND_REG_IMM_FP32: break; case AMDGPU::OPERAND_REG_INLINE_C_INT32: case AMDGPU::OPERAND_REG_INLINE_C_FP32: case AMDGPU::OPERAND_REG_INLINE_C_INT64: case AMDGPU::OPERAND_REG_INLINE_C_FP64: case AMDGPU::OPERAND_REG_INLINE_C_INT16: case AMDGPU::OPERAND_REG_INLINE_C_FP16: { const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() && (!MO.isImm() || !isInlineConstant(MI, i))) { ErrInfo = "Illegal immediate value for operand."; return false; } break; } case MCOI::OPERAND_IMMEDIATE: case AMDGPU::OPERAND_KIMM32: // Check if this operand is an immediate. // FrameIndex operands will be replaced by immediates, so they are // allowed. if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) { ErrInfo = "Expected immediate, but got non-immediate"; return false; } LLVM_FALLTHROUGH; default: continue; } if (!MI.getOperand(i).isReg()) continue; if (RegClass != -1) { unsigned Reg = MI.getOperand(i).getReg(); if (Reg == AMDGPU::NoRegister || TargetRegisterInfo::isVirtualRegister(Reg)) continue; const TargetRegisterClass *RC = RI.getRegClass(RegClass); if (!RC->contains(Reg)) { ErrInfo = "Operand has incorrect register class."; return false; } } } // Verify SDWA if (isSDWA(MI)) { if (!ST.hasSDWA()) { ErrInfo = "SDWA is not supported on this target"; return false; } int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); const int OpIndices[] = { DstIdx, Src0Idx, Src1Idx, Src2Idx }; for (int OpIdx : OpIndices) { if (OpIdx == -1) continue; const MachineOperand &MO = MI.getOperand(OpIdx); if (!ST.hasSDWAScalar()) { // Only VGPRs on VI if (!MO.isReg() || !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.getReg()))) { ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI"; return false; } } else { // No immediates on GFX9 if (!MO.isReg()) { ErrInfo = "Only reg allowed as operands in SDWA instructions on GFX9"; return false; } } } if (!ST.hasSDWAOmod()) { // No omod allowed on VI const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod); if (OMod != nullptr && (!OMod->isImm() || OMod->getImm() != 0)) { ErrInfo = "OMod not allowed in SDWA instructions on VI"; return false; } } uint16_t BasicOpcode = AMDGPU::getBasicFromSDWAOp(Opcode); if (isVOPC(BasicOpcode)) { if (!ST.hasSDWASdst() && DstIdx != -1) { // Only vcc allowed as dst on VI for VOPC const MachineOperand &Dst = MI.getOperand(DstIdx); if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) { ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI"; return false; } } else if (!ST.hasSDWAOutModsVOPC()) { // No clamp allowed on GFX9 for VOPC const MachineOperand *Clamp
= getNamedOperand(MI, AMDGPU::OpName::clamp); if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) { ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI"; return false; } // No omod allowed on GFX9 for VOPC const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod); if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) { ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI"; return false; } } } const MachineOperand *DstUnused = getNamedOperand(MI, AMDGPU::OpName::dst_unused); if (DstUnused && DstUnused->isImm() && DstUnused->getImm() == AMDGPU::SDWA::UNUSED_PRESERVE) { const MachineOperand &Dst = MI.getOperand(DstIdx); if (!Dst.isReg() || !Dst.isTied()) { ErrInfo = "Dst register should have tied register"; return false; } const MachineOperand &TiedMO = MI.getOperand(MI.findTiedOperandIdx(DstIdx)); if (!TiedMO.isReg() || !TiedMO.isImplicit() || !TiedMO.isUse()) { ErrInfo = "Dst register should be tied to implicit use of preserved register"; return false; } else if (TargetRegisterInfo::isPhysicalRegister(TiedMO.getReg()) && Dst.getReg() != TiedMO.getReg()) { ErrInfo = "Dst register should use same physical register as preserved"; return false; } } } // Verify VOP* if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isVOPC(MI) || isSDWA(MI)) { // Only look at the true operands. Only a real operand can use the constant // bus, and we don't want to check pseudo-operands like the source modifier // flags. const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; unsigned ConstantBusCount = 0; if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) ++ConstantBusCount; unsigned SGPRUsed = findImplicitSGPRRead(MI); if (SGPRUsed != AMDGPU::NoRegister) ++ConstantBusCount; for (int OpIdx : OpIndices) { if (OpIdx == -1) break; const MachineOperand &MO = MI.getOperand(OpIdx); if (usesConstantBus(MRI, MO, MI.getDesc().OpInfo[OpIdx])) { if (MO.isReg()) { if (MO.getReg() != SGPRUsed) ++ConstantBusCount; SGPRUsed = MO.getReg(); } else { ++ConstantBusCount; } } } if (ConstantBusCount > 1) { ErrInfo = "VOP* instruction uses the constant bus more than once"; return false; } } // Verify misc. restrictions on specific instructions. if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 || Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) { const MachineOperand &Src0 = MI.getOperand(Src0Idx); const MachineOperand &Src1 = MI.getOperand(Src1Idx); const MachineOperand &Src2 = MI.getOperand(Src2Idx); if (Src0.isReg() && Src1.isReg() && Src2.isReg()) { if (!compareMachineOp(Src0, Src1) && !compareMachineOp(Src0, Src2)) { ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2"; return false; } } } if (isSOPK(MI)) { int64_t Imm = getNamedOperand(MI, AMDGPU::OpName::simm16)->getImm(); if (sopkIsZext(MI)) { if (!isUInt<16>(Imm)) { ErrInfo = "invalid immediate for SOPK instruction"; return false; } } else { if (!isInt<16>(Imm)) { ErrInfo = "invalid immediate for SOPK instruction"; return false; } } } if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 || Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 || Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 || Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) { const bool IsDst = Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 || Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64; const unsigned StaticNumOps = Desc.getNumOperands() + Desc.getNumImplicitUses(); const unsigned NumImplicitOps = IsDst ? 2 : 1; // Allow additional implicit operands. 
This allows a fixup done by the post // RA scheduler where the main implicit operand is killed and implicit-defs // are added for sub-registers that remain live after this instruction. if (MI.getNumOperands() < StaticNumOps + NumImplicitOps) { ErrInfo = "missing implicit register operands"; return false; } const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst); if (IsDst) { if (!Dst->isUse()) { ErrInfo = "v_movreld_b32 vdst should be a use operand"; return false; } unsigned UseOpIdx; if (!MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) || UseOpIdx != StaticNumOps + 1) { ErrInfo = "movrel implicit operands should be tied"; return false; } } const MachineOperand &Src0 = MI.getOperand(Src0Idx); const MachineOperand &ImpUse = MI.getOperand(StaticNumOps + NumImplicitOps - 1); if (!ImpUse.isReg() || !ImpUse.isUse() || !isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) { ErrInfo = "src0 should be subreg of implicit vector use"; return false; } } // Make sure we aren't losing exec uses in the td files. This mostly requires // being careful when using let Uses to try to add other use registers. if (shouldReadExec(MI)) { if (!MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) { ErrInfo = "VALU instruction does not implicitly read exec mask"; return false; } } if (isSMRD(MI)) { if (MI.mayStore()) { // The register offset form of scalar stores may only use m0 as the // soffset register. const MachineOperand *Soff = getNamedOperand(MI, AMDGPU::OpName::soff); if (Soff && Soff->getReg() != AMDGPU::M0) { ErrInfo = "scalar stores must use m0 as offset register"; return false; } } } if (isFLAT(MI) && !MF->getSubtarget().hasFlatInstOffsets()) { const MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset); if (Offset->getImm() != 0) { ErrInfo = "subtarget does not support offsets in flat instructions"; return false; } } return true; } unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const { switch (MI.getOpcode()) { default: return AMDGPU::INSTRUCTION_LIST_END; case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE; case AMDGPU::COPY: return AMDGPU::COPY; case AMDGPU::PHI: return AMDGPU::PHI; case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG; case AMDGPU::WQM: return AMDGPU::WQM; case AMDGPU::WWM: return AMDGPU::WWM; case AMDGPU::S_MOV_B32: return MI.getOperand(1).isReg() ? AMDGPU::COPY : AMDGPU::V_MOV_B32_e32; case AMDGPU::S_ADD_I32: return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_I32_e32; case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32; case AMDGPU::S_SUB_I32: return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32; // FIXME: These are not consistently handled, and selected when the carry is // used. 
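// e.g. S_ADD_U32 below produces its carry in SCC, while the VALU replacement
// V_ADD_I32_e32 defines VCC instead, so whatever moves these to the VALU must
// also rewrite the carry consumers (a rough gloss on the FIXME above).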
case AMDGPU::S_ADD_U32: return AMDGPU::V_ADD_I32_e32; case AMDGPU::S_SUB_U32: return AMDGPU::V_SUB_I32_e32; case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32; case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32; case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64; case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64; case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64; case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e64; case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e64; case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e64; case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e64; case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32; case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64; case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32; case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64; case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32; case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64; case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32; case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32; case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32; case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32; case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64; case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32; case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32; case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32; case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32; case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32; case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32; case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32; case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32; case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32; case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e32; case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e32; case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e32; case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e32; case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e32; case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e32; case AMDGPU::S_CMP_EQ_U64: return AMDGPU::V_CMP_EQ_U64_e32; case AMDGPU::S_CMP_LG_U64: return AMDGPU::V_CMP_NE_U64_e32; case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64; case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32; case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32; case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64; case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ; case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ; } } const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI, unsigned OpNo) const { const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); const MCInstrDesc &Desc = get(MI.getOpcode()); if (MI.isVariadic() || OpNo >= Desc.getNumOperands() || Desc.OpInfo[OpNo].RegClass == -1) { unsigned Reg = MI.getOperand(OpNo).getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) return MRI.getRegClass(Reg); return RI.getPhysRegClass(Reg); } unsigned RCID = Desc.OpInfo[OpNo].RegClass; return RI.getRegClass(RCID); } bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const { switch (MI.getOpcode()) { case AMDGPU::COPY: case AMDGPU::REG_SEQUENCE: case AMDGPU::PHI: case AMDGPU::INSERT_SUBREG: return RI.hasVGPRs(getOpRegClass(MI, 0)); default: return RI.hasVGPRs(getOpRegClass(MI, OpNo)); } } void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const { MachineBasicBlock::iterator I = MI; MachineBasicBlock *MBB = MI.getParent(); 
MachineOperand &MO = MI.getOperand(OpIdx); MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); unsigned RCID = get(MI.getOpcode()).OpInfo[OpIdx].RegClass; const TargetRegisterClass *RC = RI.getRegClass(RCID); unsigned Opcode = AMDGPU::V_MOV_B32_e32; if (MO.isReg()) Opcode = AMDGPU::COPY; else if (RI.isSGPRClass(RC)) Opcode = AMDGPU::S_MOV_B32; const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC); if (RI.getCommonSubClass(&AMDGPU::VReg_64RegClass, VRC)) VRC = &AMDGPU::VReg_64RegClass; else VRC = &AMDGPU::VGPR_32RegClass; unsigned Reg = MRI.createVirtualRegister(VRC); DebugLoc DL = MBB->findDebugLoc(I); BuildMI(*MI.getParent(), I, DL, get(Opcode), Reg).add(MO); MO.ChangeToRegister(Reg, false); } unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const { MachineBasicBlock *MBB = MI->getParent(); DebugLoc DL = MI->getDebugLoc(); unsigned SubReg = MRI.createVirtualRegister(SubRC); if (SuperReg.getSubReg() == AMDGPU::NoSubRegister) { BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg) .addReg(SuperReg.getReg(), 0, SubIdx); return SubReg; } // Just in case the super register is itself a sub-register, copy it to a new // value so we don't need to worry about merging its subreg index with the // SubIdx passed to this function. The register coalescer should be able to // eliminate this extra copy. unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC); BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg) .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg()); BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg) .addReg(NewSuperReg, 0, SubIdx); return SubReg; } MachineOperand SIInstrInfo::buildExtractSubRegOrImm( MachineBasicBlock::iterator MII, MachineRegisterInfo &MRI, MachineOperand &Op, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const { if (Op.isImm()) { if (SubIdx == AMDGPU::sub0) return MachineOperand::CreateImm(static_cast<int32_t>(Op.getImm())); if (SubIdx == AMDGPU::sub1) return MachineOperand::CreateImm(static_cast<int32_t>(Op.getImm() >> 32)); llvm_unreachable("Unhandled register index for immediate"); } unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC, SubIdx, SubRC); return MachineOperand::CreateReg(SubReg, false); } // Change the order of operands from (0, 1, 2) to (0, 2, 1) void SIInstrInfo::swapOperands(MachineInstr &Inst) const { assert(Inst.getNumExplicitOperands() == 3); MachineOperand Op1 = Inst.getOperand(1); Inst.RemoveOperand(1); Inst.addOperand(Op1); } bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const { if (!MO.isReg()) return false; unsigned Reg = MO.getReg(); const TargetRegisterClass *RC = TargetRegisterInfo::isVirtualRegister(Reg) ? MRI.getRegClass(Reg) : RI.getPhysRegClass(Reg); const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo()); RC = TRI->getSubRegClass(RC, MO.getSubReg()); // In order to be legal, the common sub-class must be equal to the // class of the current operand.
For example: // // v_mov_b32 s0 ; Operand defined as vsrc_b32 // ; RI.getCommonSubClass(s0,vsrc_b32) = sgpr ; LEGAL // // s_sendmsg 0, s0 ; Operand defined as m0reg // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC; } bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const { if (MO.isReg()) return isLegalRegOperand(MRI, OpInfo, MO); // Handle non-register types that are treated like immediates. assert(MO.isImm() || MO.isTargetIndex() || MO.isFI()); return true; } bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO) const { const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); const MCInstrDesc &InstDesc = MI.getDesc(); const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx]; const TargetRegisterClass *DefinedRC = OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr; if (!MO) MO = &MI.getOperand(OpIdx); if (isVALU(MI) && usesConstantBus(MRI, *MO, OpInfo)) { RegSubRegPair SGPRUsed; if (MO->isReg()) SGPRUsed = RegSubRegPair(MO->getReg(), MO->getSubReg()); for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { if (i == OpIdx) continue; const MachineOperand &Op = MI.getOperand(i); if (Op.isReg()) { if ((Op.getReg() != SGPRUsed.Reg || Op.getSubReg() != SGPRUsed.SubReg) && usesConstantBus(MRI, Op, InstDesc.OpInfo[i])) { return false; } } else if (InstDesc.OpInfo[i].OperandType == AMDGPU::OPERAND_KIMM32) { return false; } } } if (MO->isReg()) { assert(DefinedRC); return isLegalRegOperand(MRI, OpInfo, *MO); } // Handle non-register types that are treated like immediates. assert(MO->isImm() || MO->isTargetIndex() || MO->isFI()); if (!DefinedRC) { // This operand expects an immediate. return true; } return isImmOperandLegal(MI, OpIdx, *MO); } void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const { unsigned Opc = MI.getOpcode(); const MCInstrDesc &InstrDesc = get(Opc); int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); MachineOperand &Src1 = MI.getOperand(Src1Idx); // If there is an implicit SGPR use such as VCC use for v_addc_u32/v_subb_u32 // we need to only have one constant bus use. // // Note we do not need to worry about literal constants here. They are // disabled for the operand type for instructions because they will always // violate the one constant bus use rule. bool HasImplicitSGPR = findImplicitSGPRRead(MI) != AMDGPU::NoRegister; if (HasImplicitSGPR) { int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); MachineOperand &Src0 = MI.getOperand(Src0Idx); if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg())) legalizeOpWithMove(MI, Src0Idx); } // VOP2 src0 instructions support all operand types, so we don't need to check // their legality. If src1 is already legal, we don't need to do anything. if (isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src1)) return; // Special case: V_READLANE_B32 accepts only immediate or SGPR operands for // lane select. Fix up using V_READFIRSTLANE, since we assume that the lane // select is uniform. 
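[Editor's note: a rough standalone model (plain C++, not part of the patch) of what V_READFIRSTLANE_B32 computes; the fix-up in the code that follows is only sound because the lane-select value is assumed uniform, i.e. all active lanes already agree.]

#include <array>
#include <cassert>
#include <cstdint>

// Model of V_READFIRSTLANE_B32 over a 64-lane wavefront: return the value
// held by the first lane whose exec bit is set.
static uint32_t readFirstLane(const std::array<uint32_t, 64> &Lanes,
                              uint64_t Exec) {
  assert(Exec != 0 && "no active lanes");
  for (unsigned L = 0; L < 64; ++L)
    if (Exec & (uint64_t(1) << L))
      return Lanes[L];
  return 0; // unreachable: Exec was nonzero
}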
if (Opc == AMDGPU::V_READLANE_B32 && Src1.isReg() && RI.isVGPR(MRI, Src1.getReg())) { unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); const DebugLoc &DL = MI.getDebugLoc(); BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) .add(Src1); Src1.ChangeToRegister(Reg, false); return; } // We do not use commuteInstruction here because it is too aggressive and will // commute if it is possible. We only want to commute here if it improves // legality. This can be called a fairly large number of times so don't waste // compile time pointlessly swapping and checking legality again. if (HasImplicitSGPR || !MI.isCommutable()) { legalizeOpWithMove(MI, Src1Idx); return; } int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); MachineOperand &Src0 = MI.getOperand(Src0Idx); // If src0 can be used as src1, commuting will make the operands legal. // Otherwise we have to give up and insert a move. // // TODO: Other immediate-like operand kinds could be commuted if there was a // MachineOperand::ChangeTo* for them. if ((!Src1.isImm() && !Src1.isReg()) || !isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0)) { legalizeOpWithMove(MI, Src1Idx); return; } int CommutedOpc = commuteOpcode(MI); if (CommutedOpc == -1) { legalizeOpWithMove(MI, Src1Idx); return; } MI.setDesc(get(CommutedOpc)); unsigned Src0Reg = Src0.getReg(); unsigned Src0SubReg = Src0.getSubReg(); bool Src0Kill = Src0.isKill(); if (Src1.isImm()) Src0.ChangeToImmediate(Src1.getImm()); else if (Src1.isReg()) { Src0.ChangeToRegister(Src1.getReg(), false, false, Src1.isKill()); Src0.setSubReg(Src1.getSubReg()); } else llvm_unreachable("Should only have register or immediate operands"); Src1.ChangeToRegister(Src0Reg, false, false, Src0Kill); Src1.setSubReg(Src0SubReg); } // Legalize VOP3 operands. Because all operand types are supported for any // operand, and since literal constants are not allowed and should never be // seen, we only need to worry about inserting copies if we use multiple SGPR // operands. void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const { unsigned Opc = MI.getOpcode(); int VOP3Idx[3] = { AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0), AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1), AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2) }; // Find the one SGPR operand we are allowed to use. unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx); for (unsigned i = 0; i < 3; ++i) { int Idx = VOP3Idx[i]; if (Idx == -1) break; MachineOperand &MO = MI.getOperand(Idx); // We should never see a VOP3 instruction with an illegal immediate operand. if (!MO.isReg()) continue; if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg()))) continue; // VGPRs are legal if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) { SGPRReg = MO.getReg(); // We can use one SGPR in each VOP3 instruction. continue; } // If we make it this far, then the operand is not legal and we must // legalize it. 
legalizeOpWithMove(MI, Idx); } } unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI) const { const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg); const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC); unsigned DstReg = MRI.createVirtualRegister(SRC); unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32; SmallVector<unsigned, 8> SRegs; for (unsigned i = 0; i < SubRegs; ++i) { unsigned SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(AMDGPU::V_READFIRSTLANE_B32), SGPR) .addReg(SrcReg, 0, RI.getSubRegFromChannel(i)); SRegs.push_back(SGPR); } MachineInstrBuilder MIB = BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), DstReg); for (unsigned i = 0; i < SubRegs; ++i) { MIB.addReg(SRegs[i]); MIB.addImm(RI.getSubRegFromChannel(i)); } return DstReg; } void SIInstrInfo::legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const { // If the pointer is stored in VGPRs, then we need to move them to // SGPRs using v_readfirstlane. This is safe because we only select // loads with uniform pointers to SMRD instructions, so we know the // pointer value is uniform. MachineOperand *SBase = getNamedOperand(MI, AMDGPU::OpName::sbase); if (SBase && !RI.isSGPRClass(MRI.getRegClass(SBase->getReg()))) { unsigned SGPR = readlaneVGPRToSGPR(SBase->getReg(), MI, MRI); SBase->setReg(SGPR); } } void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const { unsigned OpReg = Op.getReg(); unsigned OpSubReg = Op.getSubReg(); const TargetRegisterClass *OpRC = RI.getSubClassWithSubReg( RI.getRegClassForReg(MRI, OpReg), OpSubReg); // Check if operand is already the correct register class. if (DstRC == OpRC) return; unsigned DstReg = MRI.createVirtualRegister(DstRC); MachineInstr *Copy = BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg).add(Op); Op.setReg(DstReg); Op.setSubReg(0); MachineInstr *Def = MRI.getVRegDef(OpReg); if (!Def) return; // Try to eliminate the copy if it is copying an immediate value. if (Def->isMoveImmediate()) FoldImmediate(*Copy, *Def, OpReg, &MRI); } void SIInstrInfo::legalizeOperands(MachineInstr &MI) const { MachineFunction &MF = *MI.getParent()->getParent(); MachineRegisterInfo &MRI = MF.getRegInfo(); // Legalize VOP2 if (isVOP2(MI) || isVOPC(MI)) { legalizeOperandsVOP2(MRI, MI); return; } // Legalize VOP3 if (isVOP3(MI)) { legalizeOperandsVOP3(MRI, MI); return; } // Legalize SMRD if (isSMRD(MI)) { legalizeOperandsSMRD(MRI, MI); return; } // Legalize REG_SEQUENCE and PHI // The register class of the operands must be the same type as the register // class of the output. if (MI.getOpcode() == AMDGPU::PHI) { const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr; for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) { if (!MI.getOperand(i).isReg() || !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg())) continue; const TargetRegisterClass *OpRC = MRI.getRegClass(MI.getOperand(i).getReg()); if (RI.hasVGPRs(OpRC)) { VRC = OpRC; } else { SRC = OpRC; } } // If any of the operands are VGPR registers, then they all must be VGPRs; // otherwise we will create illegal VGPR->SGPR copies when legalizing // them.
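[Editor's note: a compact restatement of that rule as a sketch in plain C++ (not LLVM code): an SGPR value can always be copied into a VGPR, but not vice versa, so a single vector-bank input forces the PHI result into the vector bank. The code below then picks the common class accordingly.]

#include <initializer_list>

enum class Bank { SGPR, VGPR };

static Bank unifyPhiBank(std::initializer_list<Bank> Inputs) {
  for (Bank B : Inputs)
    if (B == Bank::VGPR)
      return Bank::VGPR; // any divergent input forces a VGPR result
  return Bank::SGPR;     // all inputs uniform: the PHI may stay scalar
}

// e.g. unifyPhiBank({Bank::SGPR, Bank::VGPR}) == Bank::VGPR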
if (VRC || !RI.isSGPRClass(getOpRegClass(MI, 0))) { if (!VRC) { assert(SRC); VRC = RI.getEquivalentVGPRClass(SRC); } RC = VRC; } else { RC = SRC; } // Update all the operands so they have the same type. for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) { MachineOperand &Op = MI.getOperand(I); if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg())) continue; // MI is a PHI instruction. MachineBasicBlock *InsertBB = MI.getOperand(I + 1).getMBB(); MachineBasicBlock::iterator Insert = InsertBB->getFirstTerminator(); // Avoid creating no-op copies with the same src and dst reg class. These // confuse some of the machine passes. legalizeGenericOperand(*InsertBB, Insert, RC, Op, MRI, MI.getDebugLoc()); } } // REG_SEQUENCE doesn't really require operand legalization, but if one has a // VGPR dest type and SGPR sources, insert copies so all operands are // VGPRs. This seems to help operand folding / the register coalescer. if (MI.getOpcode() == AMDGPU::REG_SEQUENCE) { MachineBasicBlock *MBB = MI.getParent(); const TargetRegisterClass *DstRC = getOpRegClass(MI, 0); if (RI.hasVGPRs(DstRC)) { // Update all the operands so they are VGPR register classes. These may // not be the same register class because REG_SEQUENCE supports mixing // subregister index types e.g. sub0_sub1 + sub2 + sub3 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) { MachineOperand &Op = MI.getOperand(I); if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg())) continue; const TargetRegisterClass *OpRC = MRI.getRegClass(Op.getReg()); const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(OpRC); if (VRC == OpRC) continue; legalizeGenericOperand(*MBB, MI, VRC, Op, MRI, MI.getDebugLoc()); Op.setIsKill(); } } return; } // Legalize INSERT_SUBREG // src0 must have the same register class as dst if (MI.getOpcode() == AMDGPU::INSERT_SUBREG) { unsigned Dst = MI.getOperand(0).getReg(); unsigned Src0 = MI.getOperand(1).getReg(); const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0); if (DstRC != Src0RC) { MachineBasicBlock *MBB = MI.getParent(); MachineOperand &Op = MI.getOperand(1); legalizeGenericOperand(*MBB, MI, DstRC, Op, MRI, MI.getDebugLoc()); } return; } // Legalize MIMG and MUBUF/MTBUF for shaders. // // Shaders only generate MUBUF/MTBUF instructions via intrinsics or via // scratch memory access. In both cases, the legalization never involves // conversion to the addr64 form. if (isMIMG(MI) || (AMDGPU::isShader(MF.getFunction().getCallingConv()) && (isMUBUF(MI) || isMTBUF(MI)))) { MachineOperand *SRsrc = getNamedOperand(MI, AMDGPU::OpName::srsrc); if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg()))) { unsigned SGPR = readlaneVGPRToSGPR(SRsrc->getReg(), MI, MRI); SRsrc->setReg(SGPR); } MachineOperand *SSamp = getNamedOperand(MI, AMDGPU::OpName::ssamp); if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg()))) { unsigned SGPR = readlaneVGPRToSGPR(SSamp->getReg(), MI, MRI); SSamp->setReg(SGPR); } return; } // Legalize MUBUF* instructions by converting to addr64 form. // FIXME: If we start using the non-addr64 instructions for compute, we // may need to legalize them as above. This especially applies to the // buffer_load_format_* variants and variants with idxen (or bothen). 
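[Editor's note: for reference, the 128-bit buffer resource descriptor assembled by the REG_SEQUENCE below has the dword layout sketched here (plain C++, illustrative only; the real format bits come from getDefaultRsrcDataFormat). In this addr64 path the pointer dwords are zeroed because the pointer extracted from the old descriptor is added into VADDR instead.]

#include <array>
#include <cstdint>

// sub0_sub1: 64-bit base pointer; sub2/sub3: RSRC_DATA_FORMAT halves.
static std::array<uint32_t, 4> makeRsrc(uint64_t BasePtr, uint64_t Format) {
  return {uint32_t(BasePtr & 0xFFFFFFFF), // sub0: pointer, low dword
          uint32_t(BasePtr >> 32),        // sub1: pointer, high dword
          uint32_t(Format & 0xFFFFFFFF),  // sub2: RSRC_DATA_FORMAT{31-0}
          uint32_t(Format >> 32)};        // sub3: RSRC_DATA_FORMAT{63-32}
}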
int SRsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc); if (SRsrcIdx != -1) { // We have a MUBUF instruction MachineOperand *SRsrc = &MI.getOperand(SRsrcIdx); unsigned SRsrcRC = get(MI.getOpcode()).OpInfo[SRsrcIdx].RegClass; if (RI.getCommonSubClass(MRI.getRegClass(SRsrc->getReg()), RI.getRegClass(SRsrcRC))) { // The operands are legal. // FIXME: We may need to legalize operands besides srsrc. return; } MachineBasicBlock &MBB = *MI.getParent(); // Extract the ptr from the resource descriptor. unsigned SRsrcPtr = buildExtractSubReg(MI, MRI, *SRsrc, &AMDGPU::VReg_128RegClass, AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass); // Create an empty resource descriptor unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass); uint64_t RsrcDataFormat = getDefaultRsrcDataFormat(); // Zero64 = 0 BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::S_MOV_B64), Zero64) .addImm(0); // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0} BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::S_MOV_B32), SRsrcFormatLo) .addImm(RsrcDataFormat & 0xFFFFFFFF); // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32} BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::S_MOV_B32), SRsrcFormatHi) .addImm(RsrcDataFormat >> 32); // NewSRsrc = {Zero64, SRsrcFormat} BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewSRsrc) .addReg(Zero64) .addImm(AMDGPU::sub0_sub1) .addReg(SRsrcFormatLo) .addImm(AMDGPU::sub2) .addReg(SRsrcFormatHi) .addImm(AMDGPU::sub3); MachineOperand *VAddr = getNamedOperand(MI, AMDGPU::OpName::vaddr); unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); if (VAddr) { // This is already an ADDR64 instruction so we need to add the pointer // extracted from the resource descriptor to the current value of VAddr. unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); // NewVaddrLo = SRsrcPtr:sub0 + VAddr:sub0 DebugLoc DL = MI.getDebugLoc(); BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), NewVAddrLo) .addReg(SRsrcPtr, 0, AMDGPU::sub0) .addReg(VAddr->getReg(), 0, AMDGPU::sub0); // NewVaddrHi = SRsrcPtr:sub1 + VAddr:sub1 BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e32), NewVAddrHi) .addReg(SRsrcPtr, 0, AMDGPU::sub1) .addReg(VAddr->getReg(), 0, AMDGPU::sub1); // NewVaddr = {NewVaddrHi, NewVaddrLo} BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr) .addReg(NewVAddrLo) .addImm(AMDGPU::sub0) .addReg(NewVAddrHi) .addImm(AMDGPU::sub1); } else { // This instruction is the _OFFSET variant, so we need to convert it to // ADDR64. assert(MBB.getParent()->getSubtarget<SISubtarget>().getGeneration() < SISubtarget::VOLCANIC_ISLANDS && "FIXME: Need to emit flat atomics here"); MachineOperand *VData = getNamedOperand(MI, AMDGPU::OpName::vdata); MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset); MachineOperand *SOffset = getNamedOperand(MI, AMDGPU::OpName::soffset); unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI.getOpcode()); // Atomics with return have an additional tied operand and are // missing some of the special bits. MachineOperand *VDataIn = getNamedOperand(MI, AMDGPU::OpName::vdata_in); MachineInstr *Addr64; if (!VDataIn) { // Regular buffer load / store.
MachineInstrBuilder MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode)) .add(*VData) .addReg(AMDGPU::NoRegister) // Dummy value for vaddr. // This will be replaced later // with the new value of vaddr. .add(*SRsrc) .add(*SOffset) .add(*Offset); // Atomics do not have this operand. if (const MachineOperand *GLC = getNamedOperand(MI, AMDGPU::OpName::glc)) { MIB.addImm(GLC->getImm()); } MIB.addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc)); if (const MachineOperand *TFE = getNamedOperand(MI, AMDGPU::OpName::tfe)) { MIB.addImm(TFE->getImm()); } MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); Addr64 = MIB; } else { // Atomics with return. Addr64 = BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode)) .add(*VData) .add(*VDataIn) .addReg(AMDGPU::NoRegister) // Dummy value for vaddr. // This will be replaced later // with the new value of vaddr. .add(*SRsrc) .add(*SOffset) .add(*Offset) .addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc)) .setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); } MI.removeFromParent(); // NewVaddr = {NewVaddrHi, NewVaddrLo} BuildMI(MBB, Addr64, Addr64->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr) .addReg(SRsrcPtr, 0, AMDGPU::sub0) .addImm(AMDGPU::sub0) .addReg(SRsrcPtr, 0, AMDGPU::sub1) .addImm(AMDGPU::sub1); VAddr = getNamedOperand(*Addr64, AMDGPU::OpName::vaddr); SRsrc = getNamedOperand(*Addr64, AMDGPU::OpName::srsrc); } // Update the instruction to use NewVaddr VAddr->setReg(NewVAddr); // Update the instruction to use NewSRsrc SRsrc->setReg(NewSRsrc); } } void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { SetVectorType Worklist; Worklist.insert(&TopInst); while (!Worklist.empty()) { MachineInstr &Inst = *Worklist.pop_back_val(); MachineBasicBlock *MBB = Inst.getParent(); MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); unsigned Opcode = Inst.getOpcode(); unsigned NewOpcode = getVALUOp(Inst); // Handle some special cases switch (Opcode) { default: break; case AMDGPU::S_ADD_U64_PSEUDO: case AMDGPU::S_SUB_U64_PSEUDO: splitScalar64BitAddSub(Worklist, Inst); Inst.eraseFromParent(); continue; case AMDGPU::S_ADD_I32: case AMDGPU::S_SUB_I32: // FIXME: The u32 versions currently selected use the carry. 
if (moveScalarAddSub(Worklist, Inst)) continue; // Default handling break; case AMDGPU::S_AND_B64: splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_AND_B32_e64); Inst.eraseFromParent(); continue; case AMDGPU::S_OR_B64: splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_OR_B32_e64); Inst.eraseFromParent(); continue; case AMDGPU::S_XOR_B64: splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_XOR_B32_e64); Inst.eraseFromParent(); continue; case AMDGPU::S_NOT_B64: splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::V_NOT_B32_e32); Inst.eraseFromParent(); continue; case AMDGPU::S_BCNT1_I32_B64: splitScalar64BitBCNT(Worklist, Inst); Inst.eraseFromParent(); continue; case AMDGPU::S_BFE_I64: splitScalar64BitBFE(Worklist, Inst); Inst.eraseFromParent(); continue; case AMDGPU::S_LSHL_B32: if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { NewOpcode = AMDGPU::V_LSHLREV_B32_e64; swapOperands(Inst); } break; case AMDGPU::S_ASHR_I32: if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { NewOpcode = AMDGPU::V_ASHRREV_I32_e64; swapOperands(Inst); } break; case AMDGPU::S_LSHR_B32: if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { NewOpcode = AMDGPU::V_LSHRREV_B32_e64; swapOperands(Inst); } break; case AMDGPU::S_LSHL_B64: if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { NewOpcode = AMDGPU::V_LSHLREV_B64; swapOperands(Inst); } break; case AMDGPU::S_ASHR_I64: if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { NewOpcode = AMDGPU::V_ASHRREV_I64; swapOperands(Inst); } break; case AMDGPU::S_LSHR_B64: if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { NewOpcode = AMDGPU::V_LSHRREV_B64; swapOperands(Inst); } break; case AMDGPU::S_ABS_I32: lowerScalarAbs(Worklist, Inst); Inst.eraseFromParent(); continue; case AMDGPU::S_CBRANCH_SCC0: case AMDGPU::S_CBRANCH_SCC1: // Clear unused bits of vcc BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(AMDGPU::S_AND_B64), AMDGPU::VCC) .addReg(AMDGPU::EXEC) .addReg(AMDGPU::VCC); break; case AMDGPU::S_BFE_U64: case AMDGPU::S_BFM_B64: llvm_unreachable("Moving this op to VALU not implemented"); case AMDGPU::S_PACK_LL_B32_B16: case AMDGPU::S_PACK_LH_B32_B16: case AMDGPU::S_PACK_HH_B32_B16: movePackToVALU(Worklist, MRI, Inst); Inst.eraseFromParent(); continue; case AMDGPU::S_XNOR_B32: lowerScalarXnor(Worklist, Inst); Inst.eraseFromParent(); continue; case AMDGPU::S_XNOR_B64: splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32); Inst.eraseFromParent(); continue; case AMDGPU::S_BUFFER_LOAD_DWORD_SGPR: { unsigned VDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); const MachineOperand *VAddr = getNamedOperand(Inst, AMDGPU::OpName::soff); auto Add = MRI.getUniqueVRegDef(VAddr->getReg()); unsigned Offset = 0; // FIXME: This isn't safe because the addressing mode doesn't work // correctly if vaddr is negative. // // FIXME: Should probably be done somewhere else, maybe SIFoldOperands. // // See if we can extract an immediate offset by recognizing one of these: // V_ADD_I32_e32 dst, imm, src1 // V_ADD_I32_e32 dst, (S_MOV_B32 imm), src1 // V_ADD will be removed by "Remove dead machine instructions". if (Add && (Add->getOpcode() == AMDGPU::V_ADD_I32_e32 || Add->getOpcode() == AMDGPU::V_ADD_U32_e64)) { static const unsigned SrcNames[2] = { AMDGPU::OpName::src0, AMDGPU::OpName::src1, }; // Find a literal offset in one of source operands. 
for (int i = 0; i < 2; i++) { const MachineOperand *Src = getNamedOperand(*Add, SrcNames[i]); if (Src->isReg()) { auto Mov = MRI.getUniqueVRegDef(Src->getReg()); if (Mov && Mov->getOpcode() == AMDGPU::S_MOV_B32) Src = &Mov->getOperand(1); } if (Src) { if (Src->isImm()) Offset = Src->getImm(); else if (Src->isCImm()) Offset = Src->getCImm()->getZExtValue(); } if (Offset && isLegalMUBUFImmOffset(Offset)) { VAddr = getNamedOperand(*Add, SrcNames[!i]); break; } Offset = 0; } } - BuildMI(*MBB, Inst, Inst.getDebugLoc(), + MachineInstr *NewInstr = + BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), VDst) .add(*VAddr) // vaddr .add(*getNamedOperand(Inst, AMDGPU::OpName::sbase)) // srsrc .addImm(0) // soffset .addImm(Offset) // offset .addImm(getNamedOperand(Inst, AMDGPU::OpName::glc)->getImm()) .addImm(0) // slc .addImm(0) // tfe - .setMemRefs(Inst.memoperands_begin(), Inst.memoperands_end()); + .setMemRefs(Inst.memoperands_begin(), Inst.memoperands_end()) + .getInstr(); MRI.replaceRegWith(getNamedOperand(Inst, AMDGPU::OpName::sdst)->getReg(), VDst); addUsersToMoveToVALUWorklist(VDst, MRI, Worklist); Inst.eraseFromParent(); + + // Legalize all operands other than the offset. Notably, convert the srsrc + // into SGPRs using v_readfirstlane if needed. + legalizeOperands(*NewInstr); continue; } } if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) { // We cannot move this instruction to the VALU, so we should try to // legalize its operands instead. legalizeOperands(Inst); continue; } // Use the new VALU Opcode. const MCInstrDesc &NewDesc = get(NewOpcode); Inst.setDesc(NewDesc); // Remove any references to SCC. Vector instructions can't read from it, and // We're just about to add the implicit use / defs of VCC, and we don't want // both. for (unsigned i = Inst.getNumOperands() - 1; i > 0; --i) { MachineOperand &Op = Inst.getOperand(i); if (Op.isReg() && Op.getReg() == AMDGPU::SCC) { Inst.RemoveOperand(i); addSCCDefUsersToVALUWorklist(Inst, Worklist); } } if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) { // We are converting these to a BFE, so we need to add the missing // operands for the size and offset. unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16; Inst.addOperand(MachineOperand::CreateImm(0)); Inst.addOperand(MachineOperand::CreateImm(Size)); } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) { // The VALU version adds the second operand to the result, so insert an // extra 0 operand. Inst.addOperand(MachineOperand::CreateImm(0)); } Inst.addImplicitDefUseOperands(*Inst.getParent()->getParent()); if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) { const MachineOperand &OffsetWidthOp = Inst.getOperand(2); // If we need to move this to VGPRs, we need to unpack the second operand // back into the 2 separate ones for bit offset and width. assert(OffsetWidthOp.isImm() && "Scalar BFE is only implemented for constant width and offset"); uint32_t Imm = OffsetWidthOp.getImm(); uint32_t Offset = Imm & 0x3f; // Extract bits [5:0]. uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16]. Inst.RemoveOperand(2); // Remove old immediate. Inst.addOperand(MachineOperand::CreateImm(Offset)); Inst.addOperand(MachineOperand::CreateImm(BitWidth)); } bool HasDst = Inst.getOperand(0).isReg() && Inst.getOperand(0).isDef(); unsigned NewDstReg = AMDGPU::NoRegister; if (HasDst) { unsigned DstReg = Inst.getOperand(0).getReg(); if (TargetRegisterInfo::isPhysicalRegister(DstReg)) continue; // Update the destination register class. 
const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(Inst); if (!NewDstRC) continue; if (Inst.isCopy() && TargetRegisterInfo::isVirtualRegister(Inst.getOperand(1).getReg()) && NewDstRC == RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) { // Instead of creating a copy where src and dst are the same register // class, we just replace all uses of dst with src. These kinds of // copies interfere with the heuristics MachineSink uses to decide // whether or not to split a critical edge, since the pass assumes // that copies will end up as machine instructions and not be // eliminated. addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist); MRI.replaceRegWith(DstReg, Inst.getOperand(1).getReg()); MRI.clearKillFlags(Inst.getOperand(1).getReg()); Inst.getOperand(0).setReg(DstReg); continue; } NewDstReg = MRI.createVirtualRegister(NewDstRC); MRI.replaceRegWith(DstReg, NewDstReg); } // Legalize the operands legalizeOperands(Inst); if (HasDst) addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist); } } // Add/sub require special handling to deal with carry outs. bool SIInstrInfo::moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst) const { if (ST.hasAddNoCarry()) { // Assume there is no user of scc since we don't select this in that case. // Since scc isn't used, it doesn't really matter if the i32 or u32 variant // is used. MachineBasicBlock &MBB = *Inst.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); unsigned OldDstReg = Inst.getOperand(0).getReg(); unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); unsigned Opc = Inst.getOpcode(); assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32); unsigned NewOpc = Opc == AMDGPU::S_ADD_I32 ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64; assert(Inst.getOperand(3).getReg() == AMDGPU::SCC); Inst.RemoveOperand(3); Inst.setDesc(get(NewOpc)); Inst.addImplicitDefUseOperands(*MBB.getParent()); MRI.replaceRegWith(OldDstReg, ResultReg); legalizeOperands(Inst); addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); return true; } return false; } void SIInstrInfo::lowerScalarAbs(SetVectorType &Worklist, MachineInstr &Inst) const { MachineBasicBlock &MBB = *Inst.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); MachineBasicBlock::iterator MII = Inst; DebugLoc DL = Inst.getDebugLoc(); MachineOperand &Dest = Inst.getOperand(0); MachineOperand &Src = Inst.getOperand(1); unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); unsigned SubOp = ST.hasAddNoCarry() ?
AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_I32_e32; BuildMI(MBB, MII, DL, get(SubOp), TmpReg) .addImm(0) .addReg(Src.getReg()); BuildMI(MBB, MII, DL, get(AMDGPU::V_MAX_I32_e64), ResultReg) .addReg(Src.getReg()) .addReg(TmpReg); MRI.replaceRegWith(Dest.getReg(), ResultReg); addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); } void SIInstrInfo::lowerScalarXnor(SetVectorType &Worklist, MachineInstr &Inst) const { MachineBasicBlock &MBB = *Inst.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); MachineBasicBlock::iterator MII = Inst; const DebugLoc &DL = Inst.getDebugLoc(); MachineOperand &Dest = Inst.getOperand(0); MachineOperand &Src0 = Inst.getOperand(1); MachineOperand &Src1 = Inst.getOperand(2); legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src0, MRI, DL); legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src1, MRI, DL); unsigned Xor = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); BuildMI(MBB, MII, DL, get(AMDGPU::V_XOR_B32_e64), Xor) .add(Src0) .add(Src1); unsigned Not = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); BuildMI(MBB, MII, DL, get(AMDGPU::V_NOT_B32_e64), Not) .addReg(Xor); MRI.replaceRegWith(Dest.getReg(), Not); addUsersToMoveToVALUWorklist(Not, MRI, Worklist); } void SIInstrInfo::splitScalar64BitUnaryOp( SetVectorType &Worklist, MachineInstr &Inst, unsigned Opcode) const { MachineBasicBlock &MBB = *Inst.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); MachineOperand &Dest = Inst.getOperand(0); MachineOperand &Src0 = Inst.getOperand(1); DebugLoc DL = Inst.getDebugLoc(); MachineBasicBlock::iterator MII = Inst; const MCInstrDesc &InstDesc = get(Opcode); const TargetRegisterClass *Src0RC = Src0.isReg() ? MRI.getRegClass(Src0.getReg()) : &AMDGPU::SGPR_32RegClass; const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0); MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC); const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg()); const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC); const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0); unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC); BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0); MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub1, Src0SubRC); unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC); BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1); unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC); BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) .addReg(DestSub0) .addImm(AMDGPU::sub0) .addReg(DestSub1) .addImm(AMDGPU::sub1); MRI.replaceRegWith(Dest.getReg(), FullDestReg); // We don't need to legalizeOperands here because for a single operand, src0 // will support any kind of input. // Move all users of this moved value. 
addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist); } void SIInstrInfo::splitScalar64BitAddSub( SetVectorType &Worklist, MachineInstr &Inst) const { bool IsAdd = (Inst.getOpcode() == AMDGPU::S_ADD_U64_PSEUDO); MachineBasicBlock &MBB = *Inst.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); unsigned FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); unsigned DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); unsigned DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); unsigned CarryReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); unsigned DeadCarryReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); MachineOperand &Dest = Inst.getOperand(0); MachineOperand &Src0 = Inst.getOperand(1); MachineOperand &Src1 = Inst.getOperand(2); const DebugLoc &DL = Inst.getDebugLoc(); MachineBasicBlock::iterator MII = Inst; const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg()); const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg()); const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0); const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0); MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC); MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, AMDGPU::sub0, Src1SubRC); MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub1, Src0SubRC); MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, AMDGPU::sub1, Src1SubRC); unsigned LoOpc = IsAdd ? AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64; MachineInstr *LoHalf = BuildMI(MBB, MII, DL, get(LoOpc), DestSub0) .addReg(CarryReg, RegState::Define) .add(SrcReg0Sub0) .add(SrcReg1Sub0); unsigned HiOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64; MachineInstr *HiHalf = BuildMI(MBB, MII, DL, get(HiOpc), DestSub1) .addReg(DeadCarryReg, RegState::Define | RegState::Dead) .add(SrcReg0Sub1) .add(SrcReg1Sub1) .addReg(CarryReg, RegState::Kill); BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) .addReg(DestSub0) .addImm(AMDGPU::sub0) .addReg(DestSub1) .addImm(AMDGPU::sub1); MRI.replaceRegWith(Dest.getReg(), FullDestReg); // Try to legalize the operands in case we need to swap the order to keep it // valid. legalizeOperands(*LoHalf); legalizeOperands(*HiHalf); // Move all users of this moved value. addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist); } void SIInstrInfo::splitScalar64BitBinaryOp( SetVectorType &Worklist, MachineInstr &Inst, unsigned Opcode) const { MachineBasicBlock &MBB = *Inst.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); MachineOperand &Dest = Inst.getOperand(0); MachineOperand &Src0 = Inst.getOperand(1); MachineOperand &Src1 = Inst.getOperand(2); DebugLoc DL = Inst.getDebugLoc(); MachineBasicBlock::iterator MII = Inst; const MCInstrDesc &InstDesc = get(Opcode); const TargetRegisterClass *Src0RC = Src0.isReg() ? MRI.getRegClass(Src0.getReg()) : &AMDGPU::SGPR_32RegClass; const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0); const TargetRegisterClass *Src1RC = Src1.isReg() ?
MRI.getRegClass(Src1.getReg()) : &AMDGPU::SGPR_32RegClass; const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0); MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC); MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, AMDGPU::sub0, Src1SubRC); const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg()); const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC); const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0); unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC); MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0) .add(SrcReg0Sub0) .add(SrcReg1Sub0); MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub1, Src0SubRC); MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, AMDGPU::sub1, Src1SubRC); unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC); MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1) .add(SrcReg0Sub1) .add(SrcReg1Sub1); unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC); BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) .addReg(DestSub0) .addImm(AMDGPU::sub0) .addReg(DestSub1) .addImm(AMDGPU::sub1); MRI.replaceRegWith(Dest.getReg(), FullDestReg); // Try to legalize the operands in case we need to swap the order to keep it // valid. legalizeOperands(LoHalf); legalizeOperands(HiHalf); // Move all users of this moved value. addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist); } void SIInstrInfo::splitScalar64BitBCNT( SetVectorType &Worklist, MachineInstr &Inst) const { MachineBasicBlock &MBB = *Inst.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); MachineBasicBlock::iterator MII = Inst; DebugLoc DL = Inst.getDebugLoc(); MachineOperand &Dest = Inst.getOperand(0); MachineOperand &Src = Inst.getOperand(1); const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64); const TargetRegisterClass *SrcRC = Src.isReg() ? MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass; unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0); MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, AMDGPU::sub0, SrcSubRC); MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, AMDGPU::sub1, SrcSubRC); BuildMI(MBB, MII, DL, InstDesc, MidReg).add(SrcRegSub0).addImm(0); BuildMI(MBB, MII, DL, InstDesc, ResultReg).add(SrcRegSub1).addReg(MidReg); MRI.replaceRegWith(Dest.getReg(), ResultReg); // We don't need to legalize operands here. src0 for either instruction can be // an SGPR, and the second input is unused or determined here. addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); } void SIInstrInfo::splitScalar64BitBFE(SetVectorType &Worklist, MachineInstr &Inst) const { MachineBasicBlock &MBB = *Inst.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); MachineBasicBlock::iterator MII = Inst; DebugLoc DL = Inst.getDebugLoc(); MachineOperand &Dest = Inst.getOperand(0); uint32_t Imm = Inst.getOperand(2).getImm(); uint32_t Offset = Imm & 0x3f; // Extract bits [5:0]. uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16]. (void) Offset; // Only sext_inreg cases handled.
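[Editor's note: the masks above (and the identical ones in moveToVALU) follow the S_BFE immediate layout; a standalone round-trip sketch in plain C++.]

#include <cstdint>

// S_BFE immediate: bit offset in bits [5:0], field width in bits [22:16].
static uint32_t encodeBfeImm(uint32_t Offset, uint32_t Width) {
  return (Offset & 0x3f) | ((Width & 0x7f) << 16);
}

static void decodeBfeImm(uint32_t Imm, uint32_t &Offset, uint32_t &Width) {
  Offset = Imm & 0x3f;            // Extract bits [5:0].
  Width = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
}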
assert(Inst.getOpcode() == AMDGPU::S_BFE_I64 && BitWidth <= 32 && Offset == 0 && "Not implemented"); if (BitWidth < 32) { unsigned MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); unsigned MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32), MidRegLo) .addReg(Inst.getOperand(1).getReg(), 0, AMDGPU::sub0) .addImm(0) .addImm(BitWidth); BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e32), MidRegHi) .addImm(31) .addReg(MidRegLo); BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg) .addReg(MidRegLo) .addImm(AMDGPU::sub0) .addReg(MidRegHi) .addImm(AMDGPU::sub1); MRI.replaceRegWith(Dest.getReg(), ResultReg); addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); return; } MachineOperand &Src = Inst.getOperand(1); unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg) .addImm(31) .addReg(Src.getReg(), 0, AMDGPU::sub0); BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg) .addReg(Src.getReg(), 0, AMDGPU::sub0) .addImm(AMDGPU::sub0) .addReg(TmpReg) .addImm(AMDGPU::sub1); MRI.replaceRegWith(Dest.getReg(), ResultReg); addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); } void SIInstrInfo::addUsersToMoveToVALUWorklist( unsigned DstReg, MachineRegisterInfo &MRI, SetVectorType &Worklist) const { for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg), E = MRI.use_end(); I != E;) { MachineInstr &UseMI = *I->getParent(); if (!canReadVGPR(UseMI, I.getOperandNo())) { Worklist.insert(&UseMI); do { ++I; } while (I != E && I->getParent() == &UseMI); } else { ++I; } } } void SIInstrInfo::movePackToVALU(SetVectorType &Worklist, MachineRegisterInfo &MRI, MachineInstr &Inst) const { unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); MachineBasicBlock *MBB = Inst.getParent(); MachineOperand &Src0 = Inst.getOperand(1); MachineOperand &Src1 = Inst.getOperand(2); const DebugLoc &DL = Inst.getDebugLoc(); switch (Inst.getOpcode()) { case AMDGPU::S_PACK_LL_B32_B16: { unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); // FIXME: Can do a lot better if we know the high bits of src0 or src1 are // 0. 
BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg) .addImm(0xffff); BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_B32_e64), TmpReg) .addReg(ImmReg, RegState::Kill) .add(Src0); BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHL_OR_B32), ResultReg) .add(Src1) .addImm(16) .addReg(TmpReg, RegState::Kill); break; } case AMDGPU::S_PACK_LH_B32_B16: { unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg) .addImm(0xffff); BuildMI(*MBB, Inst, DL, get(AMDGPU::V_BFI_B32), ResultReg) .addReg(ImmReg, RegState::Kill) .add(Src0) .add(Src1); break; } case AMDGPU::S_PACK_HH_B32_B16: { unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHRREV_B32_e64), TmpReg) .addImm(16) .add(Src0); BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg) .addImm(0xffff0000); BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_OR_B32), ResultReg) .add(Src1) .addReg(ImmReg, RegState::Kill) .addReg(TmpReg, RegState::Kill); break; } default: llvm_unreachable("unhandled s_pack_* instruction"); } MachineOperand &Dest = Inst.getOperand(0); MRI.replaceRegWith(Dest.getReg(), ResultReg); addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); } void SIInstrInfo::addSCCDefUsersToVALUWorklist( MachineInstr &SCCDefInst, SetVectorType &Worklist) const { // This assumes that all the users of SCC are in the same block // as the SCC def. for (MachineInstr &MI : make_range(MachineBasicBlock::iterator(SCCDefInst), SCCDefInst.getParent()->end())) { // Exit if we find another SCC def. if (MI.findRegisterDefOperandIdx(AMDGPU::SCC) != -1) return; if (MI.findRegisterUseOperandIdx(AMDGPU::SCC) != -1) Worklist.insert(&MI); } } const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass( const MachineInstr &Inst) const { const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0); switch (Inst.getOpcode()) { // For target instructions, getOpRegClass just returns the virtual register // class associated with the operand, so we need to find an equivalent VGPR // register class in order to move the instruction to the VALU. case AMDGPU::COPY: case AMDGPU::PHI: case AMDGPU::REG_SEQUENCE: case AMDGPU::INSERT_SUBREG: case AMDGPU::WQM: case AMDGPU::WWM: if (RI.hasVGPRs(NewDstRC)) return nullptr; NewDstRC = RI.getEquivalentVGPRClass(NewDstRC); if (!NewDstRC) return nullptr; return NewDstRC; default: return NewDstRC; } } // Find the one SGPR operand we are allowed to use. unsigned SIInstrInfo::findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const { const MCInstrDesc &Desc = MI.getDesc(); // Find the one SGPR operand we are allowed to use. // // First we need to consider the instruction's operand requirements before // legalizing. Some operands are required to be SGPRs, such as implicit uses // of VCC, but we are still bound by the constant bus requirement to only use // one. // // If the operand's class is an SGPR, we can never move it. unsigned SGPRReg = findImplicitSGPRRead(MI); if (SGPRReg != AMDGPU::NoRegister) return SGPRReg; unsigned UsedSGPRs[3] = { AMDGPU::NoRegister }; const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); for (unsigned i = 0; i < 3; ++i) { int Idx = OpIndices[i]; if (Idx == -1) break; const MachineOperand &MO = MI.getOperand(Idx); if (!MO.isReg()) continue; // Is this operand statically required to be an SGPR based on the operand // constraints? 
const TargetRegisterClass *OpRC = RI.getRegClass(Desc.OpInfo[Idx].RegClass); bool IsRequiredSGPR = RI.isSGPRClass(OpRC); if (IsRequiredSGPR) return MO.getReg(); // If this could be a VGPR or an SGPR, Check the dynamic register class. unsigned Reg = MO.getReg(); const TargetRegisterClass *RegRC = MRI.getRegClass(Reg); if (RI.isSGPRClass(RegRC)) UsedSGPRs[i] = Reg; } // We don't have a required SGPR operand, so we have a bit more freedom in // selecting operands to move. // Try to select the most used SGPR. If an SGPR is equal to one of the // others, we choose that. // // e.g. // V_FMA_F32 v0, s0, s0, s0 -> No moves // V_FMA_F32 v0, s0, s1, s0 -> Move s1 // TODO: If some of the operands are 64-bit SGPRs and some 32, we should // prefer those. if (UsedSGPRs[0] != AMDGPU::NoRegister) { if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2]) SGPRReg = UsedSGPRs[0]; } if (SGPRReg == AMDGPU::NoRegister && UsedSGPRs[1] != AMDGPU::NoRegister) { if (UsedSGPRs[1] == UsedSGPRs[2]) SGPRReg = UsedSGPRs[1]; } return SGPRReg; } MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI, unsigned OperandName) const { int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName); if (Idx == -1) return nullptr; return &MI.getOperand(Idx); } uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const { uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT; if (ST.isAmdHsaOS()) { // Set ATC = 1. GFX9 doesn't have this bit. if (ST.getGeneration() <= SISubtarget::VOLCANIC_ISLANDS) RsrcDataFormat |= (1ULL << 56); // Set MTYPE = 2 (MTYPE_UC = uncached). GFX9 doesn't have this. // BTW, it disables TC L2 and therefore decreases performance. if (ST.getGeneration() == SISubtarget::VOLCANIC_ISLANDS) RsrcDataFormat |= (2ULL << 59); } return RsrcDataFormat; } uint64_t SIInstrInfo::getScratchRsrcWords23() const { uint64_t Rsrc23 = getDefaultRsrcDataFormat() | AMDGPU::RSRC_TID_ENABLE | 0xffffffff; // Size; // GFX9 doesn't have ELEMENT_SIZE. if (ST.getGeneration() <= SISubtarget::VOLCANIC_ISLANDS) { uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1; Rsrc23 |= EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT; } // IndexStride = 64. Rsrc23 |= UINT64_C(3) << AMDGPU::RSRC_INDEX_STRIDE_SHIFT; // If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17]. // Clear them unless we want a huge stride. 
if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT; return Rsrc23; } bool SIInstrInfo::isLowLatencyInstruction(const MachineInstr &MI) const { unsigned Opc = MI.getOpcode(); return isSMRD(Opc); } bool SIInstrInfo::isHighLatencyInstruction(const MachineInstr &MI) const { unsigned Opc = MI.getOpcode(); return isMUBUF(Opc) || isMTBUF(Opc) || isMIMG(Opc); } unsigned SIInstrInfo::isStackAccess(const MachineInstr &MI, int &FrameIndex) const { const MachineOperand *Addr = getNamedOperand(MI, AMDGPU::OpName::vaddr); if (!Addr || !Addr->isFI()) return AMDGPU::NoRegister; assert(!MI.memoperands_empty() && (*MI.memoperands_begin())->getAddrSpace() == AMDGPUASI.PRIVATE_ADDRESS); FrameIndex = Addr->getIndex(); return getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg(); } unsigned SIInstrInfo::isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const { const MachineOperand *Addr = getNamedOperand(MI, AMDGPU::OpName::addr); assert(Addr && Addr->isFI()); FrameIndex = Addr->getIndex(); return getNamedOperand(MI, AMDGPU::OpName::data)->getReg(); } unsigned SIInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const { if (!MI.mayLoad()) return AMDGPU::NoRegister; if (isMUBUF(MI) || isVGPRSpill(MI)) return isStackAccess(MI, FrameIndex); if (isSGPRSpill(MI)) return isSGPRStackAccess(MI, FrameIndex); return AMDGPU::NoRegister; } unsigned SIInstrInfo::isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const { if (!MI.mayStore()) return AMDGPU::NoRegister; if (isMUBUF(MI) || isVGPRSpill(MI)) return isStackAccess(MI, FrameIndex); if (isSGPRSpill(MI)) return isSGPRStackAccess(MI, FrameIndex); return AMDGPU::NoRegister; } unsigned SIInstrInfo::getInstBundleSize(const MachineInstr &MI) const { unsigned Size = 0; MachineBasicBlock::const_instr_iterator I = MI.getIterator(); MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); while (++I != E && I->isInsideBundle()) { assert(!I->isBundle() && "No nested bundle!"); Size += getInstSizeInBytes(*I); } return Size; } unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { unsigned Opc = MI.getOpcode(); const MCInstrDesc &Desc = getMCOpcodeFromPseudo(Opc); unsigned DescSize = Desc.getSize(); // If we have a definitive size, we can use it. Otherwise we need to inspect // the operands to know the size. // // FIXME: Instructions that have a base 32-bit encoding report their size as // 4, even though they are really 8 bytes if they have a literal operand. if (DescSize != 0 && DescSize != 4) return DescSize; // 4-byte instructions may have a 32-bit literal encoded after them. Check // operands that could ever be literals. if (isVALU(MI) || isSALU(MI)) { if (isFixedSize(MI)) return DescSize; int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); if (Src0Idx == -1) return 4; // No operands.
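[Editor's note: a simplified standalone model of this sizing rule (plain C++). The integer inline-constant range used here, -16..64, is the usual AMDGPU one; inline floating-point constants are omitted for brevity. The real per-operand check, isLiteralConstantLike, continues below.]

#include <cstdint>

// True if V can be encoded as an inline constant rather than a literal.
static bool isInlineImm(int64_t V) { return V >= -16 && V <= 64; }

// Base encoding is 4 bytes; any source needing a literal appends one dword.
static unsigned sizeInBytes(const int64_t *Src0Imm, const int64_t *Src1Imm) {
  if ((Src0Imm && !isInlineImm(*Src0Imm)) ||
      (Src1Imm && !isInlineImm(*Src1Imm)))
    return 8; // 4-byte encoding + trailing 32-bit literal
  return 4;
}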
if (isLiteralConstantLike(MI.getOperand(Src0Idx), Desc.OpInfo[Src0Idx])) return 8; int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); if (Src1Idx == -1) return 4; if (isLiteralConstantLike(MI.getOperand(Src1Idx), Desc.OpInfo[Src1Idx])) return 8; return 4; } if (DescSize == 4) return 4; switch (Opc) { case TargetOpcode::IMPLICIT_DEF: case TargetOpcode::KILL: case TargetOpcode::DBG_VALUE: case TargetOpcode::EH_LABEL: return 0; case TargetOpcode::BUNDLE: return getInstBundleSize(MI); case TargetOpcode::INLINEASM: { const MachineFunction *MF = MI.getParent()->getParent(); const char *AsmStr = MI.getOperand(0).getSymbolName(); return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo()); } default: llvm_unreachable("unable to find instruction size"); } } bool SIInstrInfo::mayAccessFlatAddressSpace(const MachineInstr &MI) const { if (!isFLAT(MI)) return false; if (MI.memoperands_empty()) return true; for (const MachineMemOperand *MMO : MI.memoperands()) { if (MMO->getAddrSpace() == AMDGPUASI.FLAT_ADDRESS) return true; } return false; } bool SIInstrInfo::isNonUniformBranchInstr(MachineInstr &Branch) const { return Branch.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO; } void SIInstrInfo::convertNonUniformIfRegion(MachineBasicBlock *IfEntry, MachineBasicBlock *IfEnd) const { MachineBasicBlock::iterator TI = IfEntry->getFirstTerminator(); assert(TI != IfEntry->end()); MachineInstr *Branch = &(*TI); MachineFunction *MF = IfEntry->getParent(); MachineRegisterInfo &MRI = IfEntry->getParent()->getRegInfo(); if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) { unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); MachineInstr *SIIF = BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_IF), DstReg) .add(Branch->getOperand(0)) .add(Branch->getOperand(1)); MachineInstr *SIEND = BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_END_CF)) .addReg(DstReg); IfEntry->erase(TI); IfEntry->insert(IfEntry->end(), SIIF); IfEnd->insert(IfEnd->getFirstNonPHI(), SIEND); } } void SIInstrInfo::convertNonUniformLoopRegion( MachineBasicBlock *LoopEntry, MachineBasicBlock *LoopEnd) const { MachineBasicBlock::iterator TI = LoopEnd->getFirstTerminator(); // We expect 2 terminators, one conditional and one unconditional. 
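[Editor's note: conceptually, the SI_IF_BREAK / SI_LOOP pair emitted by this function implements the mask protocol sketched here (standalone plain C++; a callback stands in for the body's break condition, and this is my reading of the pseudos' semantics rather than authoritative documentation).]

#include <cstdint>
#include <functional>

// BackEdgeReg accumulates lanes that have broken out (SI_IF_BREAK);
// SI_LOOP takes the back-edge while any lane of the original exec mask
// is still live in the loop.
static uint64_t runDivergentLoop(
    uint64_t Exec, const std::function<uint64_t(uint64_t)> &BreakMask) {
  uint64_t Broken = 0; // carried by the header PHI, zero on loop entry
  while ((Exec & ~Broken) != 0)
    Broken |= BreakMask(Exec & ~Broken); // lanes exiting this iteration
  return Broken; // equals Exec once every lane has left the loop
}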
assert(TI != LoopEnd->end()); MachineInstr *Branch = &(*TI); MachineFunction *MF = LoopEnd->getParent(); MachineRegisterInfo &MRI = LoopEnd->getParent()->getRegInfo(); if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) { unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); unsigned BackEdgeReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); MachineInstrBuilder HeaderPHIBuilder = BuildMI(*(MF), Branch->getDebugLoc(), get(TargetOpcode::PHI), DstReg); for (MachineBasicBlock::pred_iterator PI = LoopEntry->pred_begin(), E = LoopEntry->pred_end(); PI != E; ++PI) { if (*PI == LoopEnd) { HeaderPHIBuilder.addReg(BackEdgeReg); } else { MachineBasicBlock *PMBB = *PI; unsigned ZeroReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); materializeImmediate(*PMBB, PMBB->getFirstTerminator(), DebugLoc(), ZeroReg, 0); HeaderPHIBuilder.addReg(ZeroReg); } HeaderPHIBuilder.addMBB(*PI); } MachineInstr *HeaderPhi = HeaderPHIBuilder; MachineInstr *SIIFBREAK = BuildMI(*(MF), Branch->getDebugLoc(), get(AMDGPU::SI_IF_BREAK), BackEdgeReg) .addReg(DstReg) .add(Branch->getOperand(0)); MachineInstr *SILOOP = BuildMI(*(MF), Branch->getDebugLoc(), get(AMDGPU::SI_LOOP)) .addReg(BackEdgeReg) .addMBB(LoopEntry); LoopEntry->insert(LoopEntry->begin(), HeaderPhi); LoopEnd->erase(TI); LoopEnd->insert(LoopEnd->end(), SIIFBREAK); LoopEnd->insert(LoopEnd->end(), SILOOP); } } ArrayRef<std::pair<int, const char *>> SIInstrInfo::getSerializableTargetIndices() const { static const std::pair<int, const char *> TargetIndices[] = { {AMDGPU::TI_CONSTDATA_START, "amdgpu-constdata-start"}, {AMDGPU::TI_SCRATCH_RSRC_DWORD0, "amdgpu-scratch-rsrc-dword0"}, {AMDGPU::TI_SCRATCH_RSRC_DWORD1, "amdgpu-scratch-rsrc-dword1"}, {AMDGPU::TI_SCRATCH_RSRC_DWORD2, "amdgpu-scratch-rsrc-dword2"}, {AMDGPU::TI_SCRATCH_RSRC_DWORD3, "amdgpu-scratch-rsrc-dword3"}}; return makeArrayRef(TargetIndices); } /// This is used by the post-RA scheduler (SchedulePostRAList.cpp). The /// post-RA version of misched uses CreateTargetMIHazardRecognizer. ScheduleHazardRecognizer * SIInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const { return new GCNHazardRecognizer(DAG->MF); } /// This is the hazard recognizer used at -O0 by the PostRAHazardRecognizer /// pass.
ScheduleHazardRecognizer * SIInstrInfo::CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const { return new GCNHazardRecognizer(MF); } std::pair<unsigned, unsigned> SIInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { return std::make_pair(TF & MO_MASK, TF & ~MO_MASK); } ArrayRef<std::pair<unsigned, const char *>> SIInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { static const std::pair<unsigned, const char *> TargetFlags[] = { { MO_GOTPCREL, "amdgpu-gotprel" }, { MO_GOTPCREL32_LO, "amdgpu-gotprel32-lo" }, { MO_GOTPCREL32_HI, "amdgpu-gotprel32-hi" }, { MO_REL32_LO, "amdgpu-rel32-lo" }, { MO_REL32_HI, "amdgpu-rel32-hi" } }; return makeArrayRef(TargetFlags); } bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI) const { return !MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY && MI.modifiesRegister(AMDGPU::EXEC, &RI); } MachineInstrBuilder SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, unsigned DestReg) const { if (ST.hasAddNoCarry()) return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e64), DestReg); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); MRI.setRegAllocationHint(UnusedCarry, 0, AMDGPU::VCC); return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_I32_e64), DestReg) .addReg(UnusedCarry, RegState::Define | RegState::Dead); } bool SIInstrInfo::isKillTerminator(unsigned Opcode) { switch (Opcode) { case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR: case AMDGPU::SI_KILL_I1_TERMINATOR: return true; default: return false; } } const MCInstrDesc &SIInstrInfo::getKillTerminatorFromPseudo(unsigned Opcode) const { switch (Opcode) { case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO: return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR); case AMDGPU::SI_KILL_I1_PSEUDO: return get(AMDGPU::SI_KILL_I1_TERMINATOR); default: llvm_unreachable("invalid opcode, expected SI_KILL_*_PSEUDO"); } } Index: head/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp =================================================================== --- head/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp (revision 329982) +++ head/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp (revision 329983) @@ -1,757 +1,760 @@ //===-- PPCCTRLoops.cpp - Identify and generate CTR loops -----------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This pass identifies loops where we can generate the PPC branch instructions // that decrement and test the count register (CTR) (bdnz and friends). // // The pattern that defines the induction variable can change depending on // prior optimizations. For example, the IndVarSimplify phase run by 'opt' // normalizes induction variables, and the Loop Strength Reduction pass // run by 'llc' may also make changes to the induction variable. // // Criteria for CTR loops: // - Countable loops (w/ ind. var for a trip count) // - Try inner-most loops first // - No nested CTR loops. // - No function calls in loops.
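// Illustrative example (not from the source): a countable loop such as
//   for (i = 0; i != n; ++i) body();
// can be emitted as
//   mtctr n
// loop: body
//   bdnz loop
// provided none of the criteria above are violated.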
// //===----------------------------------------------------------------------===// #include "PPC.h" #include "PPCSubtarget.h" #include "PPCTargetMachine.h" #include "PPCTargetTransformInfo.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/ValueHandle.h" #include "llvm/PassSupport.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" #ifndef NDEBUG #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #endif using namespace llvm; #define DEBUG_TYPE "ctrloops" #ifndef NDEBUG static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1)); #endif // The latency of mtctr is only justified if there are more than 4 // comparisons that will be removed as a result. static cl::opt<unsigned> SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden, cl::desc("Loops with a constant trip count smaller than " "this value will not use the count register.")); STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops"); namespace llvm { void initializePPCCTRLoopsPass(PassRegistry&); #ifndef NDEBUG void initializePPCCTRLoopsVerifyPass(PassRegistry&); #endif } namespace { struct PPCCTRLoops : public FunctionPass { #ifndef NDEBUG static int Counter; #endif public: static char ID; PPCCTRLoops() : FunctionPass(ID) { initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry()); } bool runOnFunction(Function &F) override; void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<LoopInfoWrapperPass>(); AU.addPreserved<LoopInfoWrapperPass>(); AU.addRequired<DominatorTreeWrapperPass>(); AU.addPreserved<DominatorTreeWrapperPass>(); AU.addRequired<ScalarEvolutionWrapperPass>(); AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<TargetTransformInfoWrapperPass>(); } private: bool mightUseCTR(BasicBlock *BB); bool convertToCTRLoop(Loop *L); private: const PPCTargetMachine *TM; const PPCSubtarget *STI; const PPCTargetLowering *TLI; const DataLayout *DL; const TargetLibraryInfo *LibInfo; const TargetTransformInfo *TTI; LoopInfo *LI; ScalarEvolution *SE; DominatorTree *DT; bool PreserveLCSSA; TargetSchedModel SchedModel; }; char PPCCTRLoops::ID = 0; #ifndef NDEBUG int PPCCTRLoops::Counter = 0; #endif #ifndef NDEBUG struct PPCCTRLoopsVerify : public MachineFunctionPass { public: static char ID; PPCCTRLoopsVerify() : MachineFunctionPass(ID) { initializePPCCTRLoopsVerifyPass(*PassRegistry::getPassRegistry()); } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineDominatorTree>(); MachineFunctionPass::getAnalysisUsage(AU); } bool runOnMachineFunction(MachineFunction &MF) override; private: MachineDominatorTree *MDT; }; char PPCCTRLoopsVerify::ID = 0; #endif // NDEBUG } // end anonymous namespace INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", false, false)
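// The INITIALIZE_PASS_* macros register the pass with the legacy
// PassRegistry; each INITIALIZE_PASS_DEPENDENCY line below names one
// analysis wrapper pass that getAnalysisUsage() above requires.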
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", false, false) FunctionPass *llvm::createPPCCTRLoops() { return new PPCCTRLoops(); } #ifndef NDEBUG INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify", "PowerPC CTR Loops Verify", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_END(PPCCTRLoopsVerify, "ppc-ctr-loops-verify", "PowerPC CTR Loops Verify", false, false) FunctionPass *llvm::createPPCCTRLoopsVerify() { return new PPCCTRLoopsVerify(); } #endif // NDEBUG bool PPCCTRLoops::runOnFunction(Function &F) { if (skipFunction(F)) return false; auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); if (!TPC) return false; TM = &TPC->getTM<PPCTargetMachine>(); STI = TM->getSubtargetImpl(F); TLI = STI->getTargetLowering(); LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); DL = &F.getParent()->getDataLayout(); auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); LibInfo = TLIP ? &TLIP->getTLI() : nullptr; PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); bool MadeChange = false; for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) { Loop *L = *I; if (!L->getParentLoop()) MadeChange |= convertToCTRLoop(L); } return MadeChange; } static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) { if (IntegerType *ITy = dyn_cast<IntegerType>(Ty)) return ITy->getBitWidth() > (Is32Bit ? 32U : 64U); return false; } // Determining the address of a TLS variable results in a function call in // certain TLS models. static bool memAddrUsesCTR(const PPCTargetMachine &TM, const Value *MemAddr) { const auto *GV = dyn_cast<GlobalValue>(MemAddr); if (!GV) { // Recurse to check for constants that refer to TLS global variables. if (const auto *CV = dyn_cast<Constant>(MemAddr)) for (const auto &CO : CV->operands()) if (memAddrUsesCTR(TM, CO)) return true; return false; } if (!GV->isThreadLocal()) return false; TLSModel::Model Model = TM.getTLSModel(GV); return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic; } // Loop through the inline asm constraints and look for something that clobbers // ctr. static bool asmClobbersCTR(InlineAsm *IA) { InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints(); for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) { InlineAsm::ConstraintInfo &C = CIV[i]; if (C.Type != InlineAsm::isInput) for (unsigned j = 0, je = C.Codes.size(); j < je; ++j) if (StringRef(C.Codes[j]).equals_lower("{ctr}")) return true; } return false; } bool PPCCTRLoops::mightUseCTR(BasicBlock *BB) { for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J) { if (CallInst *CI = dyn_cast<CallInst>(J)) { // Inline ASM is okay, unless it clobbers the ctr register. if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) { if (asmClobbersCTR(IA)) return true; continue; } if (Function *F = CI->getCalledFunction()) { // Most intrinsics don't become function calls, but some might. // sin, cos, exp and log are always calls. unsigned Opcode = 0; if (F->getIntrinsicID() != Intrinsic::not_intrinsic) { switch (F->getIntrinsicID()) { default: continue; // If we have a call to ppc_is_decremented_ctr_nonzero, or ppc_mtctr // we're definitely using CTR.
case Intrinsic::ppc_is_decremented_ctr_nonzero: case Intrinsic::ppc_mtctr: return true; // VisualStudio defines setjmp as _setjmp #if defined(_MSC_VER) && defined(setjmp) && \ !defined(setjmp_undefined_for_msvc) # pragma push_macro("setjmp") # undef setjmp # define setjmp_undefined_for_msvc #endif case Intrinsic::setjmp: #if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc) // let's return it to _setjmp state # pragma pop_macro("setjmp") # undef setjmp_undefined_for_msvc #endif case Intrinsic::longjmp: // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp // because, although it does clobber the counter register, the // control can't then return to inside the loop unless there is also // an eh_sjlj_setjmp. case Intrinsic::eh_sjlj_setjmp: case Intrinsic::memcpy: case Intrinsic::memmove: case Intrinsic::memset: case Intrinsic::powi: case Intrinsic::log: case Intrinsic::log2: case Intrinsic::log10: case Intrinsic::exp: case Intrinsic::exp2: case Intrinsic::pow: case Intrinsic::sin: case Intrinsic::cos: return true; case Intrinsic::copysign: if (CI->getArgOperand(0)->getType()->getScalarType()-> isPPC_FP128Ty()) return true; else continue; // ISD::FCOPYSIGN is never a library call. case Intrinsic::sqrt: Opcode = ISD::FSQRT; break; case Intrinsic::floor: Opcode = ISD::FFLOOR; break; case Intrinsic::ceil: Opcode = ISD::FCEIL; break; case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; case Intrinsic::rint: Opcode = ISD::FRINT; break; case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; case Intrinsic::round: Opcode = ISD::FROUND; break; case Intrinsic::minnum: Opcode = ISD::FMINNUM; break; case Intrinsic::maxnum: Opcode = ISD::FMAXNUM; break; case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO; break; case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO; break; } } // PowerPC does not use [US]DIVREM or other library calls for // operations on regular types which are not otherwise library calls // (i.e. soft float or atomics). If adapting for targets that do, // additional care is required here. LibFunc Func; if (!F->hasLocalLinkage() && F->hasName() && LibInfo && LibInfo->getLibFunc(F->getName(), Func) && LibInfo->hasOptimizedCodeGen(Func)) { // Non-read-only functions are never treated as intrinsics. if (!CI->onlyReadsMemory()) return true; // Conversion happens only for FP calls. if (!CI->getArgOperand(0)->getType()->isFloatingPointTy()) return true; switch (Func) { default: return true; case LibFunc_copysign: case LibFunc_copysignf: continue; // ISD::FCOPYSIGN is never a library call. case LibFunc_copysignl: return true; case LibFunc_fabs: case LibFunc_fabsf: case LibFunc_fabsl: continue; // ISD::FABS is never a library call. 
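// The cases below do not necessarily force CTR usage: each maps the call
// to its ISD opcode, and further down the target is asked whether that
// operation is legal or custom, i.e. whether it really lowers to a call.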
case LibFunc_sqrt: case LibFunc_sqrtf: case LibFunc_sqrtl: Opcode = ISD::FSQRT; break; case LibFunc_floor: case LibFunc_floorf: case LibFunc_floorl: Opcode = ISD::FFLOOR; break; case LibFunc_nearbyint: case LibFunc_nearbyintf: case LibFunc_nearbyintl: Opcode = ISD::FNEARBYINT; break; case LibFunc_ceil: case LibFunc_ceilf: case LibFunc_ceill: Opcode = ISD::FCEIL; break; case LibFunc_rint: case LibFunc_rintf: case LibFunc_rintl: Opcode = ISD::FRINT; break; case LibFunc_round: case LibFunc_roundf: case LibFunc_roundl: Opcode = ISD::FROUND; break; case LibFunc_trunc: case LibFunc_truncf: case LibFunc_truncl: Opcode = ISD::FTRUNC; break; case LibFunc_fmin: case LibFunc_fminf: case LibFunc_fminl: Opcode = ISD::FMINNUM; break; case LibFunc_fmax: case LibFunc_fmaxf: case LibFunc_fmaxl: Opcode = ISD::FMAXNUM; break; } } if (Opcode) { MVT VTy = TLI->getSimpleValueType( *DL, CI->getArgOperand(0)->getType(), true); if (VTy == MVT::Other) return true; if (TLI->isOperationLegalOrCustom(Opcode, VTy)) continue; else if (VTy.isVector() && TLI->isOperationLegalOrCustom(Opcode, VTy.getScalarType())) continue; return true; } } return true; } else if (isa<BinaryOperator>(J) && J->getType()->getScalarType()->isPPC_FP128Ty()) { // Most operations on ppc_f128 values become calls. return true; } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) || isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) { CastInst *CI = cast<CastInst>(J); if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() || CI->getDestTy()->getScalarType()->isPPC_FP128Ty() || isLargeIntegerTy(!TM->isPPC64(), CI->getSrcTy()->getScalarType()) || isLargeIntegerTy(!TM->isPPC64(), CI->getDestTy()->getScalarType())) return true; } else if (isLargeIntegerTy(!TM->isPPC64(), J->getType()->getScalarType()) && (J->getOpcode() == Instruction::UDiv || J->getOpcode() == Instruction::SDiv || J->getOpcode() == Instruction::URem || J->getOpcode() == Instruction::SRem)) { return true; } else if (!TM->isPPC64() && isLargeIntegerTy(false, J->getType()->getScalarType()) && (J->getOpcode() == Instruction::Shl || J->getOpcode() == Instruction::AShr || J->getOpcode() == Instruction::LShr)) { // Only on PPC32, for 128-bit integers (specifically not 64-bit // integers), these might be runtime calls. return true; } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) { // On PowerPC, indirect jumps use the counter register. return true; } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) { if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries()) return true; } + // FREM is always a call. + if (J->getOpcode() == Instruction::FRem) + return true; + if (STI->useSoftFloat()) { switch(J->getOpcode()) { case Instruction::FAdd: case Instruction::FSub: case Instruction::FMul: case Instruction::FDiv: - case Instruction::FRem: case Instruction::FPTrunc: case Instruction::FPExt: case Instruction::FPToUI: case Instruction::FPToSI: case Instruction::UIToFP: case Instruction::SIToFP: case Instruction::FCmp: return true; } } for (Value *Operand : J->operands()) if (memAddrUsesCTR(*TM, Operand)) return true; } return false; } bool PPCCTRLoops::convertToCTRLoop(Loop *L) { bool MadeChange = false; // Do not convert small short loops to CTR loop. unsigned ConstTripCount = SE->getSmallConstantTripCount(L); if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) { SmallPtrSet<const Value *, 4> EphValues; auto AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache( *L->getHeader()->getParent()); CodeMetrics::collectEphemeralValues(L, &AC, EphValues); CodeMetrics Metrics; for (BasicBlock *BB : L->blocks()) Metrics.analyzeBasicBlock(BB, *TTI, EphValues); // 6 is an approximate latency for the mtctr instruction.
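// Worked example with hypothetical numbers: at an issue width of 4, a
// constant-trip-count loop body of 6 * 4 = 24 instructions or fewer is
// kept as an ordinary compare-and-branch loop, since it is too small to
// amortize the mtctr latency.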
if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth())) return false; } // Process nested loops first. for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) { MadeChange |= convertToCTRLoop(*I); DEBUG(dbgs() << "Nested loop converted\n"); } // If a nested loop has been converted, then we can't convert this loop. if (MadeChange) return MadeChange; #ifndef NDEBUG // Stop trying after reaching the limit (if any). int Limit = CTRLoopLimit; if (Limit >= 0) { if (Counter >= CTRLoopLimit) return false; Counter++; } #endif // We don't want to spill/restore the counter register, and so we don't // want to use the counter register if the loop contains calls. for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); I != IE; ++I) if (mightUseCTR(*I)) return MadeChange; SmallVector<BasicBlock *, 4> ExitingBlocks; L->getExitingBlocks(ExitingBlocks); BasicBlock *CountedExitBlock = nullptr; const SCEV *ExitCount = nullptr; BranchInst *CountedExitBranch = nullptr; for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(), IE = ExitingBlocks.end(); I != IE; ++I) { const SCEV *EC = SE->getExitCount(L, *I); DEBUG(dbgs() << "Exit Count for " << *L << " from block " << (*I)->getName() << ": " << *EC << "\n"); if (isa<SCEVCouldNotCompute>(EC)) continue; if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) { if (ConstEC->getValue()->isZero()) continue; } else if (!SE->isLoopInvariant(EC, L)) continue; if (SE->getTypeSizeInBits(EC->getType()) > (TM->isPPC64() ? 64 : 32)) continue; // We now have a loop-invariant count of loop iterations (which is not the // constant zero) for which we know that this loop will not exit via this // exiting block. // We need to make sure that this block will run on every loop iteration. // For this to be true, we must dominate all blocks with backedges. Such // blocks are in-loop predecessors to the header block. bool NotAlways = false; for (pred_iterator PI = pred_begin(L->getHeader()), PIE = pred_end(L->getHeader()); PI != PIE; ++PI) { if (!L->contains(*PI)) continue; if (!DT->dominates(*I, *PI)) { NotAlways = true; break; } } if (NotAlways) continue; // Make sure this block ends with a conditional branch. Instruction *TI = (*I)->getTerminator(); if (!TI) continue; if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { if (!BI->isConditional()) continue; CountedExitBranch = BI; } else continue; // Note that this block may not be the loop latch block, even if the loop // has a latch block. CountedExitBlock = *I; ExitCount = EC; break; } if (!CountedExitBlock) return MadeChange; BasicBlock *Preheader = L->getLoopPreheader(); // If we don't have a preheader, then insert one. If we already have a // preheader, then we can use it (except if the preheader contains a use of // the CTR register because some such uses might be reordered by the // selection DAG after the mtctr instruction). if (!Preheader || mightUseCTR(Preheader)) Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA); if (!Preheader) return MadeChange; DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName() << "\n"); // Insert the count into the preheader and replace the condition used by the // selected branch. MadeChange = true; SCEVExpander SCEVE(*SE, *DL, "loopcnt"); LLVMContext &C = SE->getContext(); Type *CountType = TM->isPPC64() ?
Type::getInt64Ty(C) : Type::getInt32Ty(C); if (!ExitCount->getType()->isPointerTy() && ExitCount->getType() != CountType) ExitCount = SE->getZeroExtendExpr(ExitCount, CountType); ExitCount = SE->getAddExpr(ExitCount, SE->getOne(CountType)); Value *ECValue = SCEVE.expandCodeFor(ExitCount, CountType, Preheader->getTerminator()); IRBuilder<> CountBuilder(Preheader->getTerminator()); Module *M = Preheader->getParent()->getParent(); Value *MTCTRFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr, CountType); CountBuilder.CreateCall(MTCTRFunc, ECValue); IRBuilder<> CondBuilder(CountedExitBranch); Value *DecFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero); Value *NewCond = CondBuilder.CreateCall(DecFunc, {}); Value *OldCond = CountedExitBranch->getCondition(); CountedExitBranch->setCondition(NewCond); // The false branch must exit the loop. if (!L->contains(CountedExitBranch->getSuccessor(0))) CountedExitBranch->swapSuccessors(); // The old condition may be dead now, and may have even created a dead PHI // (the original induction variable). RecursivelyDeleteTriviallyDeadInstructions(OldCond); // Run through the basic blocks of the loop and see if any of them have dead // PHIs that can be removed. for (auto I : L->blocks()) DeleteDeadPHIs(I); ++NumCTRLoops; return MadeChange; } #ifndef NDEBUG static bool clobbersCTR(const MachineInstr &MI) { for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI.getOperand(i); if (MO.isReg()) { if (MO.isDef() && (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8)) return true; } else if (MO.isRegMask()) { if (MO.clobbersPhysReg(PPC::CTR) || MO.clobbersPhysReg(PPC::CTR8)) return true; } } return false; } static bool verifyCTRBranch(MachineBasicBlock *MBB, MachineBasicBlock::iterator I) { MachineBasicBlock::iterator BI = I; SmallSet<MachineBasicBlock *, 32> Visited; SmallVector<MachineBasicBlock *, 8> Preds; bool CheckPreds; if (I == MBB->begin()) { Visited.insert(MBB); goto queue_preds; } else --I; check_block: Visited.insert(MBB); if (I == MBB->end()) goto queue_preds; CheckPreds = true; for (MachineBasicBlock::iterator IE = MBB->begin();; --I) { unsigned Opc = I->getOpcode(); if (Opc == PPC::MTCTRloop || Opc == PPC::MTCTR8loop) { CheckPreds = false; break; } if (I != BI && clobbersCTR(*I)) { DEBUG(dbgs() << printMBBReference(*MBB) << " (" << MBB->getFullName() << ") instruction " << *I << " clobbers CTR, invalidating " << printMBBReference(*BI->getParent()) << " (" << BI->getParent()->getFullName() << ") instruction " << *BI << "\n"); return false; } if (I == IE) break; } if (!CheckPreds && Preds.empty()) return true; if (CheckPreds) { queue_preds: if (MachineFunction::iterator(MBB) == MBB->getParent()->begin()) { DEBUG(dbgs() << "Unable to find a MTCTR instruction for " << printMBBReference(*BI->getParent()) << " (" << BI->getParent()->getFullName() << ") instruction " << *BI << "\n"); return false; } for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), PIE = MBB->pred_end(); PI != PIE; ++PI) Preds.push_back(*PI); } do { MBB = Preds.pop_back_val(); if (!Visited.count(MBB)) { I = MBB->getLastNonDebugInstr(); goto check_block; } } while (!Preds.empty()); return true; } bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) { MDT = &getAnalysis<MachineDominatorTree>(); // Verify that all bdnz/bdz instructions are dominated by a loop mtctr before // any other instructions that might clobber the ctr register.
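// Put differently (illustrative): if any path from the dominating mtctr
// to a bdnz/bdz can execute an instruction that redefines CTR (say, a
// call), the count would be corrupted, so verifyCTRBranch walks the
// predecessor blocks to prove no such path exists.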
for (MachineFunction::iterator I = MF.begin(), IE = MF.end(); I != IE; ++I) { MachineBasicBlock *MBB = &*I; if (!MDT->isReachableFromEntry(MBB)) continue; for (MachineBasicBlock::iterator MII = MBB->getFirstTerminator(), MIIE = MBB->end(); MII != MIIE; ++MII) { unsigned Opc = MII->getOpcode(); if (Opc == PPC::BDNZ8 || Opc == PPC::BDNZ || Opc == PPC::BDZ8 || Opc == PPC::BDZ) if (!verifyCTRBranch(MBB, MII)) llvm_unreachable("Invalid PPC CTR loop!"); } } return false; } #endif // NDEBUG Index: head/contrib/llvm/lib/Target/X86/X86.td =================================================================== --- head/contrib/llvm/lib/Target/X86/X86.td (revision 329982) +++ head/contrib/llvm/lib/Target/X86/X86.td (revision 329983) @@ -1,1069 +1,1075 @@ //===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This is a target description file for the Intel i386 architecture, referred // to here as the "X86" architecture. // //===----------------------------------------------------------------------===// // Get the target-independent interfaces which we are implementing... // include "llvm/Target/Target.td" //===----------------------------------------------------------------------===// // X86 Subtarget state // def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true", "64-bit mode (x86_64)">; def Mode32Bit : SubtargetFeature<"32bit-mode", "In32BitMode", "true", "32-bit mode (80386)">; def Mode16Bit : SubtargetFeature<"16bit-mode", "In16BitMode", "true", "16-bit mode (i8086)">; //===----------------------------------------------------------------------===// // X86 Subtarget features //===----------------------------------------------------------------------===// def FeatureX87 : SubtargetFeature<"x87","HasX87", "true", "Enable X87 float instructions">; def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true", "Enable conditional move instructions">; def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true", "Support POPCNT instruction">; def FeatureFXSR : SubtargetFeature<"fxsr", "HasFXSR", "true", "Support fxsave/fxrestore instructions">; def FeatureXSAVE : SubtargetFeature<"xsave", "HasXSAVE", "true", "Support xsave instructions">; def FeatureXSAVEOPT: SubtargetFeature<"xsaveopt", "HasXSAVEOPT", "true", "Support xsaveopt instructions">; def FeatureXSAVEC : SubtargetFeature<"xsavec", "HasXSAVEC", "true", "Support xsavec instructions">; def FeatureXSAVES : SubtargetFeature<"xsaves", "HasXSAVES", "true", "Support xsaves instructions">; def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1", "Enable SSE instructions", // SSE codegen depends on cmovs, and all // SSE1+ processors support them. 
[FeatureCMOV]>; def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2", "Enable SSE2 instructions", [FeatureSSE1]>; def FeatureSSE3 : SubtargetFeature<"sse3", "X86SSELevel", "SSE3", "Enable SSE3 instructions", [FeatureSSE2]>; def FeatureSSSE3 : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3", "Enable SSSE3 instructions", [FeatureSSE3]>; def FeatureSSE41 : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41", "Enable SSE 4.1 instructions", [FeatureSSSE3]>; def FeatureSSE42 : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42", "Enable SSE 4.2 instructions", [FeatureSSE41]>; // The MMX subtarget feature is separate from the rest of the SSE features // because it's important (for odd compatibility reasons) to be able to // turn it off explicitly while allowing SSE+ to be on. def FeatureMMX : SubtargetFeature<"mmx","X863DNowLevel", "MMX", "Enable MMX instructions">; def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow", "Enable 3DNow! instructions", [FeatureMMX]>; def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA", "Enable 3DNow! Athlon instructions", [Feature3DNow]>; // All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied // feature, because SSE2 can be disabled (e.g. for compiling OS kernels) // without disabling 64-bit mode. def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true", "Support 64-bit instructions", [FeatureCMOV]>; def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true", "64-bit with cmpxchg16b", [Feature64Bit]>; def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true", "SHLD instruction is slow">; def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true", "PMULLD instruction is slow">; // FIXME: This should not apply to CPUs that do not have SSE. 
def FeatureSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16", "IsUAMem16Slow", "true", "Slow unaligned 16-byte memory access">; def FeatureSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32", "IsUAMem32Slow", "true", "Slow unaligned 32-byte memory access">; def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true", "Support SSE 4a instructions", [FeatureSSE3]>; def FeatureAVX : SubtargetFeature<"avx", "X86SSELevel", "AVX", "Enable AVX instructions", [FeatureSSE42]>; def FeatureAVX2 : SubtargetFeature<"avx2", "X86SSELevel", "AVX2", "Enable AVX2 instructions", [FeatureAVX]>; def FeatureFMA : SubtargetFeature<"fma", "HasFMA", "true", "Enable three-operand fused multiply-add", [FeatureAVX]>; def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true", "Support 16-bit floating point conversion instructions", [FeatureAVX]>; def FeatureAVX512 : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512F", "Enable AVX-512 instructions", [FeatureAVX2, FeatureFMA, FeatureF16C]>; def FeatureERI : SubtargetFeature<"avx512er", "HasERI", "true", "Enable AVX-512 Exponential and Reciprocal Instructions", [FeatureAVX512]>; def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true", "Enable AVX-512 Conflict Detection Instructions", [FeatureAVX512]>; def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ", "true", "Enable AVX-512 Population Count Instructions", [FeatureAVX512]>; def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true", "Enable AVX-512 PreFetch Instructions", [FeatureAVX512]>; def FeaturePREFETCHWT1 : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1", "true", "Prefetch with Intent to Write and T1 Hint">; def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true", "Enable AVX-512 Doubleword and Quadword Instructions", [FeatureAVX512]>; def FeatureBWI : SubtargetFeature<"avx512bw", "HasBWI", "true", "Enable AVX-512 Byte and Word Instructions", [FeatureAVX512]>; def FeatureVLX : SubtargetFeature<"avx512vl", "HasVLX", "true", "Enable AVX-512 Vector Length eXtensions", [FeatureAVX512]>; def FeatureVBMI : SubtargetFeature<"avx512vbmi", "HasVBMI", "true", "Enable AVX-512 Vector Byte Manipulation Instructions", [FeatureBWI]>; def FeatureVBMI2 : SubtargetFeature<"avx512vbmi2", "HasVBMI2", "true", "Enable AVX-512 further Vector Byte Manipulation Instructions", [FeatureBWI]>; def FeatureIFMA : SubtargetFeature<"avx512ifma", "HasIFMA", "true", "Enable AVX-512 Integer Fused Multiply-Add", [FeatureAVX512]>; def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true", "Enable protection keys">; def FeatureVNNI : SubtargetFeature<"avx512vnni", "HasVNNI", "true", "Enable AVX-512 Vector Neural Network Instructions", [FeatureAVX512]>; def FeatureBITALG : SubtargetFeature<"avx512bitalg", "HasBITALG", "true", "Enable AVX-512 Bit Algorithms", [FeatureBWI]>; def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true", "Enable packed carry-less multiplication instructions", [FeatureSSE2]>; def FeatureGFNI : SubtargetFeature<"gfni", "HasGFNI", "true", "Enable Galois Field Arithmetic Instructions", [FeatureSSE2]>; def FeatureVPCLMULQDQ : SubtargetFeature<"vpclmulqdq", "HasVPCLMULQDQ", "true", "Enable vpclmulqdq instructions", [FeatureAVX, FeaturePCLMUL]>; def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true", "Enable four-operand fused multiply-add", [FeatureAVX, FeatureSSE4A]>; def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true", "Enable XOP instructions", [FeatureFMA4]>; def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem", "HasSSEUnalignedMem",
"true", "Allow unaligned memory operands with SSE instructions">; def FeatureAES : SubtargetFeature<"aes", "HasAES", "true", "Enable AES instructions", [FeatureSSE2]>; def FeatureVAES : SubtargetFeature<"vaes", "HasVAES", "true", "Promote selected AES instructions to AVX512/AVX registers", [FeatureAVX, FeatureAES]>; def FeatureTBM : SubtargetFeature<"tbm", "HasTBM", "true", "Enable TBM instructions">; def FeatureLWP : SubtargetFeature<"lwp", "HasLWP", "true", "Enable LWP instructions">; def FeatureMOVBE : SubtargetFeature<"movbe", "HasMOVBE", "true", "Support MOVBE instruction">; def FeatureRDRAND : SubtargetFeature<"rdrnd", "HasRDRAND", "true", "Support RDRAND instruction">; def FeatureFSGSBase : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true", "Support FS/GS Base instructions">; def FeatureLZCNT : SubtargetFeature<"lzcnt", "HasLZCNT", "true", "Support LZCNT instruction">; def FeatureBMI : SubtargetFeature<"bmi", "HasBMI", "true", "Support BMI instructions">; def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true", "Support BMI2 instructions">; def FeatureRTM : SubtargetFeature<"rtm", "HasRTM", "true", "Support RTM instructions">; def FeatureADX : SubtargetFeature<"adx", "HasADX", "true", "Support ADX instructions">; def FeatureSHA : SubtargetFeature<"sha", "HasSHA", "true", "Enable SHA instructions", [FeatureSSE2]>; def FeatureSHSTK : SubtargetFeature<"shstk", "HasSHSTK", "true", "Support CET Shadow-Stack instructions">; def FeatureIBT : SubtargetFeature<"ibt", "HasIBT", "true", "Support CET Indirect-Branch-Tracking instructions">; def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true", "Support PRFCHW instructions">; def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true", "Support RDSEED instruction">; def FeatureLAHFSAHF : SubtargetFeature<"sahf", "HasLAHFSAHF", "true", "Support LAHF and SAHF instructions">; def FeatureMWAITX : SubtargetFeature<"mwaitx", "HasMWAITX", "true", "Enable MONITORX/MWAITX timer functionality">; def FeatureCLZERO : SubtargetFeature<"clzero", "HasCLZERO", "true", "Enable Cache Line Zero">; def FeatureMPX : SubtargetFeature<"mpx", "HasMPX", "true", "Support MPX instructions">; def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true", "Use LEA for adjusting the stack pointer">; def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb", "HasSlowDivide32", "true", "Use 8-bit divide for positive values less than 256">; def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divl", "HasSlowDivide64", "true", "Use 32-bit divide for positive values less than 2^32">; def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions", "PadShortFunctions", "true", "Pad short functions">; def FeatureSGX : SubtargetFeature<"sgx", "HasSGX", "true", "Enable Software Guard Extensions">; def FeatureCLFLUSHOPT : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true", "Flush A Cache Line Optimized">; def FeatureCLWB : SubtargetFeature<"clwb", "HasCLWB", "true", "Cache Line Write Back">; // On some processors, instructions that implicitly take two memory operands are // slow. In practice, this means that CALL, PUSH, and POP with memory operands // should be avoided in favor of a MOV + register CALL/PUSH/POP. 
def FeatureSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops", "SlowTwoMemOps", "true", "Two memory operand instructions are slow">; def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true", "LEA instruction needs inputs at AG stage">; def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true", "LEA instruction with certain arguments is slow">; def FeatureSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true", "LEA instruction with 3 ops or certain registers is slow">; def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true", "INC and DEC instructions are slower than ADD and SUB">; def FeatureSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true", "Use software floating point features.">; // On recent X86 (port-bound) processors, it's preferable to combine to a single shuffle // using a variable mask over multiple fixed shuffles. def FeatureFastVariableShuffle : SubtargetFeature<"fast-variable-shuffle", "HasFastVariableShuffle", "true", "Shuffles with variable masks are fast">; // On some X86 processors, there is no performance hazard to writing only the // lower parts of a YMM or ZMM register without clearing the upper part. def FeatureFastPartialYMMorZMMWrite : SubtargetFeature<"fast-partial-ymm-or-zmm-write", "HasFastPartialYMMorZMMWrite", "true", "Partial writes to YMM/ZMM registers are fast">; // FeatureFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency // than the corresponding NR code. FeatureFastVectorFSQRT should be enabled if // vector FSQRT has higher throughput than the corresponding NR code. // The idea is that throughput bound code is likely to be vectorized, so for // vectorized code we should care about the throughput of SQRT operations. // But if the code is scalar that probably means that the code has some kind of // dependency and we should care more about reducing the latency. def FeatureFastScalarFSQRT : SubtargetFeature<"fast-scalar-fsqrt", "HasFastScalarFSQRT", "true", "Scalar SQRT is fast (disable Newton-Raphson)">; def FeatureFastVectorFSQRT : SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT", "true", "Vector SQRT is fast (disable Newton-Raphson)">; // If lzcnt has equivalent latency/throughput to most simple integer ops, it can // be used to replace test/set sequences. def FeatureFastLZCNT : SubtargetFeature< "fast-lzcnt", "HasFastLZCNT", "true", "LZCNT instructions are as fast as most simple integer ops">; // Sandy Bridge and newer processors can use SHLD with the same source on both // inputs to implement rotate to avoid the partial flag update of the normal // rotate instructions. def FeatureFastSHLDRotate : SubtargetFeature< "fast-shld-rotate", "HasFastSHLDRotate", "true", "SHLD can be used as a faster rotate">; // Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka // "string operations"). See "REP String Enhancement" in the Intel Software // Development Manual. This feature essentially means that REP MOVSB will copy // using the largest available size instead of copying bytes one by one, making // it at least as fast as REPMOVS{W,D,Q}. def FeatureERMSB : SubtargetFeature< "ermsb", "HasERMSB", "true", "REP MOVS/STOS are fast">; // Sandy Bridge and newer processors have many instructions that can be // fused with conditional branches and pass through the CPU as a single // operation.
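// For instance, a CMP or TEST immediately followed by a conditional
// branch can issue as one fused operation on such cores (an illustrative
// example, not an exhaustive list of fusible pairs).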
def FeatureMacroFusion : SubtargetFeature<"macrofusion", "HasMacroFusion", "true", "Various instructions can be fused with conditional branches">; // Gather is available since Haswell (AVX2 set). So technically, we can // generate Gathers on all AVX2 processors. But the overhead on HSW is high. // Skylake Client processor has faster Gathers than HSW and performance is // similar to Skylake Server (AVX-512). def FeatureHasFastGather : SubtargetFeature<"fast-gather", "HasFastGather", "true", "Indicates if gather is reasonably fast.">; // Enable mitigation of some aspects of speculative execution related // vulnerabilities by removing speculatable indirect branches. This disables // jump-table formation, rewrites explicit `indirectbr` instructions into // `switch` instructions, and uses a special construct called a "retpoline" to // prevent speculation of the remaining indirect branches (indirect calls and // tail calls). def FeatureRetpoline : SubtargetFeature<"retpoline", "UseRetpoline", "true", "Remove speculation of indirect branches from the " "generated code, either by avoiding them entirely or " "lowering them with a speculation blocking construct.">; // Rely on external thunks for the emitted retpoline calls. This allows users // to provide their own custom thunk definitions in highly specialized // environments such as a kernel that does boot-time hot patching. def FeatureRetpolineExternalThunk : SubtargetFeature< "retpoline-external-thunk", "UseRetpolineExternalThunk", "true", "Enable retpoline, but with an externally provided thunk.", [FeatureRetpoline]>; //===----------------------------------------------------------------------===// // Register File Description //===----------------------------------------------------------------------===// include "X86RegisterInfo.td" include "X86RegisterBanks.td" //===----------------------------------------------------------------------===// // Instruction Descriptions //===----------------------------------------------------------------------===// include "X86Schedule.td" include "X86InstrInfo.td" def X86InstrInfo : InstrInfo; //===----------------------------------------------------------------------===// // X86 processors supported. 
//===----------------------------------------------------------------------===// include "X86ScheduleAtom.td" include "X86SchedSandyBridge.td" include "X86SchedHaswell.td" include "X86SchedBroadwell.td" include "X86ScheduleSLM.td" include "X86ScheduleZnver1.td" include "X86ScheduleBtVer2.td" include "X86SchedSkylakeClient.td" include "X86SchedSkylakeServer.td" def ProcIntelAtom : SubtargetFeature<"atom", "X86ProcFamily", "IntelAtom", "Intel Atom processors">; def ProcIntelSLM : SubtargetFeature<"slm", "X86ProcFamily", "IntelSLM", "Intel Silvermont processors">; def ProcIntelGLM : SubtargetFeature<"glm", "X86ProcFamily", "IntelGLM", "Intel Goldmont processors">; def ProcIntelHSW : SubtargetFeature<"haswell", "X86ProcFamily", "IntelHaswell", "Intel Haswell processors">; def ProcIntelBDW : SubtargetFeature<"broadwell", "X86ProcFamily", "IntelBroadwell", "Intel Broadwell processors">; def ProcIntelSKL : SubtargetFeature<"skylake", "X86ProcFamily", "IntelSkylake", "Intel Skylake processors">; def ProcIntelKNL : SubtargetFeature<"knl", "X86ProcFamily", "IntelKNL", "Intel Knights Landing processors">; def ProcIntelSKX : SubtargetFeature<"skx", "X86ProcFamily", "IntelSKX", "Intel Skylake Server processors">; def ProcIntelCNL : SubtargetFeature<"cannonlake", "X86ProcFamily", "IntelCannonlake", "Intel Cannonlake processors">; def ProcIntelICL : SubtargetFeature<"icelake", "X86ProcFamily", "IntelIcelake", "Intel Icelake processors">; class Proc<string Name, list<SubtargetFeature> Features> : ProcessorModel<Name, GenericModel, Features>; def : Proc<"generic", [FeatureX87, FeatureSlowUAMem16]>; def : Proc<"i386", [FeatureX87, FeatureSlowUAMem16]>; def : Proc<"i486", [FeatureX87, FeatureSlowUAMem16]>; def : Proc<"i586", [FeatureX87, FeatureSlowUAMem16]>; def : Proc<"pentium", [FeatureX87, FeatureSlowUAMem16]>; def : Proc<"pentium-mmx", [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>; foreach P = ["i686", "pentiumpro"] in { def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMOV]>; } def : Proc<"pentium2", [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureCMOV, FeatureFXSR]>; foreach P = ["pentium3", "pentium3m"] in { def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE1, FeatureFXSR]>; } // Enable the PostRAScheduler for SSE2 and SSE3 class cpus. // The intent is to enable it for pentium4 which is the current default // processor in a vanilla 32-bit clang compilation when no specific // architecture is specified. This generally gives a nice performance // increase on silvermont, with largely neutral behavior on other // contemporary large core processors. // pentium-m, pentium4m, prescott and nocona are included as a preventative // measure to avoid performance surprises, in case clang's default cpu // changes slightly. def : ProcessorModel<"pentium-m", GenericPostRAModel, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE2, FeatureFXSR]>; foreach P = ["pentium4", "pentium4m"] in { def : ProcessorModel<P, GenericPostRAModel, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE2, FeatureFXSR]>; } // Intel Quark. def : Proc<"lakemont", []>; // Intel Core Duo. def : ProcessorModel<"yonah", SandyBridgeModel, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, FeatureFXSR]>; // NetBurst. def : ProcessorModel<"prescott", GenericPostRAModel, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, FeatureFXSR]>; def : ProcessorModel<"nocona", GenericPostRAModel, [ FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, FeatureFXSR, FeatureCMPXCHG16B ]>; // Intel Core 2 Solo/Duo.
def : ProcessorModel<"core2", SandyBridgeModel, [ FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSSE3, FeatureFXSR, FeatureCMPXCHG16B, FeatureLAHFSAHF, FeatureMacroFusion ]>; def : ProcessorModel<"penryn", SandyBridgeModel, [ FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE41, FeatureFXSR, FeatureCMPXCHG16B, FeatureLAHFSAHF, FeatureMacroFusion ]>; // Atom CPUs. class BonnellProc : ProcessorModel; def : BonnellProc<"bonnell">; def : BonnellProc<"atom">; // Pin the generic name to the baseline. class SilvermontProc : ProcessorModel; def : SilvermontProc<"silvermont">; def : SilvermontProc<"slm">; // Legacy alias. class GoldmontProc : ProcessorModel; def : GoldmontProc<"goldmont">; // "Arrandale" along with corei3 and corei5 class NehalemProc : ProcessorModel; def : NehalemProc<"nehalem">; def : NehalemProc<"corei7">; // Westmere is a similar machine to nehalem with some additional features. // Westmere is the corei3/i5/i7 path from nehalem to sandybridge class WestmereProc : ProcessorModel; def : WestmereProc<"westmere">; class ProcessorFeatures Inherited, list NewFeatures> { list Value = !listconcat(Inherited, NewFeatures); } class ProcModel ProcFeatures, list OtherFeatures> : ProcessorModel; // SSE is not listed here since llvm treats AVX as a reimplementation of SSE, // rather than a superset. def SNBFeatures : ProcessorFeatures<[], [ FeatureX87, FeatureMMX, FeatureAVX, FeatureFXSR, FeatureCMPXCHG16B, FeaturePOPCNT, FeatureAES, FeatureSlowDivide64, FeaturePCLMUL, FeatureXSAVE, FeatureXSAVEOPT, FeatureLAHFSAHF, FeatureSlow3OpsLEA, FeatureFastScalarFSQRT, FeatureFastSHLDRotate, FeatureSlowIncDec, FeatureMacroFusion ]>; class SandyBridgeProc : ProcModel; def : SandyBridgeProc<"sandybridge">; def : SandyBridgeProc<"corei7-avx">; // Legacy alias. def IVBFeatures : ProcessorFeatures; class IvyBridgeProc : ProcModel; def : IvyBridgeProc<"ivybridge">; def : IvyBridgeProc<"core-avx-i">; // Legacy alias. def HSWFeatures : ProcessorFeatures; class HaswellProc : ProcModel; def : HaswellProc<"haswell">; def : HaswellProc<"core-avx2">; // Legacy alias. def BDWFeatures : ProcessorFeatures; class BroadwellProc : ProcModel; def : BroadwellProc<"broadwell">; def SKLFeatures : ProcessorFeatures; class SkylakeClientProc : ProcModel; def : SkylakeClientProc<"skylake">; def KNLFeatures : ProcessorFeatures; // FIXME: define KNL model class KnightsLandingProc : ProcModel; def : KnightsLandingProc<"knl">; class KnightsMillProc : ProcModel; def : KnightsMillProc<"knm">; // TODO Add AVX5124FMAPS/AVX5124VNNIW features def SKXFeatures : ProcessorFeatures; class SkylakeServerProc : ProcModel; def : SkylakeServerProc<"skylake-avx512">; def : SkylakeServerProc<"skx">; // Legacy alias. -def CNLFeatures : ProcessorFeatures; class CannonlakeProc : ProcModel; def : CannonlakeProc<"cannonlake">; def ICLFeatures : ProcessorFeatures; class IcelakeProc : ProcModel; def : IcelakeProc<"icelake">; // AMD CPUs. 
def : Proc<"k6", [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>; def : Proc<"k6-2", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>; def : Proc<"k6-3", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>; foreach P = ["athlon", "athlon-tbird"] in { def : Proc; } foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in { def : Proc; } foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in { def : Proc; } foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in { def : Proc; } foreach P = ["amdfam10", "barcelona"] in { def : Proc; } // Bobcat def : Proc<"btver1", [ FeatureX87, FeatureMMX, FeatureSSSE3, FeatureSSE4A, FeatureFXSR, FeatureCMPXCHG16B, FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT, FeatureSlowSHLD, FeatureLAHFSAHF ]>; // Jaguar def : ProcessorModel<"btver2", BtVer2Model, [ FeatureX87, FeatureMMX, FeatureAVX, FeatureFXSR, FeatureSSE4A, FeatureCMPXCHG16B, FeaturePRFCHW, FeatureAES, FeaturePCLMUL, FeatureBMI, FeatureF16C, FeatureMOVBE, FeatureLZCNT, FeatureFastLZCNT, FeaturePOPCNT, FeatureXSAVE, FeatureXSAVEOPT, FeatureSlowSHLD, FeatureLAHFSAHF, FeatureFastPartialYMMorZMMWrite ]>; // Bulldozer def : Proc<"bdver1", [ FeatureX87, FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW, FeaturePCLMUL, FeatureMMX, FeatureAVX, FeatureFXSR, FeatureSSE4A, FeatureLZCNT, FeaturePOPCNT, FeatureXSAVE, FeatureLWP, FeatureSlowSHLD, FeatureLAHFSAHF, FeatureMacroFusion ]>; // Piledriver def : Proc<"bdver2", [ FeatureX87, FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW, FeaturePCLMUL, FeatureMMX, FeatureAVX, FeatureFXSR, FeatureSSE4A, FeatureF16C, FeatureLZCNT, FeaturePOPCNT, FeatureXSAVE, FeatureBMI, FeatureTBM, FeatureLWP, FeatureFMA, FeatureSlowSHLD, FeatureLAHFSAHF, FeatureMacroFusion ]>; // Steamroller def : Proc<"bdver3", [ FeatureX87, FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW, FeaturePCLMUL, FeatureMMX, FeatureAVX, FeatureFXSR, FeatureSSE4A, FeatureF16C, FeatureLZCNT, FeaturePOPCNT, FeatureXSAVE, FeatureBMI, FeatureTBM, FeatureLWP, FeatureFMA, FeatureXSAVEOPT, FeatureSlowSHLD, FeatureFSGSBase, FeatureLAHFSAHF, FeatureMacroFusion ]>; // Excavator def : Proc<"bdver4", [ FeatureX87, FeatureMMX, FeatureAVX2, FeatureFXSR, FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW, FeaturePCLMUL, FeatureF16C, FeatureLZCNT, FeaturePOPCNT, FeatureXSAVE, FeatureBMI, FeatureBMI2, FeatureTBM, FeatureLWP, FeatureFMA, FeatureXSAVEOPT, FeatureSlowSHLD, FeatureFSGSBase, FeatureLAHFSAHF, FeatureMWAITX, FeatureMacroFusion ]>; // Znver1 def: ProcessorModel<"znver1", Znver1Model, [ FeatureADX, FeatureAES, FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureCLFLUSHOPT, FeatureCLZERO, FeatureCMPXCHG16B, FeatureF16C, FeatureFMA, FeatureFSGSBase, FeatureFXSR, FeatureFastLZCNT, FeatureLAHFSAHF, FeatureLZCNT, FeatureMacroFusion, FeatureMMX, FeatureMOVBE, FeatureMWAITX, FeaturePCLMUL, FeaturePOPCNT, FeaturePRFCHW, FeatureRDRAND, FeatureRDSEED, FeatureSHA, FeatureSSE4A, FeatureSlowSHLD, FeatureX87, FeatureXSAVE, FeatureXSAVEC, FeatureXSAVEOPT, FeatureXSAVES]>; def : Proc<"geode", [FeatureX87, FeatureSlowUAMem16, Feature3DNowA]>; def : Proc<"winchip-c6", [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>; def : Proc<"winchip2", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>; def : Proc<"c3", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>; def : Proc<"c3-2", [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE1, FeatureFXSR]>; // We also provide a generic 64-bit specific x86 processor model which tries to // be good for modern chips without enabling 
instruction set encodings past the // basic SSE2 and 64-bit ones. It disables slow things from any mainstream and // modern 64-bit x86 chip, and enables features that are generally beneficial. // // We currently use the Sandy Bridge model as the default scheduling model as // we use it across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge which // covers a huge swath of x86 processors. If there are specific scheduling // knobs which need to be tuned differently for AMD chips, we might consider // forming a common base for them. def : ProcessorModel<"x86-64", SandyBridgeModel, [ FeatureX87, FeatureMMX, FeatureSSE2, FeatureFXSR, Feature64Bit, FeatureSlow3OpsLEA, FeatureSlowIncDec, FeatureMacroFusion ]>; //===----------------------------------------------------------------------===// // Calling Conventions //===----------------------------------------------------------------------===// include "X86CallingConv.td" //===----------------------------------------------------------------------===// // Assembly Parser //===----------------------------------------------------------------------===// def ATTAsmParserVariant : AsmParserVariant { int Variant = 0; // Variant name. string Name = "att"; // Discard comments in assembly strings. string CommentDelimiter = "#"; // Recognize hard coded registers. string RegisterPrefix = "%"; } def IntelAsmParserVariant : AsmParserVariant { int Variant = 1; // Variant name. string Name = "intel"; // Discard comments in assembly strings. string CommentDelimiter = ";"; // Recognize hard coded registers. string RegisterPrefix = ""; } //===----------------------------------------------------------------------===// // Assembly Printers //===----------------------------------------------------------------------===// // The X86 target supports two different syntaxes for emitting machine code. // This is controlled by the -x86-asm-syntax={att|intel} def ATTAsmWriter : AsmWriter { string AsmWriterClassName = "ATTInstPrinter"; int Variant = 0; } def IntelAsmWriter : AsmWriter { string AsmWriterClassName = "IntelInstPrinter"; int Variant = 1; } def X86 : Target { // Information about the instructions... let InstructionSet = X86InstrInfo; let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant]; let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter]; } Index: head/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp =================================================================== --- head/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp (revision 329982) +++ head/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp (revision 329983) @@ -1,1744 +1,1759 @@ //===- InstCombineSelect.cpp ----------------------------------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements the visitSelect function. 
// //===----------------------------------------------------------------------===// #include "InstCombineInternal.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CmpInstAnalysis.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Transforms/InstCombine/InstCombineWorklist.h" #include <cassert> #include <utility> using namespace llvm; using namespace PatternMatch; #define DEBUG_TYPE "instcombine" static SelectPatternFlavor getInverseMinMaxSelectPattern(SelectPatternFlavor SPF) { switch (SPF) { default: llvm_unreachable("unhandled!"); case SPF_SMIN: return SPF_SMAX; case SPF_UMIN: return SPF_UMAX; case SPF_SMAX: return SPF_SMIN; case SPF_UMAX: return SPF_UMIN; } } static CmpInst::Predicate getCmpPredicateForMinMax(SelectPatternFlavor SPF, bool Ordered=false) { switch (SPF) { default: llvm_unreachable("unhandled!"); case SPF_SMIN: return ICmpInst::ICMP_SLT; case SPF_UMIN: return ICmpInst::ICMP_ULT; case SPF_SMAX: return ICmpInst::ICMP_SGT; case SPF_UMAX: return ICmpInst::ICMP_UGT; case SPF_FMINNUM: return Ordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT; case SPF_FMAXNUM: return Ordered ? FCmpInst::FCMP_OGT : FCmpInst::FCMP_UGT; } } static Value *generateMinMaxSelectPattern(InstCombiner::BuilderTy &Builder, SelectPatternFlavor SPF, Value *A, Value *B) { CmpInst::Predicate Pred = getCmpPredicateForMinMax(SPF); assert(CmpInst::isIntPredicate(Pred)); return Builder.CreateSelect(Builder.CreateICmp(Pred, A, B), A, B); } /// If one of the constants is zero (we know they can't both be) and we have an /// icmp instruction with zero, and we have an 'and' with the non-constant value /// and a power of two we can turn the select into a shift on the result of the /// 'and'. /// This folds: /// select (icmp eq (and X, C1)), C2, C3 /// iff C1 is a power of 2 and the difference between C2 and C3 is a power of 2. /// To something like: /// (shr (and (X, C1)), (log2(C1) - log2(C2-C3))) + C3 /// Or: /// (shl (and (X, C1)), (log2(C2-C3) - log2(C1))) + C3 /// With some variations depending if C3 is larger than C2, or the shift /// isn't needed, or the bit widths don't match. static Value *foldSelectICmpAnd(Type *SelType, const ICmpInst *IC, APInt TrueVal, APInt FalseVal, InstCombiner::BuilderTy &Builder) { assert(SelType->isIntOrIntVectorTy() && "Not an integer select?"); // If this is a vector select, we need a vector compare.
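// Worked example (illustrative): for
//   select (icmp eq (and i32 %X, 4), 0), i32 0, i32 8
// AndMask is 4 (AndZeros = 2) and the nonzero arm is 8 (ValZeros = 3), so
// the code below produces (shl (and %X, 4), 1), moving bit 2 of %X into
// bit 3.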
if (SelType->isVectorTy() != IC->getType()->isVectorTy()) return nullptr; Value *V; APInt AndMask; bool CreateAnd = false; ICmpInst::Predicate Pred = IC->getPredicate(); if (ICmpInst::isEquality(Pred)) { if (!match(IC->getOperand(1), m_Zero())) return nullptr; V = IC->getOperand(0); const APInt *AndRHS; if (!match(V, m_And(m_Value(), m_Power2(AndRHS)))) return nullptr; AndMask = *AndRHS; } else if (decomposeBitTestICmp(IC->getOperand(0), IC->getOperand(1), Pred, V, AndMask)) { assert(ICmpInst::isEquality(Pred) && "Not equality test?"); if (!AndMask.isPowerOf2()) return nullptr; CreateAnd = true; } else { return nullptr; } // If both select arms are non-zero see if we have a select of the form // 'x ? 2^n + C : C'. Then we can offset both arms by C, use the logic // for 'x ? 2^n : 0' and fix the thing up at the end. APInt Offset(TrueVal.getBitWidth(), 0); if (!TrueVal.isNullValue() && !FalseVal.isNullValue()) { if ((TrueVal - FalseVal).isPowerOf2()) Offset = FalseVal; else if ((FalseVal - TrueVal).isPowerOf2()) Offset = TrueVal; else return nullptr; // Adjust TrueVal and FalseVal to the offset. TrueVal -= Offset; FalseVal -= Offset; } // Make sure one of the select arms is a power of 2. if (!TrueVal.isPowerOf2() && !FalseVal.isPowerOf2()) return nullptr; // Determine which shift is needed to transform result of the 'and' into the // desired result. const APInt &ValC = !TrueVal.isNullValue() ? TrueVal : FalseVal; unsigned ValZeros = ValC.logBase2(); unsigned AndZeros = AndMask.logBase2(); if (CreateAnd) { // Insert the AND instruction on the input to the truncate. V = Builder.CreateAnd(V, ConstantInt::get(V->getType(), AndMask)); } // If types don't match we can still convert the select by introducing a zext // or a trunc of the 'and'. if (ValZeros > AndZeros) { V = Builder.CreateZExtOrTrunc(V, SelType); V = Builder.CreateShl(V, ValZeros - AndZeros); } else if (ValZeros < AndZeros) { V = Builder.CreateLShr(V, AndZeros - ValZeros); V = Builder.CreateZExtOrTrunc(V, SelType); } else V = Builder.CreateZExtOrTrunc(V, SelType); // Okay, now we know that everything is set up, we just don't know whether we // have a icmp_ne or icmp_eq and whether the true or false val is the zero. bool ShouldNotVal = !TrueVal.isNullValue(); ShouldNotVal ^= Pred == ICmpInst::ICMP_NE; if (ShouldNotVal) V = Builder.CreateXor(V, ValC); // Apply an offset if needed. if (!Offset.isNullValue()) V = Builder.CreateAdd(V, ConstantInt::get(V->getType(), Offset)); return V; } /// We want to turn code that looks like this: /// %C = or %A, %B /// %D = select %cond, %C, %A /// into: /// %C = select %cond, %B, 0 /// %D = or %A, %C /// /// Assuming that the specified instruction is an operand to the select, return /// a bitmask indicating which operands of this instruction are foldable if they /// equal the other incoming value of the select. static unsigned getSelectFoldableOperands(BinaryOperator *I) { switch (I->getOpcode()) { case Instruction::Add: case Instruction::Mul: case Instruction::And: case Instruction::Or: case Instruction::Xor: return 3; // Can fold through either operand. case Instruction::Sub: // Can only fold on the amount subtracted. case Instruction::Shl: // Can only fold on the shift amount. case Instruction::LShr: case Instruction::AShr: return 1; default: return 0; // Cannot fold } } /// For the same transformation as the previous function, return the identity /// constant that goes into the select. 
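/// For example (added illustration): folding through an 'or' uses the
/// identity 0, through an 'and' uses all-ones, and through a 'mul' uses 1,
/// so that the untaken arm of the new select leaves the other operand of the
/// binary operator unchanged.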
static APInt getSelectFoldableConstant(BinaryOperator *I) { switch (I->getOpcode()) { default: llvm_unreachable("This cannot happen!"); case Instruction::Add: case Instruction::Sub: case Instruction::Or: case Instruction::Xor: case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: return APInt::getNullValue(I->getType()->getScalarSizeInBits()); case Instruction::And: return APInt::getAllOnesValue(I->getType()->getScalarSizeInBits()); case Instruction::Mul: return APInt(I->getType()->getScalarSizeInBits(), 1); } } /// We have (select c, TI, FI), and we know that TI and FI have the same opcode. Instruction *InstCombiner::foldSelectOpOp(SelectInst &SI, Instruction *TI, Instruction *FI) { // Don't break up min/max patterns. The hasOneUse checks below prevent that // for most cases, but vector min/max with bitcasts can be transformed. If the // one-use restrictions are eased for other patterns, we still don't want to // obfuscate min/max. if ((match(&SI, m_SMin(m_Value(), m_Value())) || match(&SI, m_SMax(m_Value(), m_Value())) || match(&SI, m_UMin(m_Value(), m_Value())) || match(&SI, m_UMax(m_Value(), m_Value())))) return nullptr; // If this is a cast from the same type, merge. if (TI->getNumOperands() == 1 && TI->isCast()) { Type *FIOpndTy = FI->getOperand(0)->getType(); if (TI->getOperand(0)->getType() != FIOpndTy) return nullptr; // The select condition may be a vector. We may only change the operand // type if the vector width remains the same (and matches the condition). Type *CondTy = SI.getCondition()->getType(); if (CondTy->isVectorTy()) { if (!FIOpndTy->isVectorTy()) return nullptr; if (CondTy->getVectorNumElements() != FIOpndTy->getVectorNumElements()) return nullptr; // TODO: If the backend knew how to deal with casts better, we could // remove this limitation. For now, there's too much potential to create // worse codegen by promoting the select ahead of size-altering casts // (PR28160). // // Note that ValueTracking's matchSelectPattern() looks through casts // without checking 'hasOneUse' when it matches min/max patterns, so this // transform may end up happening anyway. if (TI->getOpcode() != Instruction::BitCast && (!TI->hasOneUse() || !FI->hasOneUse())) return nullptr; } else if (!TI->hasOneUse() || !FI->hasOneUse()) { // TODO: The one-use restrictions for a scalar select could be eased if // the fold of a select in visitLoadInst() was enhanced to match a pattern // that includes a cast. return nullptr; } // Fold this by inserting a select from the input values. Value *NewSI = Builder.CreateSelect(SI.getCondition(), TI->getOperand(0), FI->getOperand(0), SI.getName() + ".v", &SI); return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI, TI->getType()); } // Only handle binary operators with one-use here. As with the cast case // above, it may be possible to relax the one-use constraint, but that needs // be examined carefully since it may not reduce the total number of // instructions. BinaryOperator *BO = dyn_cast(TI); if (!BO || !TI->hasOneUse() || !FI->hasOneUse()) return nullptr; // Figure out if the operations have any operands in common. 
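  // For instance (added illustration, mirroring the doc comment above
  // getSelectFoldableOperands): with a common first operand,
  //   %t = add i32 %x, %y
  //   %f = add i32 %x, %z
  //   %r = select i1 %c, i32 %t, i32 %f
  // can be rewritten as
  //   %s = select i1 %c, i32 %y, i32 %z
  //   %r = add i32 %x, %s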
Value *MatchOp, *OtherOpT, *OtherOpF; bool MatchIsOpZero; if (TI->getOperand(0) == FI->getOperand(0)) { MatchOp = TI->getOperand(0); OtherOpT = TI->getOperand(1); OtherOpF = FI->getOperand(1); MatchIsOpZero = true; } else if (TI->getOperand(1) == FI->getOperand(1)) { MatchOp = TI->getOperand(1); OtherOpT = TI->getOperand(0); OtherOpF = FI->getOperand(0); MatchIsOpZero = false; } else if (!TI->isCommutative()) { return nullptr; } else if (TI->getOperand(0) == FI->getOperand(1)) { MatchOp = TI->getOperand(0); OtherOpT = TI->getOperand(1); OtherOpF = FI->getOperand(0); MatchIsOpZero = true; } else if (TI->getOperand(1) == FI->getOperand(0)) { MatchOp = TI->getOperand(1); OtherOpT = TI->getOperand(0); OtherOpF = FI->getOperand(1); MatchIsOpZero = true; } else { return nullptr; } // If we reach here, they do have operations in common. Value *NewSI = Builder.CreateSelect(SI.getCondition(), OtherOpT, OtherOpF, SI.getName() + ".v", &SI); Value *Op0 = MatchIsOpZero ? MatchOp : NewSI; Value *Op1 = MatchIsOpZero ? NewSI : MatchOp; return BinaryOperator::Create(BO->getOpcode(), Op0, Op1); } static bool isSelect01(const APInt &C1I, const APInt &C2I) { if (!C1I.isNullValue() && !C2I.isNullValue()) // One side must be zero. return false; return C1I.isOneValue() || C1I.isAllOnesValue() || C2I.isOneValue() || C2I.isAllOnesValue(); } /// Try to fold the select into one of the operands to allow further /// optimization. Instruction *InstCombiner::foldSelectIntoOp(SelectInst &SI, Value *TrueVal, Value *FalseVal) { // See the comment above GetSelectFoldableOperands for a description of the // transformation we are doing here. if (auto *TVI = dyn_cast(TrueVal)) { if (TVI->hasOneUse() && !isa(FalseVal)) { if (unsigned SFO = getSelectFoldableOperands(TVI)) { unsigned OpToFold = 0; if ((SFO & 1) && FalseVal == TVI->getOperand(0)) { OpToFold = 1; } else if ((SFO & 2) && FalseVal == TVI->getOperand(1)) { OpToFold = 2; } if (OpToFold) { APInt CI = getSelectFoldableConstant(TVI); Value *OOp = TVI->getOperand(2-OpToFold); // Avoid creating select between 2 constants unless it's selecting // between 0, 1 and -1. const APInt *OOpC; bool OOpIsAPInt = match(OOp, m_APInt(OOpC)); if (!isa(OOp) || (OOpIsAPInt && isSelect01(CI, *OOpC))) { Value *C = ConstantInt::get(OOp->getType(), CI); Value *NewSel = Builder.CreateSelect(SI.getCondition(), OOp, C); NewSel->takeName(TVI); BinaryOperator *BO = BinaryOperator::Create(TVI->getOpcode(), FalseVal, NewSel); BO->copyIRFlags(TVI); return BO; } } } } } if (auto *FVI = dyn_cast(FalseVal)) { if (FVI->hasOneUse() && !isa(TrueVal)) { if (unsigned SFO = getSelectFoldableOperands(FVI)) { unsigned OpToFold = 0; if ((SFO & 1) && TrueVal == FVI->getOperand(0)) { OpToFold = 1; } else if ((SFO & 2) && TrueVal == FVI->getOperand(1)) { OpToFold = 2; } if (OpToFold) { APInt CI = getSelectFoldableConstant(FVI); Value *OOp = FVI->getOperand(2-OpToFold); // Avoid creating select between 2 constants unless it's selecting // between 0, 1 and -1. 
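          // (Rationale, added for clarity: a select between 0 and 1 or
          // between 0 and -1 is cheap because later folds can lower it to a
          // zext or sext of the condition bit.)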
const APInt *OOpC; bool OOpIsAPInt = match(OOp, m_APInt(OOpC)); if (!isa(OOp) || (OOpIsAPInt && isSelect01(CI, *OOpC))) { Value *C = ConstantInt::get(OOp->getType(), CI); Value *NewSel = Builder.CreateSelect(SI.getCondition(), C, OOp); NewSel->takeName(FVI); BinaryOperator *BO = BinaryOperator::Create(FVI->getOpcode(), TrueVal, NewSel); BO->copyIRFlags(FVI); return BO; } } } } } return nullptr; } /// We want to turn: /// (select (icmp eq (and X, C1), 0), Y, (or Y, C2)) /// into: /// (or (shl (and X, C1), C3), Y) /// iff: /// C1 and C2 are both powers of 2 /// where: /// C3 = Log(C2) - Log(C1) /// /// This transform handles cases where: /// 1. The icmp predicate is inverted /// 2. The select operands are reversed /// 3. The magnitude of C2 and C1 are flipped static Value *foldSelectICmpAndOr(const ICmpInst *IC, Value *TrueVal, Value *FalseVal, InstCombiner::BuilderTy &Builder) { // Only handle integer compares. Also, if this is a vector select, we need a // vector compare. if (!TrueVal->getType()->isIntOrIntVectorTy() || TrueVal->getType()->isVectorTy() != IC->getType()->isVectorTy()) return nullptr; Value *CmpLHS = IC->getOperand(0); Value *CmpRHS = IC->getOperand(1); Value *V; unsigned C1Log; bool IsEqualZero; bool NeedAnd = false; if (IC->isEquality()) { if (!match(CmpRHS, m_Zero())) return nullptr; const APInt *C1; if (!match(CmpLHS, m_And(m_Value(), m_Power2(C1)))) return nullptr; V = CmpLHS; C1Log = C1->logBase2(); IsEqualZero = IC->getPredicate() == ICmpInst::ICMP_EQ; } else if (IC->getPredicate() == ICmpInst::ICMP_SLT || IC->getPredicate() == ICmpInst::ICMP_SGT) { // We also need to recognize (icmp slt (trunc (X)), 0) and // (icmp sgt (trunc (X)), -1). IsEqualZero = IC->getPredicate() == ICmpInst::ICMP_SGT; if ((IsEqualZero && !match(CmpRHS, m_AllOnes())) || (!IsEqualZero && !match(CmpRHS, m_Zero()))) return nullptr; if (!match(CmpLHS, m_OneUse(m_Trunc(m_Value(V))))) return nullptr; C1Log = CmpLHS->getType()->getScalarSizeInBits() - 1; NeedAnd = true; } else { return nullptr; } const APInt *C2; bool OrOnTrueVal = false; bool OrOnFalseVal = match(FalseVal, m_Or(m_Specific(TrueVal), m_Power2(C2))); if (!OrOnFalseVal) OrOnTrueVal = match(TrueVal, m_Or(m_Specific(FalseVal), m_Power2(C2))); if (!OrOnFalseVal && !OrOnTrueVal) return nullptr; Value *Y = OrOnFalseVal ? TrueVal : FalseVal; unsigned C2Log = C2->logBase2(); bool NeedXor = (!IsEqualZero && OrOnFalseVal) || (IsEqualZero && OrOnTrueVal); bool NeedShift = C1Log != C2Log; bool NeedZExtTrunc = Y->getType()->getScalarSizeInBits() != V->getType()->getScalarSizeInBits(); // Make sure we don't create more instructions than we save. Value *Or = OrOnFalseVal ? FalseVal : TrueVal; if ((NeedShift + NeedXor + NeedZExtTrunc) > (IC->hasOneUse() + Or->hasOneUse())) return nullptr; if (NeedAnd) { // Insert the AND instruction on the input to the truncate. APInt C1 = APInt::getOneBitSet(V->getType()->getScalarSizeInBits(), C1Log); V = Builder.CreateAnd(V, ConstantInt::get(V->getType(), C1)); } if (C2Log > C1Log) { V = Builder.CreateZExtOrTrunc(V, Y->getType()); V = Builder.CreateShl(V, C2Log - C1Log); } else if (C1Log > C2Log) { V = Builder.CreateLShr(V, C1Log - C2Log); V = Builder.CreateZExtOrTrunc(V, Y->getType()); } else V = Builder.CreateZExtOrTrunc(V, Y->getType()); if (NeedXor) V = Builder.CreateXor(V, *C2); return Builder.CreateOr(V, Y); } /// Attempt to fold a cttz/ctlz followed by a icmp plus select into a single /// call to cttz/ctlz with flag 'is_zero_undef' cleared. 
///
/// For example, we can fold the following code sequence:
/// \code
///   %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
///   %1 = icmp ne i32 %x, 0
///   %2 = select i1 %1, i32 %0, i32 32
/// \endcode
///
/// into:
///   %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 false)
static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal,
                                 InstCombiner::BuilderTy &Builder) {
  ICmpInst::Predicate Pred = ICI->getPredicate();
  Value *CmpLHS = ICI->getOperand(0);
  Value *CmpRHS = ICI->getOperand(1);

  // Check if the condition value compares a value for equality against zero.
  if (!ICI->isEquality() || !match(CmpRHS, m_Zero()))
    return nullptr;

  Value *Count = FalseVal;
  Value *ValueOnZero = TrueVal;
  if (Pred == ICmpInst::ICMP_NE)
    std::swap(Count, ValueOnZero);

  // Skip zero extend/truncate.
  Value *V = nullptr;
  if (match(Count, m_ZExt(m_Value(V))) || match(Count, m_Trunc(m_Value(V))))
    Count = V;

  // Check if the value propagated on zero is a constant number equal to the
  // sizeof in bits of 'Count'.
  unsigned SizeOfInBits = Count->getType()->getScalarSizeInBits();
  if (!match(ValueOnZero, m_SpecificInt(SizeOfInBits)))
    return nullptr;

  // Check that 'Count' is a call to intrinsic cttz/ctlz. Also check that the
  // input to the cttz/ctlz is used as LHS for the compare instruction.
  if (match(Count, m_Intrinsic<Intrinsic::cttz>(m_Specific(CmpLHS))) ||
      match(Count, m_Intrinsic<Intrinsic::ctlz>(m_Specific(CmpLHS)))) {
    IntrinsicInst *II = cast<IntrinsicInst>(Count);
    // Explicitly clear the 'undef_on_zero' flag.
    IntrinsicInst *NewI = cast<IntrinsicInst>(II->clone());
    NewI->setArgOperand(1, ConstantInt::getFalse(NewI->getContext()));
    Builder.Insert(NewI);
    return Builder.CreateZExtOrTrunc(NewI, ValueOnZero->getType());
  }

  return nullptr;
}

/// Return true if we find and adjust an icmp+select pattern where the compare
/// is with a constant that can be incremented or decremented to match the
/// minimum or maximum idiom.
static bool adjustMinMax(SelectInst &Sel, ICmpInst &Cmp) {
  ICmpInst::Predicate Pred = Cmp.getPredicate();
  Value *CmpLHS = Cmp.getOperand(0);
  Value *CmpRHS = Cmp.getOperand(1);
  Value *TrueVal = Sel.getTrueValue();
  Value *FalseVal = Sel.getFalseValue();

  // We may move or edit the compare, so make sure the select is the only user.
  const APInt *CmpC;
  if (!Cmp.hasOneUse() || !match(CmpRHS, m_APInt(CmpC)))
    return false;

  // These transforms only work for selects of integers or vector selects of
  // integer vectors.
  Type *SelTy = Sel.getType();
  auto *SelEltTy = dyn_cast<IntegerType>(SelTy->getScalarType());
  if (!SelEltTy || SelTy->isVectorTy() != Cmp.getType()->isVectorTy())
    return false;

  Constant *AdjustedRHS;
  if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_SGT)
    AdjustedRHS = ConstantInt::get(CmpRHS->getType(), *CmpC + 1);
  else if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_SLT)
    AdjustedRHS = ConstantInt::get(CmpRHS->getType(), *CmpC - 1);
  else
    return false;

  // X > C ? X : C+1 --> X < C+1 ? C+1 : X
  // X < C ? X : C-1 --> X > C-1 ? C-1 : X
  if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) ||
      (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) {
    ; // Nothing to do here. Values match without any sign/zero extension.
  }
  // Types do not match. Instead of calculating this with mixed types, promote
  // all to the larger type. This enables scalar evolution to analyze this
  // expression.
  else if (CmpRHS->getType()->getScalarSizeInBits() < SelEltTy->getBitWidth()) {
    Constant *SextRHS = ConstantExpr::getSExt(AdjustedRHS, SelTy);

    // X = sext x; x >s c ? X : C+1 --> X = sext x; X <s C+1 ? C+1 : X
    // X = sext x; x <s c ? X : C-1 --> X = sext x; X >s C-1 ? C-1 : X
    // X = sext x; x >u c ? X : C+1 --> X = sext x; X <u C+1 ? C+1 : X
    // X = sext x; x <u c ? X : C-1 --> X = sext x; X >u C-1 ? C-1 : X
    if (match(TrueVal, m_SExt(m_Specific(CmpLHS))) && SextRHS == FalseVal) {
      CmpLHS = TrueVal;
      AdjustedRHS = SextRHS;
    } else if (match(FalseVal, m_SExt(m_Specific(CmpLHS))) &&
               SextRHS == TrueVal) {
      CmpLHS = FalseVal;
      AdjustedRHS = SextRHS;
    } else if (Cmp.isUnsigned()) {
      Constant *ZextRHS = ConstantExpr::getZExt(AdjustedRHS, SelTy);
      // X = zext x; x >u c ? X : C+1 --> X = zext x; X <u C+1 ? C+1 : X
      // X = zext x; x <u c ? X : C-1 --> X = zext x; X >u C-1 ? C-1 : X
      // zext + signed compare cannot be changed:
      //    0xff <s 0x00, but 0x00ff >s 0x0000
      if (match(TrueVal, m_ZExt(m_Specific(CmpLHS))) && ZextRHS == FalseVal) {
        CmpLHS = TrueVal;
        AdjustedRHS = ZextRHS;
      } else if (match(FalseVal, m_ZExt(m_Specific(CmpLHS))) &&
                 ZextRHS == TrueVal) {
        CmpLHS = FalseVal;
        AdjustedRHS = ZextRHS;
      } else {
        return false;
      }
    } else {
      return false;
    }
  } else {
    return false;
  }

  Pred = ICmpInst::getSwappedPredicate(Pred);
  CmpRHS = AdjustedRHS;
  std::swap(FalseVal, TrueVal);
  Cmp.setPredicate(Pred);
  Cmp.setOperand(0, CmpLHS);
  Cmp.setOperand(1, CmpRHS);
  Sel.setOperand(1, TrueVal);
  Sel.setOperand(2, FalseVal);
  Sel.swapProfMetadata();

  // Move the compare instruction right before the select instruction. Otherwise
  // the sext/zext value may be defined after the compare instruction uses it.
  Cmp.moveBefore(&Sel);
  return true;
}

/// If this is an integer min/max (icmp + select) with a constant operand,
/// create the canonical icmp for the min/max operation and canonicalize the
/// constant to the 'false' operand of the select:
/// select (icmp Pred X, C1), C2, X --> select (icmp Pred' X, C2), X, C2
/// Note: if C1 != C2, this will change the icmp constant to the existing
/// constant operand of the select.
static Instruction *
canonicalizeMinMaxWithConstant(SelectInst &Sel, ICmpInst &Cmp,
                               InstCombiner::BuilderTy &Builder) {
  if (!Cmp.hasOneUse() || !isa<Constant>(Cmp.getOperand(1)))
    return nullptr;

  // Canonicalize the compare predicate based on whether we have min or max.
  Value *LHS, *RHS;
  ICmpInst::Predicate NewPred;
  SelectPatternResult SPR = matchSelectPattern(&Sel, LHS, RHS);
  switch (SPR.Flavor) {
  case SPF_SMIN: NewPred = ICmpInst::ICMP_SLT; break;
  case SPF_UMIN: NewPred = ICmpInst::ICMP_ULT; break;
  case SPF_SMAX: NewPred = ICmpInst::ICMP_SGT; break;
  case SPF_UMAX: NewPred = ICmpInst::ICMP_UGT; break;
  default: return nullptr;
  }

  // Is this already canonical?
  if (Cmp.getOperand(0) == LHS && Cmp.getOperand(1) == RHS &&
      Cmp.getPredicate() == NewPred)
    return nullptr;

  // Create the canonical compare and plug it into the select.
  Sel.setCondition(Builder.CreateICmp(NewPred, LHS, RHS));

  // If the select operands did not change, we're done.
  if (Sel.getTrueValue() == LHS && Sel.getFalseValue() == RHS)
    return &Sel;

  // If we are swapping the select operands, swap the metadata too.
  assert(Sel.getTrueValue() == RHS && Sel.getFalseValue() == LHS &&
         "Unexpected results from matchSelectPattern");
  Sel.setTrueValue(LHS);
  Sel.setFalseValue(RHS);
  Sel.swapProfMetadata();
  return &Sel;
}

/// Visit a SelectInst that has an ICmpInst as its first operand.
Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI,
                                                  ICmpInst *ICI) {
  Value *TrueVal = SI.getTrueValue();
  Value *FalseVal = SI.getFalseValue();

  if (Instruction *NewSel = canonicalizeMinMaxWithConstant(SI, *ICI, Builder))
    return NewSel;

  bool Changed = adjustMinMax(SI, *ICI);

  ICmpInst::Predicate Pred = ICI->getPredicate();
  Value *CmpLHS = ICI->getOperand(0);
  Value *CmpRHS = ICI->getOperand(1);

  // Transform (X >s -1) ? C1 : C2 --> ((X >>s 31) & (C2 - C1)) + C1
  // and       (X <s  0) ? C2 : C1 --> ((X >>s 31) & (C2 - C1)) + C1
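  // e.g. (added illustration): (X >s -1) ? 4 : 12 becomes
  //   ((X >>s 31) & 8) + 4
  // since the arithmetic shift yields 0 for non-negative X and -1 for
  // negative X.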
  // FIXME: Type and constness constraints could be lifted, but we have to
  //        watch code size carefully. We should consider xor instead of
  //        sub/add when we decide to do that.
  // TODO: Merge this with foldSelectICmpAnd somehow.
  if (CmpLHS->getType()->isIntOrIntVectorTy() &&
      CmpLHS->getType() == TrueVal->getType()) {
    const APInt *C1, *C2;
    if (match(TrueVal, m_APInt(C1)) && match(FalseVal, m_APInt(C2))) {
      ICmpInst::Predicate Pred = ICI->getPredicate();
      Value *X;
      APInt Mask;
      if (decomposeBitTestICmp(CmpLHS, CmpRHS, Pred, X, Mask, false)) {
        if (Mask.isSignMask()) {
          assert(X == CmpLHS && "Expected to use the compare input directly");
          assert(ICmpInst::isEquality(Pred) && "Expected equality predicate");

          if (Pred == ICmpInst::ICMP_NE)
            std::swap(C1, C2);

          // This shift results in either -1 or 0.
          Value *AShr = Builder.CreateAShr(X, Mask.getBitWidth() - 1);

          // Check if we can express the operation with a single or.
          if (C2->isAllOnesValue())
            return replaceInstUsesWith(SI, Builder.CreateOr(AShr, *C1));

          Value *And = Builder.CreateAnd(AShr, *C2 - *C1);
          return replaceInstUsesWith(
              SI, Builder.CreateAdd(And,
                                    ConstantInt::get(And->getType(), *C1)));
        }
      }
    }
  }

  {
    const APInt *TrueValC, *FalseValC;
    if (match(TrueVal, m_APInt(TrueValC)) &&
        match(FalseVal, m_APInt(FalseValC)))
      if (Value *V = foldSelectICmpAnd(SI.getType(), ICI, *TrueValC,
                                       *FalseValC, Builder))
        return replaceInstUsesWith(SI, V);
  }

  // NOTE: if we wanted to, this is where to detect integer MIN/MAX
  if (CmpRHS != CmpLHS && isa<Constant>(CmpRHS)) {
    if (CmpLHS == TrueVal && Pred == ICmpInst::ICMP_EQ) {
      // Transform (X == C) ? X : Y -> (X == C) ? C : Y
      SI.setOperand(1, CmpRHS);
      Changed = true;
    } else if (CmpLHS == FalseVal && Pred == ICmpInst::ICMP_NE) {
      // Transform (X != C) ? Y : X -> (X != C) ? Y : C
      SI.setOperand(2, CmpRHS);
      Changed = true;
    }
  }

  // FIXME: This code is nearly duplicated in InstSimplify. Using/refactoring
  // decomposeBitTestICmp() might help.
  {
    unsigned BitWidth =
        DL.getTypeSizeInBits(TrueVal->getType()->getScalarType());
    APInt MinSignedValue = APInt::getSignedMinValue(BitWidth);
    Value *X;
    const APInt *Y, *C;
    bool TrueWhenUnset;
    bool IsBitTest = false;
    if (ICmpInst::isEquality(Pred) &&
        match(CmpLHS, m_And(m_Value(X), m_Power2(Y))) &&
        match(CmpRHS, m_Zero())) {
      IsBitTest = true;
      TrueWhenUnset = Pred == ICmpInst::ICMP_EQ;
    } else if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, m_Zero())) {
      X = CmpLHS;
      Y = &MinSignedValue;
      IsBitTest = true;
      TrueWhenUnset = false;
    } else if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, m_AllOnes())) {
      X = CmpLHS;
      Y = &MinSignedValue;
      IsBitTest = true;
      TrueWhenUnset = true;
    }
    if (IsBitTest) {
      Value *V = nullptr;
      // (X & Y) == 0 ? X : X ^ Y  --> X & ~Y
      if (TrueWhenUnset && TrueVal == X &&
          match(FalseVal, m_Xor(m_Specific(X), m_APInt(C))) && *Y == *C)
        V = Builder.CreateAnd(X, ~(*Y));
      // (X & Y) != 0 ? X ^ Y : X  --> X & ~Y
      else if (!TrueWhenUnset && FalseVal == X &&
               match(TrueVal, m_Xor(m_Specific(X), m_APInt(C))) && *Y == *C)
        V = Builder.CreateAnd(X, ~(*Y));
      // (X & Y) == 0 ? X ^ Y : X  --> X | Y
      else if (TrueWhenUnset && FalseVal == X &&
               match(TrueVal, m_Xor(m_Specific(X), m_APInt(C))) && *Y == *C)
        V = Builder.CreateOr(X, *Y);
      // (X & Y) != 0 ?
X : X ^ Y --> X | Y else if (!TrueWhenUnset && TrueVal == X && match(FalseVal, m_Xor(m_Specific(X), m_APInt(C))) && *Y == *C) V = Builder.CreateOr(X, *Y); if (V) return replaceInstUsesWith(SI, V); } } if (Value *V = foldSelectICmpAndOr(ICI, TrueVal, FalseVal, Builder)) return replaceInstUsesWith(SI, V); if (Value *V = foldSelectCttzCtlz(ICI, TrueVal, FalseVal, Builder)) return replaceInstUsesWith(SI, V); return Changed ? &SI : nullptr; } /// SI is a select whose condition is a PHI node (but the two may be in /// different blocks). See if the true/false values (V) are live in all of the /// predecessor blocks of the PHI. For example, cases like this can't be mapped: /// /// X = phi [ C1, BB1], [C2, BB2] /// Y = add /// Z = select X, Y, 0 /// /// because Y is not live in BB1/BB2. static bool canSelectOperandBeMappingIntoPredBlock(const Value *V, const SelectInst &SI) { // If the value is a non-instruction value like a constant or argument, it // can always be mapped. const Instruction *I = dyn_cast(V); if (!I) return true; // If V is a PHI node defined in the same block as the condition PHI, we can // map the arguments. const PHINode *CondPHI = cast(SI.getCondition()); if (const PHINode *VP = dyn_cast(I)) if (VP->getParent() == CondPHI->getParent()) return true; // Otherwise, if the PHI and select are defined in the same block and if V is // defined in a different block, then we can transform it. if (SI.getParent() == CondPHI->getParent() && I->getParent() != CondPHI->getParent()) return true; // Otherwise we have a 'hard' case and we can't tell without doing more // detailed dominator based analysis, punt. return false; } /// We have an SPF (e.g. a min or max) of an SPF of the form: /// SPF2(SPF1(A, B), C) Instruction *InstCombiner::foldSPFofSPF(Instruction *Inner, SelectPatternFlavor SPF1, Value *A, Value *B, Instruction &Outer, SelectPatternFlavor SPF2, Value *C) { if (Outer.getType() != Inner->getType()) return nullptr; if (C == A || C == B) { // MAX(MAX(A, B), B) -> MAX(A, B) // MIN(MIN(a, b), a) -> MIN(a, b) if (SPF1 == SPF2) return replaceInstUsesWith(Outer, Inner); // MAX(MIN(a, b), a) -> a // MIN(MAX(a, b), a) -> a if ((SPF1 == SPF_SMIN && SPF2 == SPF_SMAX) || (SPF1 == SPF_SMAX && SPF2 == SPF_SMIN) || (SPF1 == SPF_UMIN && SPF2 == SPF_UMAX) || (SPF1 == SPF_UMAX && SPF2 == SPF_UMIN)) return replaceInstUsesWith(Outer, C); } if (SPF1 == SPF2) { const APInt *CB, *CC; if (match(B, m_APInt(CB)) && match(C, m_APInt(CC))) { // MIN(MIN(A, 23), 97) -> MIN(A, 23) // MAX(MAX(A, 97), 23) -> MAX(A, 97) if ((SPF1 == SPF_UMIN && CB->ule(*CC)) || (SPF1 == SPF_SMIN && CB->sle(*CC)) || (SPF1 == SPF_UMAX && CB->uge(*CC)) || (SPF1 == SPF_SMAX && CB->sge(*CC))) return replaceInstUsesWith(Outer, Inner); // MIN(MIN(A, 97), 23) -> MIN(A, 23) // MAX(MAX(A, 23), 97) -> MAX(A, 97) if ((SPF1 == SPF_UMIN && CB->ugt(*CC)) || (SPF1 == SPF_SMIN && CB->sgt(*CC)) || (SPF1 == SPF_UMAX && CB->ult(*CC)) || (SPF1 == SPF_SMAX && CB->slt(*CC))) { Outer.replaceUsesOfWith(Inner, A); return &Outer; } } } // ABS(ABS(X)) -> ABS(X) // NABS(NABS(X)) -> NABS(X) if (SPF1 == SPF2 && (SPF1 == SPF_ABS || SPF1 == SPF_NABS)) { return replaceInstUsesWith(Outer, Inner); } // ABS(NABS(X)) -> ABS(X) // NABS(ABS(X)) -> NABS(X) if ((SPF1 == SPF_ABS && SPF2 == SPF_NABS) || (SPF1 == SPF_NABS && SPF2 == SPF_ABS)) { SelectInst *SI = cast(Inner); Value *NewSI = Builder.CreateSelect(SI->getCondition(), SI->getFalseValue(), SI->getTrueValue(), SI->getName(), SI); return replaceInstUsesWith(Outer, NewSI); } auto IsFreeOrProfitableToInvert = [&](Value 
*V, Value *&NotV, bool &ElidesXor) { if (match(V, m_Not(m_Value(NotV)))) { // If V has at most 2 uses then we can get rid of the xor operation // entirely. ElidesXor |= !V->hasNUsesOrMore(3); return true; } if (IsFreeToInvert(V, !V->hasNUsesOrMore(3))) { NotV = nullptr; return true; } return false; }; Value *NotA, *NotB, *NotC; bool ElidesXor = false; // MIN(MIN(~A, ~B), ~C) == ~MAX(MAX(A, B), C) // MIN(MAX(~A, ~B), ~C) == ~MAX(MIN(A, B), C) // MAX(MIN(~A, ~B), ~C) == ~MIN(MAX(A, B), C) // MAX(MAX(~A, ~B), ~C) == ~MIN(MIN(A, B), C) // // This transform is performance neutral if we can elide at least one xor from // the set of three operands, since we'll be tacking on an xor at the very // end. if (SelectPatternResult::isMinOrMax(SPF1) && SelectPatternResult::isMinOrMax(SPF2) && IsFreeOrProfitableToInvert(A, NotA, ElidesXor) && IsFreeOrProfitableToInvert(B, NotB, ElidesXor) && IsFreeOrProfitableToInvert(C, NotC, ElidesXor) && ElidesXor) { if (!NotA) NotA = Builder.CreateNot(A); if (!NotB) NotB = Builder.CreateNot(B); if (!NotC) NotC = Builder.CreateNot(C); Value *NewInner = generateMinMaxSelectPattern( Builder, getInverseMinMaxSelectPattern(SPF1), NotA, NotB); Value *NewOuter = Builder.CreateNot(generateMinMaxSelectPattern( Builder, getInverseMinMaxSelectPattern(SPF2), NewInner, NotC)); return replaceInstUsesWith(Outer, NewOuter); } return nullptr; } /// Turn select C, (X + Y), (X - Y) --> (X + (select C, Y, (-Y))). /// This is even legal for FP. static Instruction *foldAddSubSelect(SelectInst &SI, InstCombiner::BuilderTy &Builder) { Value *CondVal = SI.getCondition(); Value *TrueVal = SI.getTrueValue(); Value *FalseVal = SI.getFalseValue(); auto *TI = dyn_cast(TrueVal); auto *FI = dyn_cast(FalseVal); if (!TI || !FI || !TI->hasOneUse() || !FI->hasOneUse()) return nullptr; Instruction *AddOp = nullptr, *SubOp = nullptr; if ((TI->getOpcode() == Instruction::Sub && FI->getOpcode() == Instruction::Add) || (TI->getOpcode() == Instruction::FSub && FI->getOpcode() == Instruction::FAdd)) { AddOp = FI; SubOp = TI; } else if ((FI->getOpcode() == Instruction::Sub && TI->getOpcode() == Instruction::Add) || (FI->getOpcode() == Instruction::FSub && TI->getOpcode() == Instruction::FAdd)) { AddOp = TI; SubOp = FI; } if (AddOp) { Value *OtherAddOp = nullptr; if (SubOp->getOperand(0) == AddOp->getOperand(0)) { OtherAddOp = AddOp->getOperand(1); } else if (SubOp->getOperand(0) == AddOp->getOperand(1)) { OtherAddOp = AddOp->getOperand(0); } if (OtherAddOp) { // So at this point we know we have (Y -> OtherAddOp): // select C, (add X, Y), (sub X, Z) Value *NegVal; // Compute -Z if (SI.getType()->isFPOrFPVectorTy()) { NegVal = Builder.CreateFNeg(SubOp->getOperand(1)); if (Instruction *NegInst = dyn_cast(NegVal)) { FastMathFlags Flags = AddOp->getFastMathFlags(); Flags &= SubOp->getFastMathFlags(); NegInst->setFastMathFlags(Flags); } } else { NegVal = Builder.CreateNeg(SubOp->getOperand(1)); } Value *NewTrueOp = OtherAddOp; Value *NewFalseOp = NegVal; if (AddOp != TI) std::swap(NewTrueOp, NewFalseOp); Value *NewSel = Builder.CreateSelect(CondVal, NewTrueOp, NewFalseOp, SI.getName() + ".p", &SI); if (SI.getType()->isFPOrFPVectorTy()) { Instruction *RI = BinaryOperator::CreateFAdd(SubOp->getOperand(0), NewSel); FastMathFlags Flags = AddOp->getFastMathFlags(); Flags &= SubOp->getFastMathFlags(); RI->setFastMathFlags(Flags); return RI; } else return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel); } } return nullptr; } Instruction *InstCombiner::foldSelectExtConst(SelectInst &Sel) { Instruction *ExtInst; if 
(!match(Sel.getTrueValue(), m_Instruction(ExtInst)) && !match(Sel.getFalseValue(), m_Instruction(ExtInst))) return nullptr; auto ExtOpcode = ExtInst->getOpcode(); if (ExtOpcode != Instruction::ZExt && ExtOpcode != Instruction::SExt) return nullptr; // TODO: Handle larger types? That requires adjusting FoldOpIntoSelect too. Value *X = ExtInst->getOperand(0); Type *SmallType = X->getType(); if (!SmallType->isIntOrIntVectorTy(1)) return nullptr; Constant *C; if (!match(Sel.getTrueValue(), m_Constant(C)) && !match(Sel.getFalseValue(), m_Constant(C))) return nullptr; // If the constant is the same after truncation to the smaller type and // extension to the original type, we can narrow the select. Value *Cond = Sel.getCondition(); Type *SelType = Sel.getType(); Constant *TruncC = ConstantExpr::getTrunc(C, SmallType); Constant *ExtC = ConstantExpr::getCast(ExtOpcode, TruncC, SelType); if (ExtC == C) { Value *TruncCVal = cast(TruncC); if (ExtInst == Sel.getFalseValue()) std::swap(X, TruncCVal); // select Cond, (ext X), C --> ext(select Cond, X, C') // select Cond, C, (ext X) --> ext(select Cond, C', X) Value *NewSel = Builder.CreateSelect(Cond, X, TruncCVal, "narrow", &Sel); return CastInst::Create(Instruction::CastOps(ExtOpcode), NewSel, SelType); } // If one arm of the select is the extend of the condition, replace that arm // with the extension of the appropriate known bool value. if (Cond == X) { if (ExtInst == Sel.getTrueValue()) { // select X, (sext X), C --> select X, -1, C // select X, (zext X), C --> select X, 1, C Constant *One = ConstantInt::getTrue(SmallType); Constant *AllOnesOrOne = ConstantExpr::getCast(ExtOpcode, One, SelType); return SelectInst::Create(Cond, AllOnesOrOne, C, "", nullptr, &Sel); } else { // select X, C, (sext X) --> select X, C, 0 // select X, C, (zext X) --> select X, C, 0 Constant *Zero = ConstantInt::getNullValue(SelType); return SelectInst::Create(Cond, C, Zero, "", nullptr, &Sel); } } return nullptr; } /// Try to transform a vector select with a constant condition vector into a /// shuffle for easier combining with other shuffles and insert/extract. static Instruction *canonicalizeSelectToShuffle(SelectInst &SI) { Value *CondVal = SI.getCondition(); Constant *CondC; if (!CondVal->getType()->isVectorTy() || !match(CondVal, m_Constant(CondC))) return nullptr; unsigned NumElts = CondVal->getType()->getVectorNumElements(); SmallVector Mask; Mask.reserve(NumElts); Type *Int32Ty = Type::getInt32Ty(CondVal->getContext()); for (unsigned i = 0; i != NumElts; ++i) { Constant *Elt = CondC->getAggregateElement(i); if (!Elt) return nullptr; if (Elt->isOneValue()) { // If the select condition element is true, choose from the 1st vector. Mask.push_back(ConstantInt::get(Int32Ty, i)); } else if (Elt->isNullValue()) { // If the select condition element is false, choose from the 2nd vector. Mask.push_back(ConstantInt::get(Int32Ty, i + NumElts)); } else if (isa(Elt)) { // Undef in a select condition (choose one of the operands) does not mean // the same thing as undef in a shuffle mask (any value is acceptable), so // give up. return nullptr; } else { // Bail out on a constant expression. 
return nullptr; } } return new ShuffleVectorInst(SI.getTrueValue(), SI.getFalseValue(), ConstantVector::get(Mask)); } /// Reuse bitcasted operands between a compare and select: /// select (cmp (bitcast C), (bitcast D)), (bitcast' C), (bitcast' D) --> /// bitcast (select (cmp (bitcast C), (bitcast D)), (bitcast C), (bitcast D)) static Instruction *foldSelectCmpBitcasts(SelectInst &Sel, InstCombiner::BuilderTy &Builder) { Value *Cond = Sel.getCondition(); Value *TVal = Sel.getTrueValue(); Value *FVal = Sel.getFalseValue(); CmpInst::Predicate Pred; Value *A, *B; if (!match(Cond, m_Cmp(Pred, m_Value(A), m_Value(B)))) return nullptr; // The select condition is a compare instruction. If the select's true/false // values are already the same as the compare operands, there's nothing to do. if (TVal == A || TVal == B || FVal == A || FVal == B) return nullptr; Value *C, *D; if (!match(A, m_BitCast(m_Value(C))) || !match(B, m_BitCast(m_Value(D)))) return nullptr; // select (cmp (bitcast C), (bitcast D)), (bitcast TSrc), (bitcast FSrc) Value *TSrc, *FSrc; if (!match(TVal, m_BitCast(m_Value(TSrc))) || !match(FVal, m_BitCast(m_Value(FSrc)))) return nullptr; // If the select true/false values are *different bitcasts* of the same source // operands, make the select operands the same as the compare operands and // cast the result. This is the canonical select form for min/max. Value *NewSel; if (TSrc == C && FSrc == D) { // select (cmp (bitcast C), (bitcast D)), (bitcast' C), (bitcast' D) --> // bitcast (select (cmp A, B), A, B) NewSel = Builder.CreateSelect(Cond, A, B, "", &Sel); } else if (TSrc == D && FSrc == C) { // select (cmp (bitcast C), (bitcast D)), (bitcast' D), (bitcast' C) --> // bitcast (select (cmp A, B), B, A) NewSel = Builder.CreateSelect(Cond, B, A, "", &Sel); } else { return nullptr; } return CastInst::CreateBitOrPointerCast(NewSel, Sel.getType()); } /// Try to eliminate select instructions that test the returned flag of cmpxchg /// instructions. /// /// If a select instruction tests the returned flag of a cmpxchg instruction and /// selects between the returned value of the cmpxchg instruction its compare /// operand, the result of the select will always be equal to its false value. /// For example: /// /// %0 = cmpxchg i64* %ptr, i64 %compare, i64 %new_value seq_cst seq_cst /// %1 = extractvalue { i64, i1 } %0, 1 /// %2 = extractvalue { i64, i1 } %0, 0 /// %3 = select i1 %1, i64 %compare, i64 %2 /// ret i64 %3 /// /// The returned value of the cmpxchg instruction (%2) is the original value /// located at %ptr prior to any update. If the cmpxchg operation succeeds, %2 /// must have been equal to %compare. Thus, the result of the select is always /// equal to %2, and the code can be simplified to: /// /// %0 = cmpxchg i64* %ptr, i64 %compare, i64 %new_value seq_cst seq_cst /// %1 = extractvalue { i64, i1 } %0, 0 /// ret i64 %1 /// static Instruction *foldSelectCmpXchg(SelectInst &SI) { // A helper that determines if V is an extractvalue instruction whose // aggregate operand is a cmpxchg instruction and whose single index is equal // to I. If such conditions are true, the helper returns the cmpxchg // instruction; otherwise, a nullptr is returned. 
auto isExtractFromCmpXchg = [](Value *V, unsigned I) -> AtomicCmpXchgInst * { auto *Extract = dyn_cast(V); if (!Extract) return nullptr; if (Extract->getIndices()[0] != I) return nullptr; return dyn_cast(Extract->getAggregateOperand()); }; // If the select has a single user, and this user is a select instruction that // we can simplify, skip the cmpxchg simplification for now. if (SI.hasOneUse()) if (auto *Select = dyn_cast(SI.user_back())) if (Select->getCondition() == SI.getCondition()) if (Select->getFalseValue() == SI.getTrueValue() || Select->getTrueValue() == SI.getFalseValue()) return nullptr; // Ensure the select condition is the returned flag of a cmpxchg instruction. auto *CmpXchg = isExtractFromCmpXchg(SI.getCondition(), 1); if (!CmpXchg) return nullptr; // Check the true value case: The true value of the select is the returned // value of the same cmpxchg used by the condition, and the false value is the // cmpxchg instruction's compare operand. if (auto *X = isExtractFromCmpXchg(SI.getTrueValue(), 0)) if (X == CmpXchg && X->getCompareOperand() == SI.getFalseValue()) { SI.setTrueValue(SI.getFalseValue()); return &SI; } // Check the false value case: The false value of the select is the returned // value of the same cmpxchg used by the condition, and the true value is the // cmpxchg instruction's compare operand. if (auto *X = isExtractFromCmpXchg(SI.getFalseValue(), 0)) if (X == CmpXchg && X->getCompareOperand() == SI.getTrueValue()) { SI.setTrueValue(SI.getFalseValue()); return &SI; } return nullptr; } Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { Value *CondVal = SI.getCondition(); Value *TrueVal = SI.getTrueValue(); Value *FalseVal = SI.getFalseValue(); Type *SelType = SI.getType(); // FIXME: Remove this workaround when freeze related patches are done. // For select with undef operand which feeds into an equality comparison, // don't simplify it so loop unswitch can know the equality comparison // may have an undef operand. This is a workaround for PR31652 caused by // descrepancy about branch on undef between LoopUnswitch and GVN. if (isa(TrueVal) || isa(FalseVal)) { if (llvm::any_of(SI.users(), [&](User *U) { ICmpInst *CI = dyn_cast(U); if (CI && CI->isEquality()) return true; return false; })) { return nullptr; } } if (Value *V = SimplifySelectInst(CondVal, TrueVal, FalseVal, SQ.getWithInstruction(&SI))) return replaceInstUsesWith(SI, V); if (Instruction *I = canonicalizeSelectToShuffle(SI)) return I; // Canonicalize a one-use integer compare with a non-canonical predicate by // inverting the predicate and swapping the select operands. This matches a // compare canonicalization for conditional branches. // TODO: Should we do the same for FP compares? CmpInst::Predicate Pred; if (match(CondVal, m_OneUse(m_ICmp(Pred, m_Value(), m_Value()))) && !isCanonicalPredicate(Pred)) { // Swap true/false values and condition. CmpInst *Cond = cast(CondVal); Cond->setPredicate(CmpInst::getInversePredicate(Pred)); SI.setOperand(1, FalseVal); SI.setOperand(2, TrueVal); SI.swapProfMetadata(); Worklist.Add(Cond); return &SI; } if (SelType->isIntOrIntVectorTy(1) && TrueVal->getType() == CondVal->getType()) { if (match(TrueVal, m_One())) { // Change: A = select B, true, C --> A = or B, C return BinaryOperator::CreateOr(CondVal, FalseVal); } if (match(TrueVal, m_Zero())) { // Change: A = select B, false, C --> A = and !B, C Value *NotCond = Builder.CreateNot(CondVal, "not." 
+ CondVal->getName()); return BinaryOperator::CreateAnd(NotCond, FalseVal); } if (match(FalseVal, m_Zero())) { // Change: A = select B, C, false --> A = and B, C return BinaryOperator::CreateAnd(CondVal, TrueVal); } if (match(FalseVal, m_One())) { // Change: A = select B, C, true --> A = or !B, C Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName()); return BinaryOperator::CreateOr(NotCond, TrueVal); } // select a, a, b -> a | b // select a, b, a -> a & b if (CondVal == TrueVal) return BinaryOperator::CreateOr(CondVal, FalseVal); if (CondVal == FalseVal) return BinaryOperator::CreateAnd(CondVal, TrueVal); // select a, ~a, b -> (~a) & b // select a, b, ~a -> (~a) | b if (match(TrueVal, m_Not(m_Specific(CondVal)))) return BinaryOperator::CreateAnd(TrueVal, FalseVal); if (match(FalseVal, m_Not(m_Specific(CondVal)))) return BinaryOperator::CreateOr(TrueVal, FalseVal); } // Selecting between two integer or vector splat integer constants? // // Note that we don't handle a scalar select of vectors: // select i1 %c, <2 x i8> <1, 1>, <2 x i8> <0, 0> // because that may need 3 instructions to splat the condition value: // extend, insertelement, shufflevector. if (SelType->isIntOrIntVectorTy() && CondVal->getType()->isVectorTy() == SelType->isVectorTy()) { // select C, 1, 0 -> zext C to int if (match(TrueVal, m_One()) && match(FalseVal, m_Zero())) return new ZExtInst(CondVal, SelType); // select C, -1, 0 -> sext C to int if (match(TrueVal, m_AllOnes()) && match(FalseVal, m_Zero())) return new SExtInst(CondVal, SelType); // select C, 0, 1 -> zext !C to int if (match(TrueVal, m_Zero()) && match(FalseVal, m_One())) { Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName()); return new ZExtInst(NotCond, SelType); } // select C, 0, -1 -> sext !C to int if (match(TrueVal, m_Zero()) && match(FalseVal, m_AllOnes())) { Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName()); return new SExtInst(NotCond, SelType); } } // See if we are selecting two values based on a comparison of the two values. if (FCmpInst *FCI = dyn_cast(CondVal)) { if (FCI->getOperand(0) == TrueVal && FCI->getOperand(1) == FalseVal) { // Transform (X == Y) ? X : Y -> Y if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) { // This is not safe in general for floating point: // consider X== -0, Y== +0. // It becomes safe if either operand is a nonzero constant. ConstantFP *CFPt, *CFPf; if (((CFPt = dyn_cast(TrueVal)) && !CFPt->getValueAPF().isZero()) || ((CFPf = dyn_cast(FalseVal)) && !CFPf->getValueAPF().isZero())) return replaceInstUsesWith(SI, FalseVal); } // Transform (X une Y) ? X : Y -> X if (FCI->getPredicate() == FCmpInst::FCMP_UNE) { // This is not safe in general for floating point: // consider X== -0, Y== +0. // It becomes safe if either operand is a nonzero constant. ConstantFP *CFPt, *CFPf; if (((CFPt = dyn_cast(TrueVal)) && !CFPt->getValueAPF().isZero()) || ((CFPf = dyn_cast(FalseVal)) && !CFPf->getValueAPF().isZero())) return replaceInstUsesWith(SI, TrueVal); } // Canonicalize to use ordered comparisons by swapping the select // operands. // // e.g. // (X ugt Y) ? X : Y -> (X ole Y) ? 
Y : X if (FCI->hasOneUse() && FCmpInst::isUnordered(FCI->getPredicate())) { FCmpInst::Predicate InvPred = FCI->getInversePredicate(); IRBuilder<>::FastMathFlagGuard FMFG(Builder); Builder.setFastMathFlags(FCI->getFastMathFlags()); Value *NewCond = Builder.CreateFCmp(InvPred, TrueVal, FalseVal, FCI->getName() + ".inv"); return SelectInst::Create(NewCond, FalseVal, TrueVal, SI.getName() + ".p"); } // NOTE: if we wanted to, this is where to detect MIN/MAX } else if (FCI->getOperand(0) == FalseVal && FCI->getOperand(1) == TrueVal){ // Transform (X == Y) ? Y : X -> X if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) { // This is not safe in general for floating point: // consider X== -0, Y== +0. // It becomes safe if either operand is a nonzero constant. ConstantFP *CFPt, *CFPf; if (((CFPt = dyn_cast(TrueVal)) && !CFPt->getValueAPF().isZero()) || ((CFPf = dyn_cast(FalseVal)) && !CFPf->getValueAPF().isZero())) return replaceInstUsesWith(SI, FalseVal); } // Transform (X une Y) ? Y : X -> Y if (FCI->getPredicate() == FCmpInst::FCMP_UNE) { // This is not safe in general for floating point: // consider X== -0, Y== +0. // It becomes safe if either operand is a nonzero constant. ConstantFP *CFPt, *CFPf; if (((CFPt = dyn_cast(TrueVal)) && !CFPt->getValueAPF().isZero()) || ((CFPf = dyn_cast(FalseVal)) && !CFPf->getValueAPF().isZero())) return replaceInstUsesWith(SI, TrueVal); } // Canonicalize to use ordered comparisons by swapping the select // operands. // // e.g. // (X ugt Y) ? X : Y -> (X ole Y) ? X : Y if (FCI->hasOneUse() && FCmpInst::isUnordered(FCI->getPredicate())) { FCmpInst::Predicate InvPred = FCI->getInversePredicate(); IRBuilder<>::FastMathFlagGuard FMFG(Builder); Builder.setFastMathFlags(FCI->getFastMathFlags()); Value *NewCond = Builder.CreateFCmp(InvPred, FalseVal, TrueVal, FCI->getName() + ".inv"); return SelectInst::Create(NewCond, FalseVal, TrueVal, SI.getName() + ".p"); } // NOTE: if we wanted to, this is where to detect MIN/MAX } // NOTE: if we wanted to, this is where to detect ABS } // See if we are selecting two values based on a comparison of the two values. if (ICmpInst *ICI = dyn_cast(CondVal)) if (Instruction *Result = foldSelectInstWithICmp(SI, ICI)) return Result; if (Instruction *Add = foldAddSubSelect(SI, Builder)) return Add; // Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z)) auto *TI = dyn_cast(TrueVal); auto *FI = dyn_cast(FalseVal); if (TI && FI && TI->getOpcode() == FI->getOpcode()) if (Instruction *IV = foldSelectOpOp(SI, TI, FI)) return IV; if (Instruction *I = foldSelectExtConst(SI)) return I; // See if we can fold the select into one of our operands. if (SelType->isIntOrIntVectorTy() || SelType->isFPOrFPVectorTy()) { if (Instruction *FoldI = foldSelectIntoOp(SI, TrueVal, FalseVal)) return FoldI; Value *LHS, *RHS, *LHS2, *RHS2; Instruction::CastOps CastOp; SelectPatternResult SPR = matchSelectPattern(&SI, LHS, RHS, &CastOp); auto SPF = SPR.Flavor; if (SelectPatternResult::isMinOrMax(SPF)) { // Canonicalize so that // - type casts are outside select patterns. 
// - float clamp is transformed to min/max pattern bool IsCastNeeded = LHS->getType() != SelType; Value *CmpLHS = cast(CondVal)->getOperand(0); Value *CmpRHS = cast(CondVal)->getOperand(1); if (IsCastNeeded || (LHS->getType()->isFPOrFPVectorTy() && ((CmpLHS != LHS && CmpLHS != RHS) || (CmpRHS != LHS && CmpRHS != RHS)))) { CmpInst::Predicate Pred = getCmpPredicateForMinMax(SPF, SPR.Ordered); Value *Cmp; if (CmpInst::isIntPredicate(Pred)) { Cmp = Builder.CreateICmp(Pred, LHS, RHS); } else { IRBuilder<>::FastMathFlagGuard FMFG(Builder); auto FMF = cast(SI.getCondition())->getFastMathFlags(); Builder.setFastMathFlags(FMF); Cmp = Builder.CreateFCmp(Pred, LHS, RHS); } Value *NewSI = Builder.CreateSelect(Cmp, LHS, RHS, SI.getName(), &SI); if (!IsCastNeeded) return replaceInstUsesWith(SI, NewSI); Value *NewCast = Builder.CreateCast(CastOp, NewSI, SelType); return replaceInstUsesWith(SI, NewCast); } } if (SPF) { // MAX(MAX(a, b), a) -> MAX(a, b) // MIN(MIN(a, b), a) -> MIN(a, b) // MAX(MIN(a, b), a) -> a // MIN(MAX(a, b), a) -> a // ABS(ABS(a)) -> ABS(a) // NABS(NABS(a)) -> NABS(a) if (SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor) if (Instruction *R = foldSPFofSPF(cast(LHS),SPF2,LHS2,RHS2, SI, SPF, RHS)) return R; if (SelectPatternFlavor SPF2 = matchSelectPattern(RHS, LHS2, RHS2).Flavor) if (Instruction *R = foldSPFofSPF(cast(RHS),SPF2,LHS2,RHS2, SI, SPF, LHS)) return R; } // MAX(~a, ~b) -> ~MIN(a, b) if ((SPF == SPF_SMAX || SPF == SPF_UMAX) && IsFreeToInvert(LHS, LHS->hasNUses(2)) && IsFreeToInvert(RHS, RHS->hasNUses(2))) { // For this transform to be profitable, we need to eliminate at least two // 'not' instructions if we're going to add one 'not' instruction. int NumberOfNots = (LHS->hasNUses(2) && match(LHS, m_Not(m_Value()))) + (RHS->hasNUses(2) && match(RHS, m_Not(m_Value()))) + (SI.hasOneUse() && match(*SI.user_begin(), m_Not(m_Value()))); if (NumberOfNots >= 2) { Value *NewLHS = Builder.CreateNot(LHS); Value *NewRHS = Builder.CreateNot(RHS); Value *NewCmp = SPF == SPF_SMAX ? Builder.CreateICmpSLT(NewLHS, NewRHS) : Builder.CreateICmpULT(NewLHS, NewRHS); Value *NewSI = Builder.CreateNot(Builder.CreateSelect(NewCmp, NewLHS, NewRHS)); return replaceInstUsesWith(SI, NewSI); } } // TODO. // ABS(-X) -> ABS(X) } // See if we can fold the select into a phi node if the condition is a select. if (auto *PN = dyn_cast(SI.getCondition())) // The true/false values have to be live in the PHI predecessor's blocks. if (canSelectOperandBeMappingIntoPredBlock(TrueVal, SI) && canSelectOperandBeMappingIntoPredBlock(FalseVal, SI)) if (Instruction *NV = foldOpIntoPhi(SI, PN)) return NV; if (SelectInst *TrueSI = dyn_cast(TrueVal)) { if (TrueSI->getCondition()->getType() == CondVal->getType()) { // select(C, select(C, a, b), c) -> select(C, a, c) if (TrueSI->getCondition() == CondVal) { if (SI.getTrueValue() == TrueSI->getTrueValue()) return nullptr; SI.setOperand(1, TrueSI->getTrueValue()); return &SI; } // select(C0, select(C1, a, b), b) -> select(C0&C1, a, b) // We choose this as normal form to enable folding on the And and shortening // paths for the values (this helps GetUnderlyingObjects() for example). 
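      // Concretely (added illustration):
      //   %inner = select i1 %C1, i32 %a, i32 %b
      //   %outer = select i1 %C0, i32 %inner, i32 %b
      // becomes
      //   %cond  = and i1 %C0, %C1
      //   %outer = select i1 %cond, i32 %a, i32 %b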
if (TrueSI->getFalseValue() == FalseVal && TrueSI->hasOneUse()) { Value *And = Builder.CreateAnd(CondVal, TrueSI->getCondition()); SI.setOperand(0, And); SI.setOperand(1, TrueSI->getTrueValue()); return &SI; } } } if (SelectInst *FalseSI = dyn_cast(FalseVal)) { if (FalseSI->getCondition()->getType() == CondVal->getType()) { // select(C, a, select(C, b, c)) -> select(C, a, c) if (FalseSI->getCondition() == CondVal) { if (SI.getFalseValue() == FalseSI->getFalseValue()) return nullptr; SI.setOperand(2, FalseSI->getFalseValue()); return &SI; } // select(C0, a, select(C1, a, b)) -> select(C0|C1, a, b) if (FalseSI->getTrueValue() == TrueVal && FalseSI->hasOneUse()) { Value *Or = Builder.CreateOr(CondVal, FalseSI->getCondition()); SI.setOperand(0, Or); SI.setOperand(2, FalseSI->getFalseValue()); return &SI; } } } + auto canMergeSelectThroughBinop = [](BinaryOperator *BO) { + // The select might be preventing a division by 0. + switch (BO->getOpcode()) { + default: + return true; + case Instruction::SRem: + case Instruction::URem: + case Instruction::SDiv: + case Instruction::UDiv: + return false; + } + }; + // Try to simplify a binop sandwiched between 2 selects with the same // condition. // select(C, binop(select(C, X, Y), W), Z) -> select(C, binop(X, W), Z) BinaryOperator *TrueBO; - if (match(TrueVal, m_OneUse(m_BinOp(TrueBO)))) { + if (match(TrueVal, m_OneUse(m_BinOp(TrueBO))) && + canMergeSelectThroughBinop(TrueBO)) { if (auto *TrueBOSI = dyn_cast(TrueBO->getOperand(0))) { if (TrueBOSI->getCondition() == CondVal) { TrueBO->setOperand(0, TrueBOSI->getTrueValue()); Worklist.Add(TrueBO); return &SI; } } if (auto *TrueBOSI = dyn_cast(TrueBO->getOperand(1))) { if (TrueBOSI->getCondition() == CondVal) { TrueBO->setOperand(1, TrueBOSI->getTrueValue()); Worklist.Add(TrueBO); return &SI; } } } // select(C, Z, binop(select(C, X, Y), W)) -> select(C, Z, binop(Y, W)) BinaryOperator *FalseBO; - if (match(FalseVal, m_OneUse(m_BinOp(FalseBO)))) { + if (match(FalseVal, m_OneUse(m_BinOp(FalseBO))) && + canMergeSelectThroughBinop(FalseBO)) { if (auto *FalseBOSI = dyn_cast(FalseBO->getOperand(0))) { if (FalseBOSI->getCondition() == CondVal) { FalseBO->setOperand(0, FalseBOSI->getFalseValue()); Worklist.Add(FalseBO); return &SI; } } if (auto *FalseBOSI = dyn_cast(FalseBO->getOperand(1))) { if (FalseBOSI->getCondition() == CondVal) { FalseBO->setOperand(1, FalseBOSI->getFalseValue()); Worklist.Add(FalseBO); return &SI; } } } if (BinaryOperator::isNot(CondVal)) { SI.setOperand(0, BinaryOperator::getNotArgument(CondVal)); SI.setOperand(1, FalseVal); SI.setOperand(2, TrueVal); return &SI; } if (VectorType *VecTy = dyn_cast(SelType)) { unsigned VWidth = VecTy->getNumElements(); APInt UndefElts(VWidth, 0); APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); if (Value *V = SimplifyDemandedVectorElts(&SI, AllOnesEltMask, UndefElts)) { if (V != &SI) return replaceInstUsesWith(SI, V); return &SI; } } // See if we can determine the result of this select based on a dominating // condition. BasicBlock *Parent = SI.getParent(); if (BasicBlock *Dom = Parent->getSinglePredecessor()) { auto *PBI = dyn_cast_or_null(Dom->getTerminator()); if (PBI && PBI->isConditional() && PBI->getSuccessor(0) != PBI->getSuccessor(1) && (PBI->getSuccessor(0) == Parent || PBI->getSuccessor(1) == Parent)) { bool CondIsTrue = PBI->getSuccessor(0) == Parent; Optional Implication = isImpliedCondition( PBI->getCondition(), SI.getCondition(), DL, CondIsTrue); if (Implication) { Value *V = *Implication ? 
TrueVal : FalseVal; return replaceInstUsesWith(SI, V); } } } // If we can compute the condition, there's no need for a select. // Like the above fold, we are attempting to reduce compile-time cost by // putting this fold here with limitations rather than in InstSimplify. // The motivation for this call into value tracking is to take advantage of // the assumption cache, so make sure that is populated. if (!CondVal->getType()->isVectorTy() && !AC.assumptions().empty()) { KnownBits Known(1); computeKnownBits(CondVal, Known, 0, &SI); if (Known.One.isOneValue()) return replaceInstUsesWith(SI, TrueVal); if (Known.Zero.isOneValue()) return replaceInstUsesWith(SI, FalseVal); } if (Instruction *BitCastSel = foldSelectCmpBitcasts(SI, Builder)) return BitCastSel; // Simplify selects that test the returned flag of cmpxchg instructions. if (Instruction *Select = foldSelectCmpXchg(SI)) return Select; return nullptr; } Index: head/contrib/llvm/lib/Transforms/Scalar/LICM.cpp =================================================================== --- head/contrib/llvm/lib/Transforms/Scalar/LICM.cpp (revision 329982) +++ head/contrib/llvm/lib/Transforms/Scalar/LICM.cpp (revision 329983) @@ -1,1561 +1,1576 @@ //===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This pass performs loop invariant code motion, attempting to remove as much // code from the body of a loop as possible. It does this by either hoisting // code into the preheader block, or by sinking code to the exit blocks if it is // safe. This pass also promotes must-aliased memory locations in the loop to // live in registers, thus hoisting and sinking "invariant" loads and stores. // // This pass uses alias analysis for two purposes: // // 1. Moving loop invariant loads and calls out of loops. If we can determine // that a load or call inside of a loop never aliases anything stored to, // we can hoist it or sink it like any other instruction. // 2. Scalar Promotion of Memory - If there is a store instruction inside of // the loop, we try to move the store to happen AFTER the loop instead of // inside of the loop. This can only happen if a few conditions are true: // A. The pointer stored through is loop invariant // B. There are no stores or loads in the loop which _may_ alias the // pointer. There are no calls in the loop which mod/ref the pointer. // If these conditions are true, we can promote the loads and stores in the // loop of the pointer to use a temporary alloca'd variable. We then use // the SSAUpdater to construct the appropriate SSA form for the value. 
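//
// A small illustration of the promotion case (added example):
//
//   for (...) { *P += 1; }      // P loop-invariant, no aliasing accesses
//
// is effectively rewritten as:
//
//   tmp = *P;
//   for (...) { tmp += 1; }
//   *P = tmp;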
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/LICM.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PredIteratorCache.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <algorithm>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "licm"

STATISTIC(NumSunk, "Number of instructions sunk out of loop");
STATISTIC(NumHoisted, "Number of instructions hoisted out of loop");
STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
STATISTIC(NumPromoted, "Number of memory locations promoted to registers");

/// Memory promotion is enabled by default.
static cl::opt<bool> DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false), cl::desc("Disable memory promotion in LICM pass")); static cl::opt<unsigned> MaxNumUsesTraversed( "licm-max-num-uses-traversed", cl::Hidden, cl::init(8), cl::desc("Max num uses visited for identifying load " "invariance in loop using invariant start (default = 8)")); static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI); static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, TargetTransformInfo *TTI, bool &FreeInLoop); static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE); static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, - const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, + const Loop *CurLoop, LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE, bool FreeInLoop); static bool isSafeToExecuteUnconditionally(Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE, const Instruction *CtxI = nullptr); static bool pointerInvalidatedByLoop(Value *V, uint64_t Size, const AAMDNodes &AAInfo, AliasSetTracker *CurAST); static Instruction * CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI, const LoopSafetyInfo *SafetyInfo); namespace { struct LoopInvariantCodeMotion { bool runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, ScalarEvolution *SE, MemorySSA *MSSA, OptimizationRemarkEmitter *ORE, bool DeleteAST); DenseMap<Loop *, AliasSetTracker *> &getLoopToAliasSetMap() { return LoopToAliasSetMap; } private: DenseMap<Loop *, AliasSetTracker *> LoopToAliasSetMap; AliasSetTracker *collectAliasInfoForLoop(Loop *L, LoopInfo *LI, AliasAnalysis *AA); }; struct LegacyLICMPass : public LoopPass { static char ID; // Pass identification, replacement for typeid LegacyLICMPass() : LoopPass(ID) { initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry()); } bool runOnLoop(Loop *L, LPPassManager &LPM) override { if (skipLoop(L)) { // If we have run LICM on a previous loop but now we are skipping // (because we've hit the opt-bisect limit), we need to clear the // loop alias information. for (auto &LTAS : LICM.getLoopToAliasSetMap()) delete LTAS.second; LICM.getLoopToAliasSetMap().clear(); return false; } auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); MemorySSA *MSSA = EnableMSSALoopDependency ? (&getAnalysis<MemorySSAWrapperPass>().getMSSA()) : nullptr; // For the old PM, we can't use OptimizationRemarkEmitter as an analysis // pass. Function analyses need to be preserved across loop transformations // but ORE cannot be preserved (see comment before the pass definition). OptimizationRemarkEmitter ORE(L->getHeader()->getParent()); return LICM.runOnLoop(L, &getAnalysis<AAResultsWrapperPass>().getAAResults(), &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(), &getAnalysis<DominatorTreeWrapperPass>().getDomTree(), &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(), &getAnalysis<TargetTransformInfoWrapperPass>().getTTI( *L->getHeader()->getParent()), SE ? &SE->getSE() : nullptr, MSSA, &ORE, false); } /// This transformation requires natural loop information & requires that /// loop preheaders be inserted into the CFG...
/// void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired(); if (EnableMSSALoopDependency) AU.addRequired(); AU.addRequired(); getLoopAnalysisUsage(AU); } using llvm::Pass::doFinalization; bool doFinalization() override { assert(LICM.getLoopToAliasSetMap().empty() && "Didn't free loop alias sets"); return false; } private: LoopInvariantCodeMotion LICM; /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info. void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) override; /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias /// set. void deleteAnalysisValue(Value *V, Loop *L) override; /// Simple Analysis hook. Delete loop L from alias set map. void deleteAnalysisLoop(Loop *L) override; }; } // namespace PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &) { const auto &FAM = AM.getResult(L, AR).getManager(); Function *F = L.getHeader()->getParent(); auto *ORE = FAM.getCachedResult(*F); // FIXME: This should probably be optional rather than required. if (!ORE) report_fatal_error("LICM: OptimizationRemarkEmitterAnalysis not " "cached at a higher level"); LoopInvariantCodeMotion LICM; if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.TTI, &AR.SE, AR.MSSA, ORE, true)) return PreservedAnalyses::all(); auto PA = getLoopPassPreservedAnalyses(); PA.preserveSet(); return PA; } char LegacyLICMPass::ID = 0; INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false, false) INITIALIZE_PASS_DEPENDENCY(LoopPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false, false) Pass *llvm::createLICMPass() { return new LegacyLICMPass(); } /// Hoist expressions out of the specified loop. Note, alias info for inner /// loop is not preserved so it is not a good idea to run LICM multiple /// times on one loop. /// We should delete AST for inner loops in the new pass manager to avoid /// memory leak. /// bool LoopInvariantCodeMotion::runOnLoop( Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, ScalarEvolution *SE, MemorySSA *MSSA, OptimizationRemarkEmitter *ORE, bool DeleteAST) { bool Changed = false; assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form."); AliasSetTracker *CurAST = collectAliasInfoForLoop(L, LI, AA); // Get the preheader block to move instructions into... BasicBlock *Preheader = L->getLoopPreheader(); // Compute loop safety information. LoopSafetyInfo SafetyInfo; computeLoopSafetyInfo(&SafetyInfo, L); // We want to visit all of the instructions in this loop... that are not parts // of our subloops (they have already had their invariants hoisted out of // their loop, into this loop, so there is no need to process the BODIES of // the subloops). // // Traverse the body of the loop in depth first order on the dominator tree so // that we are guaranteed to see definitions before we see uses. This allows // us to sink instructions in one pass, without iteration. After sinking // instructions, we perform another pass to hoist them out of the loop. 
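// As an illustrative sketch of why the visit order matters: with a chain // %a = mul i32 %inv1, %inv2 // %b = add i32 %a, 1 ; only %b is used outside the loop // sinking visits %b before %a (uses before defs), so once %b has been sunk, // %a is no longer used inside the loop and can be sunk in the same pass.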
// if (L->hasDedicatedExits()) Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L, CurAST, &SafetyInfo, ORE); if (Preheader) Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L, CurAST, &SafetyInfo, ORE); // Now that all loop invariants have been removed from the loop, promote any // memory references to scalars that we can. // Don't sink stores from loops without dedicated block exits. Exits // containing indirect branches are not transformed by loop simplify, // make sure we catch that. An additional load may be generated in the // preheader for SSA updater, so also avoid sinking when no preheader // is available. if (!DisablePromotion && Preheader && L->hasDedicatedExits()) { // Figure out the loop exits and their insertion points SmallVector ExitBlocks; L->getUniqueExitBlocks(ExitBlocks); // We can't insert into a catchswitch. bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) { return isa(Exit->getTerminator()); }); if (!HasCatchSwitch) { SmallVector InsertPts; InsertPts.reserve(ExitBlocks.size()); for (BasicBlock *ExitBlock : ExitBlocks) InsertPts.push_back(&*ExitBlock->getFirstInsertionPt()); PredIteratorCache PIC; bool Promoted = false; // Loop over all of the alias sets in the tracker object. for (AliasSet &AS : *CurAST) { // We can promote this alias set if it has a store, if it is a "Must" // alias set, if the pointer is loop invariant, and if we are not // eliminating any volatile loads or stores. if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() || AS.isVolatile() || !L->isLoopInvariant(AS.begin()->getValue())) continue; assert( !AS.empty() && "Must alias set should have at least one pointer element in it!"); SmallSetVector PointerMustAliases; for (const auto &ASI : AS) PointerMustAliases.insert(ASI.getValue()); Promoted |= promoteLoopAccessesToScalars(PointerMustAliases, ExitBlocks, InsertPts, PIC, LI, DT, TLI, L, CurAST, &SafetyInfo, ORE); } // Once we have promoted values across the loop body we have to // recursively reform LCSSA as any nested loop may now have values defined // within the loop used in the outer loop. // FIXME: This is really heavy handed. It would be a bit better to use an // SSAUpdater strategy during promotion that was LCSSA aware and reformed // it as it went. if (Promoted) formLCSSARecursively(*L, *DT, LI, SE); Changed |= Promoted; } } // Check that neither this loop nor its parent have had LCSSA broken. LICM is // specifically moving instructions across the loop boundary and so it is // especially in need of sanity checking here. assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!"); assert((!L->getParentLoop() || L->getParentLoop()->isLCSSAForm(*DT)) && "Parent loop not left in LCSSA form after LICM!"); // If this loop is nested inside of another one, save the alias information // for when we process the outer loop. if (L->getParentLoop() && !DeleteAST) LoopToAliasSetMap[L] = CurAST; else delete CurAST; if (Changed && SE) SE->forgetLoopDispositions(L); return Changed; } /// Walk the specified region of the CFG (defined by all blocks dominated by /// the specified block, and that are in the current loop) in reverse depth /// first order w.r.t the DominatorTree. This allows us to visit uses before /// definitions, allowing us to sink a loop body in one pass without iteration. 
/// bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, Loop *CurLoop, AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { // Verify inputs. assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr && "Unexpected input to sinkRegion"); // We want to visit children before parents. We will enqueue all the parents // before their children in the worklist and process the worklist in reverse // order. SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop); bool Changed = false; for (DomTreeNode *DTN : reverse(Worklist)) { BasicBlock *BB = DTN->getBlock(); // Only need to process the contents of this block if it is not part of a // subloop (which would already have been processed). if (inSubLoop(BB, CurLoop, LI)) continue; for (BasicBlock::iterator II = BB->end(); II != BB->begin();) { Instruction &I = *--II; // If the instruction is dead, we would normally try to sink it because it // isn't used in the loop; instead, just delete it. if (isInstructionTriviallyDead(&I, TLI)) { DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n'); ++II; CurAST->deleteValue(&I); I.eraseFromParent(); Changed = true; continue; } // Check to see if we can sink this instruction to the exit blocks // of the loop. We can do this if all of the users of the instruction are // outside of the loop. In this case, it doesn't even matter if the // operands of the instruction are loop invariant. // bool FreeInLoop = false; if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) && canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE)) { if (sink(I, LI, DT, CurLoop, SafetyInfo, ORE, FreeInLoop)) { if (!FreeInLoop) { ++II; CurAST->deleteValue(&I); I.eraseFromParent(); } Changed = true; } } } } return Changed; } /// Walk the specified region of the CFG (defined by all blocks dominated by /// the specified block, and that are in the current loop) in depth first /// order w.r.t the DominatorTree. This allows us to visit definitions before /// uses, allowing us to hoist a loop body in one pass without iteration. /// bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop, AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { // Verify inputs. assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr && "Unexpected input to hoistRegion"); // We want to visit parents before children. We will enqueue all the parents // before their children in the worklist and process the worklist in order. SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop); bool Changed = false; for (DomTreeNode *DTN : Worklist) { BasicBlock *BB = DTN->getBlock(); // Only need to process the contents of this block if it is not part of a // subloop (which would already have been processed). if (!inSubLoop(BB, CurLoop, LI)) for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) { Instruction &I = *II++; // Try constant folding this instruction. If all the operands are // constants, it is technically hoistable, but it would be better to // just fold it.
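// For instance (a hypothetical case), an instruction such as // %c = add i32 7, 5 // appearing in the loop body is simply replaced by the constant 12 here // instead of being hoisted to the preheader.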
if (Constant *C = ConstantFoldInstruction( &I, I.getModule()->getDataLayout(), TLI)) { DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n'); CurAST->copyValue(&I, C); I.replaceAllUsesWith(C); if (isInstructionTriviallyDead(&I, TLI)) { CurAST->deleteValue(&I); I.eraseFromParent(); } Changed = true; continue; } // Attempt to hoist floating-point division out of the loop by // converting it to a reciprocal multiplication. if (I.getOpcode() == Instruction::FDiv && CurLoop->isLoopInvariant(I.getOperand(1)) && I.hasAllowReciprocal()) { auto Divisor = I.getOperand(1); auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0); auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor); ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags()); ReciprocalDivisor->insertBefore(&I); auto Product = BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor); Product->setFastMathFlags(I.getFastMathFlags()); Product->insertAfter(&I); I.replaceAllUsesWith(Product); I.eraseFromParent(); hoist(*ReciprocalDivisor, DT, CurLoop, SafetyInfo, ORE); Changed = true; continue; } // Try hoisting the instruction out to the preheader. We can only do // this if all of the operands of the instruction are loop invariant and // if it is safe to hoist the instruction. // if (CurLoop->hasLoopInvariantOperands(&I) && canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE) && isSafeToExecuteUnconditionally( I, DT, CurLoop, SafetyInfo, ORE, CurLoop->getLoopPreheader()->getTerminator())) Changed |= hoist(I, DT, CurLoop, SafetyInfo, ORE); } } return Changed; } /// Computes loop safety information, checking the loop body and header /// for instructions that may throw an exception. /// void llvm::computeLoopSafetyInfo(LoopSafetyInfo *SafetyInfo, Loop *CurLoop) { assert(CurLoop != nullptr && "CurLoop can't be null"); BasicBlock *Header = CurLoop->getHeader(); // Setting default safety values. SafetyInfo->MayThrow = false; SafetyInfo->HeaderMayThrow = false; // Iterate over header and compute safety info. for (BasicBlock::iterator I = Header->begin(), E = Header->end(); (I != E) && !SafetyInfo->HeaderMayThrow; ++I) SafetyInfo->HeaderMayThrow |= !isGuaranteedToTransferExecutionToSuccessor(&*I); SafetyInfo->MayThrow = SafetyInfo->HeaderMayThrow; // Iterate over loop instructions and compute safety info. // Skip the header as it has been computed and stored in HeaderMayThrow. // The first block in loopinfo.Blocks is guaranteed to be the header. assert(Header == *CurLoop->getBlocks().begin() && "First block must be header"); for (Loop::block_iterator BB = std::next(CurLoop->block_begin()), BBE = CurLoop->block_end(); (BB != BBE) && !SafetyInfo->MayThrow; ++BB) for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); (I != E) && !SafetyInfo->MayThrow; ++I) SafetyInfo->MayThrow |= !isGuaranteedToTransferExecutionToSuccessor(&*I); // Compute funclet colors if we might sink/hoist in a function with a funclet // personality routine. Function *Fn = CurLoop->getHeader()->getParent(); if (Fn->hasPersonalityFn()) if (Constant *PersonalityFn = Fn->getPersonalityFn()) if (isFuncletEHPersonality(classifyEHPersonality(PersonalityFn))) SafetyInfo->BlockColors = colorEHFunclets(*Fn); } // Return true if LI is invariant within the scope of the loop. LI is invariant // if CurLoop is dominated by an invariant.start representing the same memory // location and size as the memory location LI loads from, and also the // invariant.start has no uses.
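// For example (illustrative IR only): // %i8p = bitcast i32* %p to i8* // %inv = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %i8p) // ... // %v = load i32, i32* %p ; inside the loop // The load can be treated as invariant when such an invariant.start dominates // the loop header, covers at least the bytes loaded, and itself has no uses // (i.e. no matching invariant.end).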
static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT, Loop *CurLoop) { Value *Addr = LI->getOperand(0); const DataLayout &DL = LI->getModule()->getDataLayout(); const uint32_t LocSizeInBits = DL.getTypeSizeInBits( cast<PointerType>(Addr->getType())->getElementType()); // If the type is i8 addrspace(x)*, we know this is the type of the // llvm.invariant.start operand. auto *PtrInt8Ty = PointerType::get(Type::getInt8Ty(LI->getContext()), LI->getPointerAddressSpace()); unsigned BitcastsVisited = 0; // Look through bitcasts until we reach the i8* type (this is the // invariant.start operand type). while (Addr->getType() != PtrInt8Ty) { auto *BC = dyn_cast<BitCastInst>(Addr); // Avoid traversing a high number of bitcast uses. if (++BitcastsVisited > MaxNumUsesTraversed || !BC) return false; Addr = BC->getOperand(0); } unsigned UsesVisited = 0; // Traverse all uses of the load operand value, to see if invariant.start is // one of the uses, and whether it dominates the load instruction. for (auto *U : Addr->users()) { // Avoid traversing a load operand with a high number of users. if (++UsesVisited > MaxNumUsesTraversed) return false; IntrinsicInst *II = dyn_cast<IntrinsicInst>(U); // If there are escaping uses of the invariant.start instruction, the load // may be non-invariant. if (!II || II->getIntrinsicID() != Intrinsic::invariant_start || !II->use_empty()) continue; unsigned InvariantSizeInBits = cast<ConstantInt>(II->getArgOperand(0))->getSExtValue() * 8; // Confirm the invariant.start location size contains the load operand size // in bits. Also, the invariant.start should dominate the load, and we // should not hoist the load out of a loop that contains this dominating // invariant.start. if (LocSizeInBits <= InvariantSizeInBits && DT->properlyDominates(II->getParent(), CurLoop->getHeader())) return true; } return false; } bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, Loop *CurLoop, AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { // SafetyInfo is nullptr if we are checking for sinking from the preheader to // the loop body. const bool SinkingToLoopBody = !SafetyInfo; // Loads have extra constraints we have to verify before we can hoist them. if (LoadInst *LI = dyn_cast<LoadInst>(&I)) { if (!LI->isUnordered()) return false; // Don't sink/hoist volatile or ordered atomic loads! // Loads from constant memory are always safe to move, even if they end up // in the same alias set as something that ends up being modified. if (AA->pointsToConstantMemory(LI->getOperand(0))) return true; if (LI->getMetadata(LLVMContext::MD_invariant_load)) return true; if (LI->isAtomic() && SinkingToLoopBody) return false; // Don't sink unordered atomic loads to the loop body. // This checks for an invariant.start dominating the load. if (isLoadInvariantInLoop(LI, DT, CurLoop)) return true; // Don't hoist loads which have may-aliased stores in the loop. uint64_t Size = 0; if (LI->getType()->isSized()) Size = I.getModule()->getDataLayout().getTypeStoreSize(LI->getType()); AAMDNodes AAInfo; LI->getAAMetadata(AAInfo); bool Invalidated = pointerInvalidatedByLoop(LI->getOperand(0), Size, AAInfo, CurAST); // Check the loop-invariant address because this may also be a sinkable load // whose address is not necessarily loop-invariant.
if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand())) ORE->emit([&]() { return OptimizationRemarkMissed( DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", LI) << "failed to move load with loop-invariant address " "because the loop may invalidate its value"; }); return !Invalidated; } else if (CallInst *CI = dyn_cast<CallInst>(&I)) { // Don't sink or hoist dbg info; it's legal, but not useful. if (isa<DbgInfoIntrinsic>(I)) return false; // Don't sink calls which can throw. if (CI->mayThrow()) return false; // Handle simple cases by querying alias analysis. FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI); if (Behavior == FMRB_DoesNotAccessMemory) return true; if (AliasAnalysis::onlyReadsMemory(Behavior)) { // A readonly argmemonly function only reads from memory pointed to by // its arguments with arbitrary offsets. If we can prove there are no // writes to this memory in the loop, we can hoist or sink. if (AliasAnalysis::onlyAccessesArgPointees(Behavior)) { for (Value *Op : CI->arg_operands()) if (Op->getType()->isPointerTy() && pointerInvalidatedByLoop(Op, MemoryLocation::UnknownSize, AAMDNodes(), CurAST)) return false; return true; } // If this call only reads from memory and there are no writes to memory // in the loop, we can hoist or sink the call as appropriate. bool FoundMod = false; for (AliasSet &AS : *CurAST) { if (!AS.isForwardingAliasSet() && AS.isMod()) { FoundMod = true; break; } } if (!FoundMod) return true; } // FIXME: This should use mod/ref information to see if we can hoist or // sink the call. return false; } // Only these instructions are hoistable/sinkable. if (!isa<BinaryOperator>(I) && !isa<CastInst>(I) && !isa<SelectInst>(I) && !isa<GetElementPtrInst>(I) && !isa<CmpInst>(I) && !isa<InsertElementInst>(I) && !isa<ExtractElementInst>(I) && !isa<ShuffleVectorInst>(I) && !isa<ExtractValueInst>(I) && !isa<InsertValueInst>(I)) return false; // If we are checking for sinking from the preheader to the loop body, it is // always safe as there is no speculative execution. if (SinkingToLoopBody) return true; // TODO: Plumb the context instruction through to make hoisting and sinking // more powerful. Hoisting of loads already works due to the special casing // above. return isSafeToExecuteUnconditionally(I, DT, CurLoop, SafetyInfo, nullptr); } /// Returns true if a PHINode is trivially replaceable with an /// Instruction. /// This is true when all incoming values are that instruction. /// This pattern occurs most often with LCSSA PHI nodes. /// static bool isTriviallyReplacablePHI(const PHINode &PN, const Instruction &I) { for (const Value *IncValue : PN.incoming_values()) if (IncValue != &I) return false; return true; } /// Return true if the instruction is free in the loop. static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop, const TargetTransformInfo *TTI) { if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) { if (TTI->getUserCost(GEP) != TargetTransformInfo::TCC_Free) return false; // For a GEP, we cannot simply use getUserCost because currently it // optimistically assumes that a GEP will fold into the addressing mode // regardless of its users. const BasicBlock *BB = GEP->getParent(); for (const User *U : GEP->users()) { const Instruction *UI = cast<Instruction>(U); if (CurLoop->contains(UI) && (BB != UI->getParent() || (!isa<LoadInst>(UI) && !isa<StoreInst>(UI)))) return false; } return true; } else return TTI->getUserCost(&I) == TargetTransformInfo::TCC_Free; } /// Return true if the only users of this instruction are outside of /// the loop. If this is true, we can sink the instruction to the exit /// blocks of the loop. /// /// We also return true if the instruction could be folded away in lowering.
/// (e.g., a GEP can be folded into a load as an addressing mode in the loop). static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, TargetTransformInfo *TTI, bool &FreeInLoop) { const auto &BlockColors = SafetyInfo->BlockColors; bool IsFree = isFreeInLoop(I, CurLoop, TTI); for (const User *U : I.users()) { const Instruction *UI = cast(U); if (const PHINode *PN = dyn_cast(UI)) { const BasicBlock *BB = PN->getParent(); // We cannot sink uses in catchswitches. if (isa(BB->getTerminator())) return false; // We need to sink a callsite to a unique funclet. Avoid sinking if the // phi use is too muddled. if (isa(I)) if (!BlockColors.empty() && BlockColors.find(const_cast(BB))->second.size() != 1) return false; } if (CurLoop->contains(UI)) { if (IsFree) { FreeInLoop = true; continue; } return false; } } return true; } static Instruction * CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI, const LoopSafetyInfo *SafetyInfo) { Instruction *New; if (auto *CI = dyn_cast(&I)) { const auto &BlockColors = SafetyInfo->BlockColors; // Sinking call-sites need to be handled differently from other // instructions. The cloned call-site needs a funclet bundle operand // appropriate for it's location in the CFG. SmallVector OpBundles; for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles(); BundleIdx != BundleEnd; ++BundleIdx) { OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx); if (Bundle.getTagID() == LLVMContext::OB_funclet) continue; OpBundles.emplace_back(Bundle); } if (!BlockColors.empty()) { const ColorVector &CV = BlockColors.find(&ExitBlock)->second; assert(CV.size() == 1 && "non-unique color for exit block!"); BasicBlock *BBColor = CV.front(); Instruction *EHPad = BBColor->getFirstNonPHI(); if (EHPad->isEHPad()) OpBundles.emplace_back("funclet", EHPad); } New = CallInst::Create(CI, OpBundles); } else { New = I.clone(); } ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New); if (!I.getName().empty()) New->setName(I.getName() + ".le"); // Build LCSSA PHI nodes for any in-loop operands. Note that this is // particularly cheap because we can rip off the PHI node that we're // replacing for the number and blocks of the predecessors. // OPT: If this shows up in a profile, we can instead finish sinking all // invariant instructions, and then walk their operands to re-establish // LCSSA. That will eliminate creating PHI nodes just to nuke them when // sinking bottom-up. 
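// For example (hypothetical names), if a clone sunk into exit block %exit // uses an operand %op defined in a loop that does not contain %exit, we // create // %op.lcssa = phi i32 [ %op, %pred1 ], [ %op, %pred2 ] // at the top of %exit and rewrite the clone to use %op.lcssa instead of %op.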
for (User::op_iterator OI = New->op_begin(), OE = New->op_end(); OI != OE; ++OI) if (Instruction *OInst = dyn_cast<Instruction>(*OI)) if (Loop *OLoop = LI->getLoopFor(OInst->getParent())) if (!OLoop->contains(&PN)) { PHINode *OpPN = PHINode::Create(OInst->getType(), PN.getNumIncomingValues(), OInst->getName() + ".lcssa", &ExitBlock.front()); for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) OpPN->addIncoming(OInst, PN.getIncomingBlock(i)); *OI = OpPN; } return New; } static Instruction *sinkThroughTriviallyReplacablePHI( PHINode *TPN, Instruction *I, LoopInfo *LI, SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies, const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop) { assert(isTriviallyReplacablePHI(*TPN, *I) && "Expect only trivially replaceable PHI"); BasicBlock *ExitBlock = TPN->getParent(); Instruction *New; auto It = SunkCopies.find(ExitBlock); if (It != SunkCopies.end()) New = It->second; else New = SunkCopies[ExitBlock] = CloneInstructionInExitBlock(*I, *ExitBlock, *TPN, LI, SafetyInfo); return New; } -static bool canSplitPredecessors(PHINode *PN) { +static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) { BasicBlock *BB = PN->getParent(); if (!BB->canSplitPredecessors()) return false; + // It's not impossible to split EHPad blocks, but if BlockColors already + // exists, it requires updating BlockColors for all offspring blocks + // accordingly. By skipping this corner case, we can keep the BlockColors + // update after splitting a predecessor fairly simple. + if (!SafetyInfo->BlockColors.empty() && BB->getFirstNonPHI()->isEHPad()) + return false; for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { BasicBlock *BBPred = *PI; if (isa<IndirectBrInst>(BBPred->getTerminator())) return false; } return true; } static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT, - LoopInfo *LI, const Loop *CurLoop) { + LoopInfo *LI, const Loop *CurLoop, + LoopSafetyInfo *SafetyInfo) { #ifndef NDEBUG SmallVector<BasicBlock *, 8> ExitBlocks; CurLoop->getUniqueExitBlocks(ExitBlocks); SmallPtrSet<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(), ExitBlocks.end()); #endif BasicBlock *ExitBB = PN->getParent(); assert(ExitBlockSet.count(ExitBB) && "Expect the PHI is in an exit block."); // Split predecessors of the loop exit so that instructions in the loop are // exposed to exit blocks through trivially replaceable PHIs, while keeping // the loop in the canonical form where each predecessor of each exit block // is contained within the loop.
// For example, this will convert the loop below // from // // LB1: // %v1 = // br %LE, %LB2 // LB2: // %v2 = // br %LE, %LB1 // LE: // %p = phi [%v1, %LB1], [%v2, %LB2] <-- non-trivially replaceable // // to // // LB1: // %v1 = // br %LE.split, %LB2 // LB2: // %v2 = // br %LE.split2, %LB1 // LE.split: // %p1 = phi [%v1, %LB1] <-- trivially replaceable // br %LE // LE.split2: // %p2 = phi [%v2, %LB2] <-- trivially replaceable // br %LE // LE: // %p = phi [%p1, %LE.split], [%p2, %LE.split2] // + auto &BlockColors = SafetyInfo->BlockColors; SmallSetVector<BasicBlock *, 8> PredBBs(pred_begin(ExitBB), pred_end(ExitBB)); while (!PredBBs.empty()) { BasicBlock *PredBB = *PredBBs.begin(); assert(CurLoop->contains(PredBB) && "Expect all predecessors are in the loop"); - if (PN->getBasicBlockIndex(PredBB) >= 0) - SplitBlockPredecessors(ExitBB, PredBB, ".split.loop.exit", DT, LI, true); + if (PN->getBasicBlockIndex(PredBB) >= 0) { + BasicBlock *NewPred = SplitBlockPredecessors( + ExitBB, PredBB, ".split.loop.exit", DT, LI, true); + // Since we do not allow splitting an EH block with BlockColors in + // canSplitPredecessors(), we can simply assign the predecessor's color + // to the new block. + if (!BlockColors.empty()) + BlockColors[NewPred] = BlockColors[PredBB]; + } PredBBs.remove(PredBB); } } /// When an instruction is found to only be used outside of the loop, this /// function moves it to the exit blocks and patches up SSA form as needed. /// This method is guaranteed to remove the original instruction from its /// position, and may either delete it or move it to outside of the loop. /// static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, - const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, + const Loop *CurLoop, LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE, bool FreeInLoop) { DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n"); ORE->emit([&]() { return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I) << "sinking " << ore::NV("Inst", &I); }); bool Changed = false; if (isa<LoadInst>(I)) ++NumMovedLoads; else if (isa<CallInst>(I)) ++NumMovedCalls; ++NumSunk; // Iterate over users to be ready for actual sinking. Replace users in // unreachable blocks with undef and make all user PHIs trivially replaceable. SmallPtrSet<Instruction *, 8> VisitedUsers; for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) { auto *User = cast<Instruction>(*UI); Use &U = UI.getUse(); ++UI; if (VisitedUsers.count(User) || CurLoop->contains(User)) continue; if (!DT->isReachableFromEntry(User->getParent())) { U = UndefValue::get(I.getType()); Changed = true; continue; } // The user must be a PHI node. PHINode *PN = cast<PHINode>(User); // Surprisingly, instructions can be used outside of loops without any // exits. This can only happen in PHI nodes if the incoming block is // unreachable. BasicBlock *BB = PN->getIncomingBlock(U); if (!DT->isReachableFromEntry(BB)) { U = UndefValue::get(I.getType()); Changed = true; continue; } VisitedUsers.insert(PN); if (isTriviallyReplacablePHI(*PN, I)) continue; - if (!canSplitPredecessors(PN)) + if (!canSplitPredecessors(PN, SafetyInfo)) return Changed; // Split predecessors of the PHI so that we can make users trivially // replaceable. - splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop); + splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo); // Should rebuild the iterators, as they may be invalidated by // splitPredecessorsOfLoopExit().
UI = I.user_begin(); UE = I.user_end(); } if (VisitedUsers.empty()) return Changed; #ifndef NDEBUG SmallVector<BasicBlock *, 32> ExitBlocks; CurLoop->getUniqueExitBlocks(ExitBlocks); SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(), ExitBlocks.end()); #endif // Clones of this instruction. Don't create more than one per exit block! SmallDenseMap<BasicBlock *, Instruction *, 32> SunkCopies; // If this instruction is only used outside of the loop, then all users are // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of // the instruction. SmallSetVector<User *, 8> Users(I.user_begin(), I.user_end()); for (auto *UI : Users) { auto *User = cast<Instruction>(UI); if (CurLoop->contains(User)) continue; PHINode *PN = cast<PHINode>(User); assert(ExitBlockSet.count(PN->getParent()) && "The LCSSA PHI is not in an exit block!"); // The PHI must be trivially replaceable. Instruction *New = sinkThroughTriviallyReplacablePHI(PN, &I, LI, SunkCopies, SafetyInfo, CurLoop); PN->replaceAllUsesWith(New); PN->eraseFromParent(); Changed = true; } return Changed; } /// When an instruction is found to only use loop-invariant operands and it /// is safe to hoist, this function is called to do the dirty work. /// static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { auto *Preheader = CurLoop->getLoopPreheader(); DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": " << I << "\n"); ORE->emit([&]() { return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting " << ore::NV("Inst", &I); }); // Metadata can be dependent on conditions we are hoisting above. // Conservatively strip all metadata on the instruction unless we were // guaranteed to execute I if we entered the loop, in which case the metadata // is valid in the loop preheader. if (I.hasMetadataOtherThanDebugLoc() && // The check on hasMetadataOtherThanDebugLoc is to prevent us from burning // time in isGuaranteedToExecute if we don't actually have anything to // drop. It is a compile time optimization, not required for correctness. !isGuaranteedToExecute(I, DT, CurLoop, SafetyInfo)) I.dropUnknownNonDebugMetadata(); // Move the new node to the Preheader, before its terminator. I.moveBefore(Preheader->getTerminator()); // Do not retain debug locations when we are moving instructions to different // basic blocks, because we want to avoid jumpy line tables. Calls, however, // need to retain their debug locs because they may be inlined. // FIXME: How do we retain source locations without causing poor debugging // behavior? if (!isa<CallInst>(I)) I.setDebugLoc(DebugLoc()); if (isa<LoadInst>(I)) ++NumMovedLoads; else if (isa<CallInst>(I)) ++NumMovedCalls; ++NumHoisted; return true; } /// Only sink or hoist an instruction if it is not a trapping instruction, /// if the instruction is known not to trap when moved to the preheader, or /// if it is a trapping instruction that is guaranteed to execute.
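/// For example (illustrative), %q = sdiv i32 %a, %b can trap when %b is 0, so /// it is only hoisted if it is safe to speculate at the preheader's terminator /// or is guaranteed to execute whenever the loop is entered.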
static bool isSafeToExecuteUnconditionally(Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE, const Instruction *CtxI) { if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT)) return true; bool GuaranteedToExecute = isGuaranteedToExecute(Inst, DT, CurLoop, SafetyInfo); if (!GuaranteedToExecute) { auto *LI = dyn_cast(&Inst); if (LI && CurLoop->isLoopInvariant(LI->getPointerOperand())) ORE->emit([&]() { return OptimizationRemarkMissed( DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI) << "failed to hoist load with loop-invariant address " "because load is conditionally executed"; }); } return GuaranteedToExecute; } namespace { class LoopPromoter : public LoadAndStorePromoter { Value *SomePtr; // Designated pointer to store to. const SmallSetVector &PointerMustAliases; SmallVectorImpl &LoopExitBlocks; SmallVectorImpl &LoopInsertPts; PredIteratorCache &PredCache; AliasSetTracker &AST; LoopInfo &LI; DebugLoc DL; int Alignment; bool UnorderedAtomic; AAMDNodes AATags; Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const { if (Instruction *I = dyn_cast(V)) if (Loop *L = LI.getLoopFor(I->getParent())) if (!L->contains(BB)) { // We need to create an LCSSA PHI node for the incoming value and // store that. PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB), I->getName() + ".lcssa", &BB->front()); for (BasicBlock *Pred : PredCache.get(BB)) PN->addIncoming(I, Pred); return PN; } return V; } public: LoopPromoter(Value *SP, ArrayRef Insts, SSAUpdater &S, const SmallSetVector &PMA, SmallVectorImpl &LEB, SmallVectorImpl &LIP, PredIteratorCache &PIC, AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment, bool UnorderedAtomic, const AAMDNodes &AATags) : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA), LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast), LI(li), DL(std::move(dl)), Alignment(alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags) {} bool isInstInList(Instruction *I, const SmallVectorImpl &) const override { Value *Ptr; if (LoadInst *LI = dyn_cast(I)) Ptr = LI->getOperand(0); else Ptr = cast(I)->getPointerOperand(); return PointerMustAliases.count(Ptr); } void doExtraRewritesBeforeFinalDeletion() const override { // Insert stores after in the loop exit blocks. Each exit block gets a // store of the live-out values that feed them. Since we've already told // the SSA updater about the defs in the loop and the preheader // definition, it is all set and we can start using it. for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) { BasicBlock *ExitBlock = LoopExitBlocks[i]; Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock); Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock); Instruction *InsertPos = LoopInsertPts[i]; StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos); if (UnorderedAtomic) NewSI->setOrdering(AtomicOrdering::Unordered); NewSI->setAlignment(Alignment); NewSI->setDebugLoc(DL); if (AATags) NewSI->setAAMetadata(AATags); } } void replaceLoadWithValue(LoadInst *LI, Value *V) const override { // Update alias analysis. AST.copyValue(LI, V); } void instructionDeleted(Instruction *I) const override { AST.deleteValue(I); } }; /// Return true iff we can prove that a caller of this function can not inspect /// the contents of the provided object in a well defined program. 
bool isKnownNonEscaping(Value *Object, const TargetLibraryInfo *TLI) { if (isa(Object)) // Since the alloca goes out of scope, we know the caller can't retain a // reference to it and be well defined. Thus, we don't need to check for // capture. return true; // For all other objects we need to know that the caller can't possibly // have gotten a reference to the object. There are two components of // that: // 1) Object can't be escaped by this function. This is what // PointerMayBeCaptured checks. // 2) Object can't have been captured at definition site. For this, we // need to know the return value is noalias. At the moment, we use a // weaker condition and handle only AllocLikeFunctions (which are // known to be noalias). TODO return isAllocLikeFn(Object, TLI) && !PointerMayBeCaptured(Object, true, true); } } // namespace /// Try to promote memory values to scalars by sinking stores out of the /// loop and moving loads to before the loop. We do this by looping over /// the stores in the loop, looking for stores to Must pointers which are /// loop invariant. /// bool llvm::promoteLoopAccessesToScalars( const SmallSetVector &PointerMustAliases, SmallVectorImpl &ExitBlocks, SmallVectorImpl &InsertPts, PredIteratorCache &PIC, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, Loop *CurLoop, AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { // Verify inputs. assert(LI != nullptr && DT != nullptr && CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr && "Unexpected Input to promoteLoopAccessesToScalars"); Value *SomePtr = *PointerMustAliases.begin(); BasicBlock *Preheader = CurLoop->getLoopPreheader(); // It isn't safe to promote a load/store from the loop if the load/store is // conditional. For example, turning: // // for () { if (c) *P += 1; } // // into: // // tmp = *P; for () { if (c) tmp +=1; } *P = tmp; // // is not safe, because *P may only be valid to access if 'c' is true. // // The safety property divides into two parts: // p1) The memory may not be dereferenceable on entry to the loop. In this // case, we can't insert the required load in the preheader. // p2) The memory model does not allow us to insert a store along any dynamic // path which did not originally have one. // // If at least one store is guaranteed to execute, both properties are // satisfied, and promotion is legal. // // This, however, is not a necessary condition. Even if no store/load is // guaranteed to execute, we can still establish these properties. // We can establish (p1) by proving that hoisting the load into the preheader // is safe (i.e. proving dereferenceability on all paths through the loop). We // can use any access within the alias set to prove dereferenceability, // since they're all must alias. // // There are two ways establish (p2): // a) Prove the location is thread-local. In this case the memory model // requirement does not apply, and stores are safe to insert. // b) Prove a store dominates every exit block. In this case, if an exit // blocks is reached, the original dynamic path would have taken us through // the store, so inserting a store into the exit block is safe. Note that this // is different from the store being guaranteed to execute. For instance, // if an exception is thrown on the first iteration of the loop, the original // store is never executed, but the exit blocks are not executed either. 
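// // For example (an illustrative sketch): // // for (...) { may_throw(); *P = 1; } // // The store is not guaranteed to execute, since the call may throw on the // first iteration, but it dominates the loop's exit block, so a store // inserted there runs only on paths that already stored to P.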
bool DereferenceableInPH = false; bool SafeToInsertStore = false; SmallVector<Instruction *, 64> LoopUses; // We start with an alignment of one and try to find instructions that allow // us to prove better alignment. unsigned Alignment = 1; // Keep track of which types of access we see. bool SawUnorderedAtomic = false; bool SawNotAtomic = false; AAMDNodes AATags; const DataLayout &MDL = Preheader->getModule()->getDataLayout(); bool IsKnownThreadLocalObject = false; if (SafetyInfo->MayThrow) { // If a loop can throw, we have to insert a store along each unwind edge. // That said, we can't actually make the unwind edge explicit. Therefore, // we have to prove that the store is dead along the unwind edge. We do // this by proving that the caller can't have a reference to the object // after return and thus can't possibly load from the object. Value *Object = GetUnderlyingObject(SomePtr, MDL); if (!isKnownNonEscaping(Object, TLI)) return false; // Subtlety: Allocas aren't visible to callers, but *are* potentially // visible to other threads if captured and used during their lifetimes. IsKnownThreadLocalObject = !isa<AllocaInst>(Object); } // Check that all of the pointers in the alias set have the same type. We // cannot (yet) promote a memory location that is loaded and stored in // different sizes. While we are at it, collect alignment and AA info. for (Value *ASIV : PointerMustAliases) { // Check that all of the pointers in the alias set have the same type. We // cannot (yet) promote a memory location that is loaded and stored in // different sizes. if (SomePtr->getType() != ASIV->getType()) return false; for (User *U : ASIV->users()) { // Ignore instructions that are outside the loop. Instruction *UI = dyn_cast<Instruction>(U); if (!UI || !CurLoop->contains(UI)) continue; // If there is a non-load/store instruction in the loop, we can't promote // it. if (LoadInst *Load = dyn_cast<LoadInst>(UI)) { assert(!Load->isVolatile() && "AST broken"); if (!Load->isUnordered()) return false; SawUnorderedAtomic |= Load->isAtomic(); SawNotAtomic |= !Load->isAtomic(); if (!DereferenceableInPH) DereferenceableInPH = isSafeToExecuteUnconditionally( *Load, DT, CurLoop, SafetyInfo, ORE, Preheader->getTerminator()); } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) { // Stores *of* the pointer are not interesting, only stores *to* the // pointer. if (UI->getOperand(1) != ASIV) continue; assert(!Store->isVolatile() && "AST broken"); if (!Store->isUnordered()) return false; SawUnorderedAtomic |= Store->isAtomic(); SawNotAtomic |= !Store->isAtomic(); // If the store is guaranteed to execute, both properties are satisfied. // We may want to check if a store is guaranteed to execute even if we // already know that promotion is safe, since it may have higher // alignment than any other guaranteed stores, in which case we can // raise the alignment on the promoted store. unsigned InstAlignment = Store->getAlignment(); if (!InstAlignment) InstAlignment = MDL.getABITypeAlignment(Store->getValueOperand()->getType()); if (!DereferenceableInPH || !SafeToInsertStore || (InstAlignment > Alignment)) { if (isGuaranteedToExecute(*UI, DT, CurLoop, SafetyInfo)) { DereferenceableInPH = true; SafeToInsertStore = true; Alignment = std::max(Alignment, InstAlignment); } } // If a store dominates all exit blocks, it is safe to sink. // As explained above, if an exit block was executed, a dominating // store must have been executed at least once, so we are not // introducing stores on paths that did not have them. // Note that this only looks at explicit exit blocks.
// If we ever start sinking stores into unwind edges (see above), this will // break. if (!SafeToInsertStore) SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) { return DT->dominates(Store->getParent(), Exit); }); // If the store is not guaranteed to execute, we may still get // deref info through it. if (!DereferenceableInPH) { DereferenceableInPH = isDereferenceableAndAlignedPointer( Store->getPointerOperand(), Store->getAlignment(), MDL, Preheader->getTerminator(), DT); } } else return false; // Not a load or store. // Merge the AA tags. if (LoopUses.empty()) { // On the first load/store, just take its AA tags. UI->getAAMetadata(AATags); } else if (AATags) { UI->getAAMetadata(AATags, /* Merge = */ true); } LoopUses.push_back(UI); } } // If we found both an unordered atomic instruction and a non-atomic memory // access, bail. We can't blindly promote non-atomic to atomic since we // might not be able to lower the result. We can't downgrade since that // would violate the memory model. Also, align 0 is an error for atomics. if (SawUnorderedAtomic && SawNotAtomic) return false; // If we couldn't prove we can hoist the load, bail. if (!DereferenceableInPH) return false; // We know we can hoist the load, but don't have a guaranteed store. // Check whether the location is thread-local. If it is, then we can insert // stores along paths which originally didn't have them without violating the // memory model. if (!SafeToInsertStore) { if (IsKnownThreadLocalObject) SafeToInsertStore = true; else { Value *Object = GetUnderlyingObject(SomePtr, MDL); SafeToInsertStore = (isAllocLikeFn(Object, TLI) || isa<AllocaInst>(Object)) && !PointerMayBeCaptured(Object, true, true); } } // If we've still failed to prove we can sink the store, give up. if (!SafeToInsertStore) return false; // Otherwise, this is safe to promote, let's do it! DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr << '\n'); ORE->emit([&]() { return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar", LoopUses[0]) << "Moving accesses to memory location out of the loop"; }); ++NumPromoted; // Grab a debug location for the inserted loads/stores; given that the // inserted loads/stores have little relation to the original loads/stores, // this code just arbitrarily picks a location from one, since any debug // location is better than none. DebugLoc DL = LoopUses[0]->getDebugLoc(); // We use the SSAUpdater interface to insert phi nodes as required. SmallVector<PHINode *, 16> NewPHIs; SSAUpdater SSA(&NewPHIs); LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, InsertPts, PIC, *CurAST, *LI, DL, Alignment, SawUnorderedAtomic, AATags); // Set up the preheader to have a definition of the value. It is the live-out // value from the preheader that uses in the loop will use. LoadInst *PreheaderLoad = new LoadInst( SomePtr, SomePtr->getName() + ".promoted", Preheader->getTerminator()); if (SawUnorderedAtomic) PreheaderLoad->setOrdering(AtomicOrdering::Unordered); PreheaderLoad->setAlignment(Alignment); PreheaderLoad->setDebugLoc(DL); if (AATags) PreheaderLoad->setAAMetadata(AATags); SSA.AddAvailableValue(Preheader, PreheaderLoad); // Rewrite all the loads in the loop and remember all the definitions from // stores in the loop. Promoter.run(LoopUses); // If the SSAUpdater didn't use the load in the preheader, just zap it now.
if (PreheaderLoad->use_empty()) PreheaderLoad->eraseFromParent(); return true; } /// Returns an owning pointer to an alias set which incorporates aliasing info /// from L and all subloops of L. /// FIXME: In the new pass manager, there is no helper function to handle loop /// analysis such as cloneBasicBlockAnalysis, so the AST needs to be recomputed /// from scratch for every loop. Hook up with the helper functions when /// available in the new pass manager to avoid redundant computation. AliasSetTracker * LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI, AliasAnalysis *AA) { AliasSetTracker *CurAST = nullptr; SmallVector<Loop *, 4> RecomputeLoops; for (Loop *InnerL : L->getSubLoops()) { auto MapI = LoopToAliasSetMap.find(InnerL); // If the AST for this inner loop is missing it may have been merged into // some other loop's AST and then that loop unrolled, and so we need to // recompute it. if (MapI == LoopToAliasSetMap.end()) { RecomputeLoops.push_back(InnerL); continue; } AliasSetTracker *InnerAST = MapI->second; if (CurAST != nullptr) { // What if InnerLoop was modified by other passes? CurAST->add(*InnerAST); // Once we've incorporated the inner loop's AST into ours, we don't need // the subloop's anymore. delete InnerAST; } else { CurAST = InnerAST; } LoopToAliasSetMap.erase(MapI); } if (CurAST == nullptr) CurAST = new AliasSetTracker(*AA); auto mergeLoop = [&](Loop *L) { // Loop over the body of this loop, looking for calls, invokes, and stores. for (BasicBlock *BB : L->blocks()) CurAST->add(*BB); // Incorporate the specified basic block }; // Add everything from the subloops that are no longer directly available. for (Loop *InnerL : RecomputeLoops) mergeLoop(InnerL); // And merge in this loop. mergeLoop(L); return CurAST; } /// Simple analysis hook. Clone alias set info. /// void LegacyLICMPass::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) { AliasSetTracker *AST = LICM.getLoopToAliasSetMap().lookup(L); if (!AST) return; AST->copyValue(From, To); } /// Simple Analysis hook. Delete value V from the alias set. /// void LegacyLICMPass::deleteAnalysisValue(Value *V, Loop *L) { AliasSetTracker *AST = LICM.getLoopToAliasSetMap().lookup(L); if (!AST) return; AST->deleteValue(V); } /// Simple Analysis hook. Delete loop L from the alias set map. /// void LegacyLICMPass::deleteAnalysisLoop(Loop *L) { AliasSetTracker *AST = LICM.getLoopToAliasSetMap().lookup(L); if (!AST) return; delete AST; LICM.getLoopToAliasSetMap().erase(L); } /// Return true if the body of this loop may store into the memory /// location pointed to by V. /// static bool pointerInvalidatedByLoop(Value *V, uint64_t Size, const AAMDNodes &AAInfo, AliasSetTracker *CurAST) { // Check to see if any of the basic blocks in CurLoop invalidate *V. return CurAST->getAliasSetForPointer(V, Size, AAInfo).isMod(); } /// Little predicate that returns true if the specified basic block is in /// a subloop of the current one, not the current one itself.
/// static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) { assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop"); return LI->getLoopFor(BB) != CurLoop; } Index: head/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp =================================================================== --- head/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp (revision 329982) +++ head/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp (revision 329983) @@ -1,1648 +1,1720 @@ //===-- LoopUtils.cpp - Loop Utility functions -------------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file defines common loop utility functions. // //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; using namespace llvm::PatternMatch; #define DEBUG_TYPE "loop-utils" bool RecurrenceDescriptor::areAllUsesIn(Instruction *I, SmallPtrSetImpl &Set) { for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use) if (!Set.count(dyn_cast(*Use))) return false; return true; } bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurrenceKind Kind) { switch (Kind) { default: break; case RK_IntegerAdd: case RK_IntegerMult: case RK_IntegerOr: case RK_IntegerAnd: case RK_IntegerXor: case RK_IntegerMinMax: return true; } return false; } bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurrenceKind Kind) { return (Kind != RK_NoRecurrence) && !isIntegerRecurrenceKind(Kind); } bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurrenceKind Kind) { switch (Kind) { default: break; case RK_IntegerAdd: case RK_IntegerMult: case RK_FloatAdd: case RK_FloatMult: return true; } return false; } -Instruction * -RecurrenceDescriptor::lookThroughAnd(PHINode *Phi, Type *&RT, - SmallPtrSetImpl &Visited, - SmallPtrSetImpl &CI) { +/// Determines if Phi may have been type-promoted. If Phi has a single user +/// that ANDs the Phi with a type mask, return the user. RT is updated to +/// account for the narrower bit width represented by the mask, and the AND +/// instruction is added to CI. +static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT, + SmallPtrSetImpl &Visited, + SmallPtrSetImpl &CI) { if (!Phi->hasOneUse()) return Phi; const APInt *M = nullptr; Instruction *I, *J = cast(Phi->use_begin()->getUser()); // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT // with a new integer type of the corresponding bit width. 
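// For example (hypothetical IR), given // %masked = and i32 %phi, 255 // the mask is 2^8-1, so RT becomes i8 and the 'and' is recorded in CI so that // the cost model can later ignore it.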
if (match(J, m_c_And(m_Instruction(I), m_APInt(M)))) { int32_t Bits = (*M + 1).exactLogBase2(); if (Bits > 0) { RT = IntegerType::get(Phi->getContext(), Bits); Visited.insert(Phi); CI.insert(J); return J; } } return Phi; } -bool RecurrenceDescriptor::getSourceExtensionKind( - Instruction *Start, Instruction *Exit, Type *RT, bool &IsSigned, - SmallPtrSetImpl &Visited, - SmallPtrSetImpl &CI) { +/// Compute the minimal bit width needed to represent a reduction whose exit +/// instruction is given by Exit. +static std::pair computeRecurrenceType(Instruction *Exit, + DemandedBits *DB, + AssumptionCache *AC, + DominatorTree *DT) { + bool IsSigned = false; + const DataLayout &DL = Exit->getModule()->getDataLayout(); + uint64_t MaxBitWidth = DL.getTypeSizeInBits(Exit->getType()); + if (DB) { + // Use the demanded bits analysis to determine the bits that are live out + // of the exit instruction, rounding up to the nearest power of two. If the + // use of demanded bits results in a smaller bit width, we know the value + // must be positive (i.e., IsSigned = false), because if this were not the + // case, the sign bit would have been demanded. + auto Mask = DB->getDemandedBits(Exit); + MaxBitWidth = Mask.getBitWidth() - Mask.countLeadingZeros(); + } + + if (MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) && AC && DT) { + // If demanded bits wasn't able to limit the bit width, we can try to use + // value tracking instead. This can be the case, for example, if the value + // may be negative. + auto NumSignBits = ComputeNumSignBits(Exit, DL, 0, AC, nullptr, DT); + auto NumTypeBits = DL.getTypeSizeInBits(Exit->getType()); + MaxBitWidth = NumTypeBits - NumSignBits; + KnownBits Bits = computeKnownBits(Exit, DL); + if (!Bits.isNonNegative()) { + // If the value is not known to be non-negative, we set IsSigned to true, + // meaning that we will use sext instructions instead of zext + // instructions to restore the original type. + IsSigned = true; + if (!Bits.isNegative()) + // If the value is not known to be negative, we don't known what the + // upper bit is, and therefore, we don't know what kind of extend we + // will need. In this case, just increase the bit width by one bit and + // use sext. + ++MaxBitWidth; + } + } + if (!isPowerOf2_64(MaxBitWidth)) + MaxBitWidth = NextPowerOf2(MaxBitWidth); + + return std::make_pair(Type::getIntNTy(Exit->getContext(), MaxBitWidth), + IsSigned); +} + +/// Collect cast instructions that can be ignored in the vectorizer's cost +/// model, given a reduction exit value and the minimal type in which the +/// reduction can be represented. +static void collectCastsToIgnore(Loop *TheLoop, Instruction *Exit, + Type *RecurrenceType, + SmallPtrSetImpl &Casts) { + SmallVector Worklist; - bool FoundOneOperand = false; - unsigned DstSize = RT->getPrimitiveSizeInBits(); + SmallPtrSet Visited; Worklist.push_back(Exit); - // Traverse the instructions in the reduction expression, beginning with the - // exit value. while (!Worklist.empty()) { - Instruction *I = Worklist.pop_back_val(); - for (Use &U : I->operands()) { - - // Terminate the traversal if the operand is not an instruction, or we - // reach the starting value. 
- Instruction *J = dyn_cast<Instruction>(U.get()); - if (!J || J == Start) + Instruction *Val = Worklist.pop_back_val(); + Visited.insert(Val); + if (auto *Cast = dyn_cast<CastInst>(Val)) + if (Cast->getSrcTy() == RecurrenceType) { + // If the source type of a cast instruction is equal to the recurrence + // type, it will be eliminated, and should be ignored in the vectorizer + // cost model. + Casts.insert(Cast); continue; - - // Otherwise, investigate the operation if it is also in the expression. - if (Visited.count(J)) { - Worklist.push_back(J); - continue; } - // If the operand is not in Visited, it is not a reduction operation, but - // it does feed into one. Make sure it is either a single-use sign- or - // zero-extend instruction. - CastInst *Cast = dyn_cast<CastInst>(J); - bool IsSExtInst = isa<SExtInst>(J); - if (!Cast || !Cast->hasOneUse() || !(isa<ZExtInst>(J) || IsSExtInst)) - return false; - - // Ensure the source type of the extend is no larger than the reduction - // type. It is not necessary for the types to be identical. - unsigned SrcSize = Cast->getSrcTy()->getPrimitiveSizeInBits(); - if (SrcSize > DstSize) - return false; - - // Furthermore, ensure that all such extends are of the same kind. - if (FoundOneOperand) { - if (IsSigned != IsSExtInst) - return false; - } else { - FoundOneOperand = true; - IsSigned = IsSExtInst; - } - - // Lastly, if the source type of the extend matches the reduction type, - // add the extend to CI so that we can avoid accounting for it in the - // cost model. - if (SrcSize == DstSize) - CI.insert(Cast); - } + // Add all operands to the work list if they are loop-varying values that + // we haven't yet visited. + for (Value *O : cast<User>(Val)->operands()) + if (auto *I = dyn_cast<Instruction>(O)) + if (TheLoop->contains(I) && !Visited.count(I)) + Worklist.push_back(I); } - return true; } bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, Loop *TheLoop, bool HasFunNoNaNAttr, - RecurrenceDescriptor &RedDes) { + RecurrenceDescriptor &RedDes, + DemandedBits *DB, + AssumptionCache *AC, + DominatorTree *DT) { if (Phi->getNumIncomingValues() != 2) return false; // Reduction variables are only found in the loop header block. if (Phi->getParent() != TheLoop->getHeader()) return false; // Obtain the reduction start value from the value that comes from the loop // preheader. Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader()); // ExitInstruction is the single value which is used outside the loop. // We only allow for a single reduction value to be used outside the loop. // This includes users of the reduction and variables which form a cycle // that ends in the phi node. Instruction *ExitInstruction = nullptr; // Indicates that we found a reduction operation in our scan. bool FoundReduxOp = false; // We start with the PHI node and scan for all of the users of this // instruction. All users must be instructions that can be used as reduction // variables (such as ADD). We must have a single out-of-block user. The cycle // must include the original PHI. bool FoundStartPHI = false; // To recognize min/max patterns formed by an icmp select sequence, we store // the number of instructions we saw from the recognized min/max pattern, // to make sure we only see exactly the two instructions. unsigned NumCmpSelectPatternInst = 0; InstDesc ReduxDesc(false, nullptr); // Data used for determining if the recurrence has been type-promoted.
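// ---------------------------------------------------------------------------
// Editorial aside (illustrative sketch, not part of this change):
// collectCastsToIgnore above is a plain worklist walk over the operands of
// loop-varying instructions. Its core shape in standard C++, with Node as a
// hypothetical stand-in for Instruction:
#include <unordered_set>
#include <vector>

struct Node {
  std::vector<Node *> Operands;
};

static void walkOperands(Node *Exit) {
  std::vector<Node *> Worklist{Exit};
  std::unordered_set<Node *> Visited;
  while (!Worklist.empty()) {
    Node *N = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(N).second)
      continue; // visit each node once
    for (Node *Op : N->Operands)
      Worklist.push_back(Op); // the real code also filters by TheLoop
  }
}
// ---------------------------------------------------------------------------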
Type *RecurrenceType = Phi->getType(); SmallPtrSet<Instruction *, 4> CastInsts; Instruction *Start = Phi; bool IsSigned = false; SmallPtrSet<Instruction *, 8> VisitedInsts; SmallVector<Instruction *, 8> Worklist; // Return early if the recurrence kind does not match the type of Phi. If the // recurrence kind is arithmetic, we attempt to look through AND operations // resulting from the type promotion performed by InstCombine. Vector // operations are not limited to the legal integer widths, so we may be able // to evaluate the reduction in the narrower width. if (RecurrenceType->isFloatingPointTy()) { if (!isFloatingPointRecurrenceKind(Kind)) return false; } else { if (!isIntegerRecurrenceKind(Kind)) return false; if (isArithmeticRecurrenceKind(Kind)) Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts); } Worklist.push_back(Start); VisitedInsts.insert(Start); // A value in the reduction can be used: // - By the reduction: // - Reduction operation: // - One use of reduction value (safe). // - Multiple use of reduction value (not safe). // - PHI: // - All uses of the PHI must be the reduction (safe). // - Otherwise, not safe. // - By instructions outside of the loop (safe). // * One value may have several outside users, but all outside // uses must be of the same value. // - By an instruction that is not part of the reduction (not safe). // This is either: // * An instruction type other than PHI or the reduction operation. // * A PHI in the header other than the initial PHI. while (!Worklist.empty()) { Instruction *Cur = Worklist.back(); Worklist.pop_back(); // No users. // If the instruction has no users then this is a broken chain and can't be // a reduction variable. if (Cur->use_empty()) return false; bool IsAPhi = isa<PHINode>(Cur); // A header PHI use other than the original PHI. if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent()) return false; // Reductions of instructions such as Div and Sub are only possible if the // LHS is the reduction variable. if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) && !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) && !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0)))) return false; // Any reduction instruction must be of one of the allowed kinds. We ignore // the starting value (the Phi or an AND instruction if the Phi has been // type-promoted). if (Cur != Start) { ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr); if (!ReduxDesc.isRecurrence()) return false; } // A reduction operation must only have one use of the reduction value. if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax && hasMultipleUsesOf(Cur, VisitedInsts)) return false; // All inputs to a PHI node must be a reduction value. if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts)) return false; if (Kind == RK_IntegerMinMax && (isa<ICmpInst>(Cur) || isa<SelectInst>(Cur))) ++NumCmpSelectPatternInst; if (Kind == RK_FloatMinMax && (isa<FCmpInst>(Cur) || isa<SelectInst>(Cur))) ++NumCmpSelectPatternInst; // Check whether we found a reduction operator. FoundReduxOp |= !IsAPhi && Cur != Start; // Process users of current instruction. Push non-PHI nodes after PHI nodes // onto the stack. This way we are going to have seen all inputs to PHI // nodes once we get to them. SmallVector<Instruction *, 8> NonPHIs; SmallVector<Instruction *, 8> PHIs; for (User *U : Cur->users()) { Instruction *UI = cast<Instruction>(U); // Check if we found the exit user. BasicBlock *Parent = UI->getParent(); if (!TheLoop->contains(Parent)) { // If we already know this instruction is used externally, move on to // the next user.
if (ExitInstruction == Cur) continue; // Exit if you find multiple values used outside or if the header phi // node is being used. In this case the user uses the value of the // previous iteration, in which case we would lose "VF-1" iterations of // the reduction operation if we vectorize. if (ExitInstruction != nullptr || Cur == Phi) return false; // The instruction used by an outside user must be the last instruction // before we feed back to the reduction phi. Otherwise, we lose VF-1 // operations on the value. if (!is_contained(Phi->operands(), Cur)) return false; ExitInstruction = Cur; continue; } // Process instructions only once (termination). Each reduction cycle // value must only be used once, except by phi nodes and min/max // reductions which are represented as a cmp followed by a select. InstDesc IgnoredVal(false, nullptr); if (VisitedInsts.insert(UI).second) { if (isa<PHINode>(UI)) PHIs.push_back(UI); else NonPHIs.push_back(UI); } else if (!isa<PHINode>(UI) && ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) && !isa<SelectInst>(UI)) || !isMinMaxSelectCmpPattern(UI, IgnoredVal).isRecurrence())) return false; // Remember that we completed the cycle. if (UI == Phi) FoundStartPHI = true; } Worklist.append(PHIs.begin(), PHIs.end()); Worklist.append(NonPHIs.begin(), NonPHIs.end()); } // This means we have seen one but not the other instruction of the // pattern or more than just a select and cmp. if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) && NumCmpSelectPatternInst != 2) return false; if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction) return false; - // If we think Phi may have been type-promoted, we also need to ensure that - // all source operands of the reduction are either SExtInsts or ZExtInsts. If - // so, we will be able to evaluate the reduction in the narrower bit width. - if (Start != Phi) - if (!getSourceExtensionKind(Start, ExitInstruction, RecurrenceType, - IsSigned, VisitedInsts, CastInsts)) + if (Start != Phi) { + // If the starting value is not the same as the phi node, we speculatively + // looked through an 'and' instruction when evaluating a potential + // arithmetic reduction to determine if it may have been type-promoted. + // + // We now compute the minimal bit width that is required to represent the + // reduction. If this is the same width that was indicated by the 'and', we + // can represent the reduction in the smaller type. The 'and' instruction + // will be eliminated since it will essentially be a cast instruction that + // can be ignored in the cost model. If we compute a different type than we + // did when evaluating the 'and', the 'and' will not be eliminated, and we + // will end up with different kinds of operations in the recurrence + // expression (e.g., RK_IntegerAND, RK_IntegerADD). We give up if this is + // the case. + // + // The vectorizer relies on InstCombine to perform the actual + // type-shrinking. It does this by inserting instructions to truncate the + // exit value of the reduction to the width indicated by RecurrenceType and + // then extend this value back to the original width. If IsSigned is false, + // a 'zext' instruction will be generated; otherwise, a 'sext' will be + // used. + // + // TODO: We should not rely on InstCombine to rewrite the reduction in the + // smaller type. We should just generate a correctly typed expression + // to begin with.
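// ---------------------------------------------------------------------------
// Editorial aside (illustrative sketch, not part of this change): the
// truncate-then-extend round trip described in the comment above, in scalar
// form. On a typical two's-complement target, narrowing a value to W bits
// and restoring it is a no-op exactly when the value fits in W bits, which
// is what computeRecurrenceType establishes (helper names hypothetical):
#include <cstdint>

// Narrow X to W bits and restore it with a zero-extension (IsSigned false).
static uint32_t zextRoundTrip(uint32_t X, unsigned W) {
  uint32_t Mask = (W >= 32) ? ~0u : ((1u << W) - 1);
  return X & Mask; // trunc to W bits, then zext back to 32
}

// Signed variant (IsSigned true): shift left, then arithmetic shift right.
static int32_t sextRoundTrip(int32_t X, unsigned W) {
  unsigned S = 32 - W;
  return (int32_t)((uint32_t)X << S) >> S; // trunc, then sext back to 32
}
// ---------------------------------------------------------------------------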
+ Type *ComputedType; + std::tie(ComputedType, IsSigned) = + computeRecurrenceType(ExitInstruction, DB, AC, DT); + if (ComputedType != RecurrenceType) return false; + // The recurrence expression will be represented in a narrower type. If + // there are any cast instructions that will be unnecessary, collect them + // in CastInsts. Note that the 'and' instruction was already included in + // this list. + // + // TODO: A better way to represent this may be to tag in some way all the + // instructions that are a part of the reduction. The vectorizer cost + // model could then apply the recurrence type to these instructions, + // without needing a white list of instructions to ignore. + collectCastsToIgnore(TheLoop, ExitInstruction, RecurrenceType, CastInsts); + } + // We found a reduction var if we have reached the original phi node and we // only have a single instruction with out-of-loop users. // The ExitInstruction(Instruction which is allowed to have out-of-loop users) // is saved as part of the RecurrenceDescriptor. // Save the description of this reduction variable. RecurrenceDescriptor RD( RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(), ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts); RedDes = RD; return true; } /// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction /// pattern corresponding to a min(X, Y) or max(X, Y). RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev) { assert((isa(I) || isa(I) || isa(I)) && "Expect a select instruction"); Instruction *Cmp = nullptr; SelectInst *Select = nullptr; // We must handle the select(cmp()) as a single instruction. Advance to the // select. if ((Cmp = dyn_cast(I)) || (Cmp = dyn_cast(I))) { if (!Cmp->hasOneUse() || !(Select = dyn_cast(*I->user_begin()))) return InstDesc(false, I); return InstDesc(Select, Prev.getMinMaxKind()); } // Only handle single use cases for now. if (!(Select = dyn_cast(I))) return InstDesc(false, I); if (!(Cmp = dyn_cast(I->getOperand(0))) && !(Cmp = dyn_cast(I->getOperand(0)))) return InstDesc(false, I); if (!Cmp->hasOneUse()) return InstDesc(false, I); Value *CmpLeft; Value *CmpRight; // Look for a min/max pattern. if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) return InstDesc(Select, MRK_UIntMin); else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) return InstDesc(Select, MRK_UIntMax); else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) return InstDesc(Select, MRK_SIntMax); else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) return InstDesc(Select, MRK_SIntMin); else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) return InstDesc(Select, MRK_FloatMin); else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) return InstDesc(Select, MRK_FloatMax); else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) return InstDesc(Select, MRK_FloatMin); else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) return InstDesc(Select, MRK_FloatMax); return InstDesc(false, I); } RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind, InstDesc &Prev, bool HasFunNoNaNAttr) { bool FP = I->getType()->isFloatingPointTy(); Instruction *UAI = Prev.getUnsafeAlgebraInst(); if (!UAI && FP && !I->isFast()) UAI = I; // Found an unsafe (unvectorizable) algebra instruction. 
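// ---------------------------------------------------------------------------
// Editorial aside (illustrative sketch, not part of this change): the
// icmp/select idiom that isMinMaxSelectCmpPattern recognizes, in scalar form
// (hypothetical helper; in IR the compare must have the select as its only
// use):
static int signedMax(int A, int B) {
  bool Cond = A > B;   // the ICmp (SGT) half of the pattern
  return Cond ? A : B; // the Select half; together they map to MRK_SIntMax
}
// ---------------------------------------------------------------------------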
switch (I->getOpcode()) { default: return InstDesc(false, I); case Instruction::PHI: return InstDesc(I, Prev.getMinMaxKind(), Prev.getUnsafeAlgebraInst()); case Instruction::Sub: case Instruction::Add: return InstDesc(Kind == RK_IntegerAdd, I); case Instruction::Mul: return InstDesc(Kind == RK_IntegerMult, I); case Instruction::And: return InstDesc(Kind == RK_IntegerAnd, I); case Instruction::Or: return InstDesc(Kind == RK_IntegerOr, I); case Instruction::Xor: return InstDesc(Kind == RK_IntegerXor, I); case Instruction::FMul: return InstDesc(Kind == RK_FloatMult, I, UAI); case Instruction::FSub: case Instruction::FAdd: return InstDesc(Kind == RK_FloatAdd, I, UAI); case Instruction::FCmp: case Instruction::ICmp: case Instruction::Select: if (Kind != RK_IntegerMinMax && (!HasFunNoNaNAttr || Kind != RK_FloatMinMax)) return InstDesc(false, I); return isMinMaxSelectCmpPattern(I, Prev); } } bool RecurrenceDescriptor::hasMultipleUsesOf( Instruction *I, SmallPtrSetImpl &Insts) { unsigned NumUses = 0; for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use) { if (Insts.count(dyn_cast(*Use))) ++NumUses; if (NumUses > 1) return true; } return false; } bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop, - RecurrenceDescriptor &RedDes) { + RecurrenceDescriptor &RedDes, + DemandedBits *DB, AssumptionCache *AC, + DominatorTree *DT) { BasicBlock *Header = TheLoop->getHeader(); Function &F = *Header->getParent(); bool HasFunNoNaNAttr = F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true"; - if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes)) { + if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB, + AC, DT)) { DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n"); return true; } - if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes)) { + if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes, DB, + AC, DT)) { DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n"); return true; } - if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes)) { + if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes, DB, + AC, DT)) { DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n"); return true; } - if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes)) { + if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes, DB, + AC, DT)) { DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n"); return true; } - if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes)) { + if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes, DB, + AC, DT)) { DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n"); return true; } - if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr, - RedDes)) { + if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr, RedDes, + DB, AC, DT)) { DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n"); return true; } - if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes)) { + if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes, DB, + AC, DT)) { DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n"); return true; } - if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes)) { + if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB, + AC, DT)) { DEBUG(dbgs() << "Found an FAdd reduction PHI." 
<< *Phi << "\n"); return true; } - if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes)) { + if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes, DB, + AC, DT)) { DEBUG(dbgs() << "Found an float MINMAX reduction PHI." << *Phi << "\n"); return true; } // Not a reduction of known type. return false; } bool RecurrenceDescriptor::isFirstOrderRecurrence( PHINode *Phi, Loop *TheLoop, DenseMap &SinkAfter, DominatorTree *DT) { // Ensure the phi node is in the loop header and has two incoming values. if (Phi->getParent() != TheLoop->getHeader() || Phi->getNumIncomingValues() != 2) return false; // Ensure the loop has a preheader and a single latch block. The loop // vectorizer will need the latch to set up the next iteration of the loop. auto *Preheader = TheLoop->getLoopPreheader(); auto *Latch = TheLoop->getLoopLatch(); if (!Preheader || !Latch) return false; // Ensure the phi node's incoming blocks are the loop preheader and latch. if (Phi->getBasicBlockIndex(Preheader) < 0 || Phi->getBasicBlockIndex(Latch) < 0) return false; // Get the previous value. The previous value comes from the latch edge while // the initial value comes form the preheader edge. auto *Previous = dyn_cast(Phi->getIncomingValueForBlock(Latch)); if (!Previous || !TheLoop->contains(Previous) || isa(Previous) || SinkAfter.count(Previous)) // Cannot rely on dominance due to motion. return false; // Ensure every user of the phi node is dominated by the previous value. // The dominance requirement ensures the loop vectorizer will not need to // vectorize the initial value prior to the first iteration of the loop. // TODO: Consider extending this sinking to handle other kinds of instructions // and expressions, beyond sinking a single cast past Previous. if (Phi->hasOneUse()) { auto *I = Phi->user_back(); if (I->isCast() && (I->getParent() == Phi->getParent()) && I->hasOneUse() && DT->dominates(Previous, I->user_back())) { if (!DT->dominates(Previous, I)) // Otherwise we're good w/o sinking. SinkAfter[I] = Previous; return true; } } for (User *U : Phi->users()) if (auto *I = dyn_cast(U)) { if (!DT->dominates(Previous, I)) return false; } return true; } /// This function returns the identity element (or neutral element) for /// the operation K. Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurrenceKind K, Type *Tp) { switch (K) { case RK_IntegerXor: case RK_IntegerAdd: case RK_IntegerOr: // Adding, Xoring, Oring zero to a number does not change it. return ConstantInt::get(Tp, 0); case RK_IntegerMult: // Multiplying a number by 1 does not change it. return ConstantInt::get(Tp, 1); case RK_IntegerAnd: // AND-ing a number with an all-1 value does not change it. return ConstantInt::get(Tp, -1, true); case RK_FloatMult: // Multiplying a number by 1 does not change it. return ConstantFP::get(Tp, 1.0L); case RK_FloatAdd: // Adding zero to a number does not change it. return ConstantFP::get(Tp, 0.0L); default: llvm_unreachable("Unknown recurrence kind"); } } /// This function translates the recurrence kind to an LLVM binary operator. 
unsigned RecurrenceDescriptor::getRecurrenceBinOp(RecurrenceKind Kind) { switch (Kind) { case RK_IntegerAdd: return Instruction::Add; case RK_IntegerMult: return Instruction::Mul; case RK_IntegerOr: return Instruction::Or; case RK_IntegerAnd: return Instruction::And; case RK_IntegerXor: return Instruction::Xor; case RK_FloatMult: return Instruction::FMul; case RK_FloatAdd: return Instruction::FAdd; case RK_IntegerMinMax: return Instruction::ICmp; case RK_FloatMinMax: return Instruction::FCmp; default: llvm_unreachable("Unknown recurrence operation"); } } Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder, MinMaxRecurrenceKind RK, Value *Left, Value *Right) { CmpInst::Predicate P = CmpInst::ICMP_NE; switch (RK) { default: llvm_unreachable("Unknown min/max recurrence kind"); case MRK_UIntMin: P = CmpInst::ICMP_ULT; break; case MRK_UIntMax: P = CmpInst::ICMP_UGT; break; case MRK_SIntMin: P = CmpInst::ICMP_SLT; break; case MRK_SIntMax: P = CmpInst::ICMP_SGT; break; case MRK_FloatMin: P = CmpInst::FCMP_OLT; break; case MRK_FloatMax: P = CmpInst::FCMP_OGT; break; } // We only match FP sequences that are 'fast', so we can unconditionally // set it on any generated instructions. IRBuilder<>::FastMathFlagGuard FMFG(Builder); FastMathFlags FMF; FMF.setFast(); Builder.setFastMathFlags(FMF); Value *Cmp; if (RK == MRK_FloatMin || RK == MRK_FloatMax) Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp"); else Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp"); Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select"); return Select; } InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K, const SCEV *Step, BinaryOperator *BOp, SmallVectorImpl *Casts) : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) { assert(IK != IK_NoInduction && "Not an induction"); // Start value type should match the induction kind and the value // itself should not be null. assert(StartValue && "StartValue is null"); assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) && "StartValue is not a pointer for pointer induction"); assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) && "StartValue is not an integer for integer induction"); // Check the Step Value. It should be non-zero integer value. 
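// ---------------------------------------------------------------------------
// Editorial aside (illustrative, not part of this change): schematically,
// createMinMaxOp above emits a compare followed by a select, e.g. for
// MRK_SIntMax on <4 x i32> (IR sketch; the %left/%right names are invented,
// the "rdx.minmax.*" names come from the calls above):
//   %rdx.minmax.cmp    = icmp sgt <4 x i32> %left, %right
//   %rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp,
//                               <4 x i32> %left, <4 x i32> %right
// For the float kinds the compare is an fcmp olt/ogt, carrying the fast-math
// flags set unconditionally above.
// ---------------------------------------------------------------------------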
assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) && "Step value is zero"); assert((IK != IK_PtrInduction || getConstIntStepValue()) && "Step value should be constant for pointer induction"); assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) && "StepValue is not an integer"); assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) && "StepValue is not FP for FpInduction"); assert((IK != IK_FpInduction || (InductionBinOp && (InductionBinOp->getOpcode() == Instruction::FAdd || InductionBinOp->getOpcode() == Instruction::FSub))) && "Binary opcode should be specified for FP induction"); if (Casts) { for (auto &Inst : *Casts) { RedundantCasts.push_back(Inst); } } } int InductionDescriptor::getConsecutiveDirection() const { ConstantInt *ConstStep = getConstIntStepValue(); if (ConstStep && (ConstStep->isOne() || ConstStep->isMinusOne())) return ConstStep->getSExtValue(); return 0; } ConstantInt *InductionDescriptor::getConstIntStepValue() const { if (isa<SCEVConstant>(Step)) return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue()); return nullptr; } Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index, ScalarEvolution *SE, const DataLayout& DL) const { SCEVExpander Exp(*SE, DL, "induction"); assert(Index->getType() == Step->getType() && "Index type does not match StepValue type"); switch (IK) { case IK_IntInduction: { assert(Index->getType() == StartValue->getType() && "Index type does not match StartValue type"); // FIXME: Theoretically, we can call getAddExpr() of ScalarEvolution // and calculate (Start + Index * Step) for all cases, without // special handling for "isOne" and "isMinusOne". // But in real life the resulting code gets worse: we would mix SCEV // expressions and ADD/SUB operations and end up with redundant // intermediate values computed in different ways, which InstCombine // is unable to reduce. if (getConstIntStepValue() && getConstIntStepValue()->isMinusOne()) return B.CreateSub(StartValue, Index); if (getConstIntStepValue() && getConstIntStepValue()->isOne()) return B.CreateAdd(StartValue, Index); const SCEV *S = SE->getAddExpr(SE->getSCEV(StartValue), SE->getMulExpr(Step, SE->getSCEV(Index))); return Exp.expandCodeFor(S, StartValue->getType(), &*B.GetInsertPoint()); } case IK_PtrInduction: { assert(isa<SCEVConstant>(Step) && "Expected constant step for pointer induction"); const SCEV *S = SE->getMulExpr(SE->getSCEV(Index), Step); Index = Exp.expandCodeFor(S, Index->getType(), &*B.GetInsertPoint()); return B.CreateGEP(nullptr, StartValue, Index); } case IK_FpInduction: { assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value"); assert(InductionBinOp && (InductionBinOp->getOpcode() == Instruction::FAdd || InductionBinOp->getOpcode() == Instruction::FSub) && "Original bin op should be defined for FP induction"); Value *StepValue = cast<SCEVConstant>(Step)->getValue(); // Floating point operations had to be 'fast' to enable the induction. FastMathFlags Flags; Flags.setFast(); Value *MulExp = B.CreateFMul(StepValue, Index); if (isa<Instruction>(MulExp)) // We have to check: MulExp may be a constant. cast<Instruction>(MulExp)->setFastMathFlags(Flags); Value *BOp = B.CreateBinOp(InductionBinOp->getOpcode(), StartValue, MulExp, "induction"); if (isa<Instruction>(BOp)) cast<Instruction>(BOp)->setFastMathFlags(Flags); return BOp; } case IK_NoInduction: return nullptr; } llvm_unreachable("invalid enum"); } bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE, InductionDescriptor &D) { // Here we only handle FP induction variables.
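// ---------------------------------------------------------------------------
// Editorial aside (illustrative sketch, not part of this change): a scalar
// analogue of the IK_IntInduction case of transform() above. The value of
// the induction at a given Index is Start + Index * Step, with the unit-step
// fast paths kept as a plain add/sub for the reason given in the FIXME
// (hypothetical helper, standard C++ only):
#include <cstdint>

static int64_t inductionValueAt(int64_t Start, int64_t Step, int64_t Index) {
  if (Step == -1)
    return Start - Index;        // isMinusOne fast path: plain sub
  if (Step == 1)
    return Start + Index;        // isOne fast path: plain add
  return Start + Index * Step;   // general case (SCEV expansion in the source)
}
// ---------------------------------------------------------------------------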
assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type"); if (TheLoop->getHeader() != Phi->getParent()) return false; // The loop may have multiple entrances or multiple exits; we can analyze // this phi if it has a unique entry value and a unique backedge value. if (Phi->getNumIncomingValues() != 2) return false; Value *BEValue = nullptr, *StartValue = nullptr; if (TheLoop->contains(Phi->getIncomingBlock(0))) { BEValue = Phi->getIncomingValue(0); StartValue = Phi->getIncomingValue(1); } else { assert(TheLoop->contains(Phi->getIncomingBlock(1)) && "Unexpected Phi node in the loop"); BEValue = Phi->getIncomingValue(1); StartValue = Phi->getIncomingValue(0); } BinaryOperator *BOp = dyn_cast<BinaryOperator>(BEValue); if (!BOp) return false; Value *Addend = nullptr; if (BOp->getOpcode() == Instruction::FAdd) { if (BOp->getOperand(0) == Phi) Addend = BOp->getOperand(1); else if (BOp->getOperand(1) == Phi) Addend = BOp->getOperand(0); } else if (BOp->getOpcode() == Instruction::FSub) if (BOp->getOperand(0) == Phi) Addend = BOp->getOperand(1); if (!Addend) return false; // The addend should be loop invariant. if (auto *I = dyn_cast<Instruction>(Addend)) if (TheLoop->contains(I)) return false; // FP Step has unknown SCEV. const SCEV *Step = SE->getUnknown(Addend); D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp); return true; } /// This function is called when we suspect that the update-chain of a phi node /// (whose symbolic SCEV expression is in \p PhiScev) contains redundant casts /// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime /// predicate P under which the SCEV expression for the phi can be the /// AddRecurrence \p AR; See createAddRecFromPHIWithCast). We want to find the /// cast instructions that are involved in the update-chain of this induction. /// A caller that adds the required runtime predicate can be free to drop these /// cast instructions, and compute the phi using \p AR (instead of some scev /// expression with casts). /// /// For example, without a predicate the scev expression can take the following /// form: /// (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy) /// /// It corresponds to the following IR sequence: /// %for.body: /// %x = phi i64 [ 0, %ph ], [ %add, %for.body ] /// %casted_phi = "ExtTrunc i64 %x" /// %add = add i64 %casted_phi, %step /// /// where %x is given in \p PN, /// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate, /// and the IR sequence that "ExtTrunc i64 %x" represents can take one of /// several forms, for example, such as: /// ExtTrunc1: %casted_phi = and %x, 2^n-1 /// or: /// ExtTrunc2: %t = shl %x, m /// %casted_phi = ashr %t, m /// /// If we are able to find such a sequence, we return the instructions /// we found, namely %casted_phi and the instructions on its use-def chain up /// to the phi (not including the phi). static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE, const SCEVUnknown *PhiScev, const SCEVAddRecExpr *AR, SmallVectorImpl<Instruction *> &CastInsts) { assert(CastInsts.empty() && "CastInsts is expected to be empty."); auto *PN = cast<PHINode>(PhiScev->getValue()); assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression"); const Loop *L = AR->getLoop(); // Find any cast instructions that participate in the def-use chain of // PhiScev in the loop. // FORNOW/TODO: We currently expect the def-use chain to include only // two-operand instructions, where one of the operands is an invariant.
// createAddRecFromPHIWithCasts() currently does not support anything more // involved than that, so we keep the search simple. This can be // extended/generalized as needed. auto getDef = [&](const Value *Val) -> Value * { const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val); if (!BinOp) return nullptr; Value *Op0 = BinOp->getOperand(0); Value *Op1 = BinOp->getOperand(1); Value *Def = nullptr; if (L->isLoopInvariant(Op0)) Def = Op1; else if (L->isLoopInvariant(Op1)) Def = Op0; return Def; }; // Look for the instruction that defines the induction via the // loop backedge. BasicBlock *Latch = L->getLoopLatch(); if (!Latch) return false; Value *Val = PN->getIncomingValueForBlock(Latch); if (!Val) return false; // Follow the def-use chain until the induction phi is reached. // If on the way we encounter a Value that has the same SCEV Expr as the // phi node, we can consider the instructions we visit from that point // as part of the cast-sequence that can be ignored. bool InCastSequence = false; auto *Inst = dyn_cast<Instruction>(Val); while (Val != PN) { // If we encountered a phi node other than PN, or if we left the loop, // we bail out. if (!Inst || !L->contains(Inst)) { return false; } auto *AddRec = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Val)); if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR)) InCastSequence = true; if (InCastSequence) { // Only the last instruction in the cast sequence is expected to have // uses outside the induction def-use chain. if (!CastInsts.empty()) if (!Inst->hasOneUse()) return false; CastInsts.push_back(Inst); } Val = getDef(Val); if (!Val) return false; Inst = dyn_cast<Instruction>(Val); } return InCastSequence; } bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop, PredicatedScalarEvolution &PSE, InductionDescriptor &D, bool Assume) { Type *PhiTy = Phi->getType(); // Handle integer and pointer induction variables. // We now also handle FP induction, but we do not try to make a // recurrent expression from the PHI node in-place. if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() && !PhiTy->isFloatTy() && !PhiTy->isDoubleTy() && !PhiTy->isHalfTy()) return false; if (PhiTy->isFloatingPointTy()) return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D); const SCEV *PhiScev = PSE.getSCEV(Phi); const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev); // We need this expression to be an AddRecExpr. if (Assume && !AR) AR = PSE.getAsAddRec(Phi); if (!AR) { DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); return false; } // Record any Cast instructions that participate in the induction update. const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(PhiScev); // If we started from an UnknownSCEV, and managed to build an addRecurrence // only after enabling Assume with PSCEV, this means we may have encountered // cast instructions that required adding a runtime check in order to // guarantee the correctness of the AddRecurrence representation of the // induction. if (PhiScev != AR && SymbolicPhi) { SmallVector<Instruction *, 2> Casts; if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts)) return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts); } return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR); } bool InductionDescriptor::isInductionPHI( PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr, SmallVectorImpl<Instruction *> *CastsToIgnore) { Type *PhiTy = Phi->getType(); // We only handle integer and pointer induction variables. if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy()) return false; // Check that the PHI is consecutive. const SCEV *PhiScev = Expr ?
Expr : SE->getSCEV(Phi); const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev); if (!AR) { DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); return false; } if (AR->getLoop() != TheLoop) { // FIXME: We should treat this as a uniform. Unfortunately, we // don't currently know how to handle uniform PHIs. DEBUG(dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n"); return false; } Value *StartValue = Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader()); const SCEV *Step = AR->getStepRecurrence(*SE); // Calculate the pointer stride and check if it is consecutive. // The stride may be a constant or a loop invariant integer value. const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step); if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop)) return false; if (PhiTy->isIntegerTy()) { D = InductionDescriptor(StartValue, IK_IntInduction, Step, /*BOp=*/ nullptr, CastsToIgnore); return true; } assert(PhiTy->isPointerTy() && "The PHI must be a pointer"); // Pointer induction should be a constant. if (!ConstStep) return false; ConstantInt *CV = ConstStep->getValue(); Type *PointerElementType = PhiTy->getPointerElementType(); // The pointer stride cannot be determined if the pointer element type is not // sized. if (!PointerElementType->isSized()) return false; const DataLayout &DL = Phi->getModule()->getDataLayout(); int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType)); if (!Size) return false; int64_t CVSize = CV->getSExtValue(); if (CVSize % Size) return false; auto *StepValue = SE->getConstant(CV->getType(), CVSize / Size, true /* signed */); D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue); return true; } bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA) { bool Changed = false; // We re-use a vector for the in-loop predecessors. SmallVector<BasicBlock *, 16> InLoopPredecessors; auto RewriteExit = [&](BasicBlock *BB) { assert(InLoopPredecessors.empty() && "Must start with an empty predecessors list!"); auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); }); // See if there are any non-loop predecessors of this exit block and // keep track of the in-loop predecessors. bool IsDedicatedExit = true; for (auto *PredBB : predecessors(BB)) if (L->contains(PredBB)) { if (isa<IndirectBrInst>(PredBB->getTerminator())) // We cannot rewrite exiting edges from an indirectbr. return false; InLoopPredecessors.push_back(PredBB); } else { IsDedicatedExit = false; } assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!"); // Nothing to do if this is already a dedicated exit. if (IsDedicatedExit) return false; auto *NewExitBB = SplitBlockPredecessors( BB, InLoopPredecessors, ".loopexit", DT, LI, PreserveLCSSA); if (!NewExitBB) DEBUG(dbgs() << "WARNING: Can't create a dedicated exit block for loop: " << *L << "\n"); else DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block " << NewExitBB->getName() << "\n"); return true; }; // Walk the exit blocks directly rather than building up a data structure for // them, but only visit each one once. SmallPtrSet<BasicBlock *, 4> Visited; for (auto *BB : L->blocks()) for (auto *SuccBB : successors(BB)) { // We're looking for exit blocks so skip in-loop successors. if (L->contains(SuccBB)) continue; // Visit each exit block exactly once. if (!Visited.insert(SuccBB).second) continue; Changed |= RewriteExit(SuccBB); } return Changed; } /// \brief Returns the instructions that use values defined in the loop.
SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) { SmallVector<Instruction *, 8> UsedOutside; for (auto *Block : L->getBlocks()) // FIXME: I believe that this could use copy_if if the Inst reference could // be adapted into a pointer. for (auto &Inst : *Block) { auto Users = Inst.users(); if (any_of(Users, [&](User *U) { auto *Use = cast<Instruction>(U); return !L->contains(Use->getParent()); })) UsedOutside.push_back(&Inst); } return UsedOutside; } void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) { // By definition, all loop passes need the LoopInfo analysis and the // Dominator tree it depends on. Because they all participate in the loop // pass manager, they must also preserve these. AU.addRequired<DominatorTreeWrapperPass>(); AU.addPreserved<DominatorTreeWrapperPass>(); AU.addRequired<LoopInfoWrapperPass>(); AU.addPreserved<LoopInfoWrapperPass>(); // We must also preserve LoopSimplify and LCSSA. We locally access their IDs // here because users shouldn't directly get them from this header. extern char &LoopSimplifyID; extern char &LCSSAID; AU.addRequiredID(LoopSimplifyID); AU.addPreservedID(LoopSimplifyID); AU.addRequiredID(LCSSAID); AU.addPreservedID(LCSSAID); // This is used in the LPPassManager to perform LCSSA verification on passes // which preserve LCSSA form. AU.addRequired<LCSSAVerificationPass>(); AU.addPreserved<LCSSAVerificationPass>(); // Loop passes are designed to run inside of a loop pass manager which means // that any function analyses they require must be required by the first loop // pass in the manager (so that it is computed before the loop pass manager // runs) and preserved by all loop passes in the manager. To make this // reasonably robust, the set needed for most loop passes is maintained here. // If your loop pass requires an analysis not listed here, you will need to // carefully audit the loop pass manager nesting structure that results. AU.addRequired<AAResultsWrapperPass>(); AU.addPreserved<AAResultsWrapperPass>(); AU.addPreserved<BasicAAWrapperPass>(); AU.addPreserved<GlobalsAAWrapperPass>(); AU.addPreserved<SCEVAAWrapperPass>(); AU.addRequired<ScalarEvolutionWrapperPass>(); AU.addPreserved<ScalarEvolutionWrapperPass>(); } /// Manually defined generic "LoopPass" dependency initialization. This is used /// to initialize the exact set of passes from above in \c /// getLoopAnalysisUsage. It can be used within a loop pass's initialization /// with: /// /// INITIALIZE_PASS_DEPENDENCY(LoopPass) /// /// As-if "LoopPass" were a pass. void llvm::initializeLoopPassPass(PassRegistry &Registry) { INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass) INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) } /// \brief Find string metadata for loop /// /// If it has a value (e.g. {"llvm.distribute", 1}) return the value as an /// operand, or null otherwise. If the string metadata is not found return /// Optional's not-a-value. Optional<const MDOperand *> llvm::findStringMetadataForLoop(Loop *TheLoop, StringRef Name) { MDNode *LoopID = TheLoop->getLoopID(); // Return None if LoopID is null. if (!LoopID) return None; // First operand should refer to the loop id itself.
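// ---------------------------------------------------------------------------
// Editorial aside (illustrative, not part of this change): the shape of the
// metadata walked below (IR sketch; the node numbers are invented). The
// first operand is the self-referential loop id, which is why the scan
// starts at operand 1:
//   !0 = distinct !{!0, !1, !2}                  ; loop id
//   !1 = !{!"llvm.loop.unroll.disable"}          ; name only  -> nullptr
//   !2 = !{!"llvm.loop.vectorize.width", i32 4}  ; name+value -> operand 1
// ---------------------------------------------------------------------------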
assert(LoopID->getNumOperands() > 0 && "requires at least one operand"); assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); // Iterate over LoopID operands and look for MDString metadata. for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) { MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); if (!MD) continue; MDString *S = dyn_cast<MDString>(MD->getOperand(0)); if (!S) continue; // Return the value operand if the MDString holds the expected metadata. if (Name.equals(S->getString())) switch (MD->getNumOperands()) { case 1: return nullptr; case 2: return &MD->getOperand(1); default: llvm_unreachable("loop metadata should have at most two operands"); } } return None; } /// Does a BFS from a given node to all of its children inside a given loop. /// The returned vector of nodes includes the starting point. SmallVector<DomTreeNode *, 16> llvm::collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop) { SmallVector<DomTreeNode *, 16> Worklist; auto AddRegionToWorklist = [&](DomTreeNode *DTN) { // Only include subregions in the top level loop. BasicBlock *BB = DTN->getBlock(); if (CurLoop->contains(BB)) Worklist.push_back(DTN); }; AddRegionToWorklist(N); for (size_t I = 0; I < Worklist.size(); I++) for (DomTreeNode *Child : Worklist[I]->getChildren()) AddRegionToWorklist(Child); return Worklist; } void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr, ScalarEvolution *SE = nullptr, LoopInfo *LI = nullptr) { assert((!DT || L->isLCSSAForm(*DT)) && "Expected LCSSA!"); auto *Preheader = L->getLoopPreheader(); assert(Preheader && "Preheader should exist!"); // Now that we know the removal is safe, remove the loop by changing the // branch from the preheader to go to the single exit block. // // Because we're deleting a large chunk of code at once, the sequence in which // we remove things is very important to avoid invalidation issues. // Tell ScalarEvolution that the loop is deleted. Do this before // deleting the loop so that ScalarEvolution can look at the loop // to determine what it needs to clean up. if (SE) SE->forgetLoop(L); auto *ExitBlock = L->getUniqueExitBlock(); assert(ExitBlock && "Should have a unique exit block!"); assert(L->hasDedicatedExits() && "Loop should have dedicated exits!"); auto *OldBr = dyn_cast<BranchInst>(Preheader->getTerminator()); assert(OldBr && "Preheader must end with a branch"); assert(OldBr->isUnconditional() && "Preheader must have a single successor"); // Connect the preheader to the exit block. Keep the old edge to the header // around to perform the dominator tree update in two separate steps // -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge // preheader -> header. // // // 0. Preheader 1. Preheader 2. Preheader // | | | | // V | V | // Header <--\ | Header <--\ | Header <--\ // | | | | | | | | | | | // | V | | | V | | | V | // | Body --/ | | Body --/ | | Body --/ // V V V V V // Exit Exit Exit // // By doing this in two separate steps we can perform the dominator tree // update without using the batch update API. // // Even when the loop is never executed, we cannot remove the edge from the // source block to the exit block. Consider the case where the unexecuted loop // branches back to an outer loop. If we deleted the loop and removed the edge // coming to this inner loop, this will break the outer loop structure (by // deleting the backedge of the outer loop). If the outer loop is indeed a // non-loop, it will be deleted in a future iteration of loop deletion pass. IRBuilder<> Builder(OldBr); Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock); // Remove the old branch.
The conditional branch becomes a new terminator. OldBr->eraseFromParent(); // Rewrite phis in the exit block to get their inputs from the Preheader // instead of the exiting block. for (PHINode &P : ExitBlock->phis()) { // Set the zero'th element of Phi to be from the preheader and remove all // other incoming values. Given the loop has dedicated exits, all other // incoming values must be from the exiting blocks. int PredIndex = 0; P.setIncomingBlock(PredIndex, Preheader); // Removes all incoming values from all other exiting blocks (including // duplicate values from an exiting block). // Nuke all entries except the zero'th entry which is the preheader entry. // NOTE! We need to remove Incoming Values in the reverse order as done // below, to keep the indices valid for deletion (removeIncomingValues // updates getNumIncomingValues and shifts all values down into the operand // being deleted). for (unsigned i = 0, e = P.getNumIncomingValues() - 1; i != e; ++i) P.removeIncomingValue(e - i, false); assert((P.getNumIncomingValues() == 1 && P.getIncomingBlock(PredIndex) == Preheader) && "Should have exactly one value and that's from the preheader!"); } // Disconnect the loop body by branching directly to its exit. Builder.SetInsertPoint(Preheader->getTerminator()); Builder.CreateBr(ExitBlock); // Remove the old branch. Preheader->getTerminator()->eraseFromParent(); if (DT) { // Update the dominator tree by informing it about the new edge from the // preheader to the exit. DT->insertEdge(Preheader, ExitBlock); // Inform the dominator tree about the removed edge. DT->deleteEdge(Preheader, L->getHeader()); } // Remove the block from the reference counting scheme, so that we can // delete it freely later. for (auto *Block : L->blocks()) Block->dropAllReferences(); if (LI) { // Erase the instructions and the blocks without having to worry // about ordering because we already dropped the references. // NOTE: This iteration is safe because erasing the block does not remove // its entry from the loop's block list. We do that in the next section. for (Loop::block_iterator LpI = L->block_begin(), LpE = L->block_end(); LpI != LpE; ++LpI) (*LpI)->eraseFromParent(); // Finally, the blocks from loopinfo. This has to happen late because // otherwise our loop iterators won't work. SmallPtrSet blocks; blocks.insert(L->block_begin(), L->block_end()); for (BasicBlock *BB : blocks) LI->removeBlock(BB); // The last step is to update LoopInfo now that we've eliminated this loop. LI->erase(L); } } /// Returns true if the instruction in a loop is guaranteed to execute at least /// once. bool llvm::isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo) { // We have to check to make sure that the instruction dominates all // of the exit blocks. If it doesn't, then there is a path out of the loop // which does not execute this instruction, so we can't hoist it. // If the instruction is in the header block for the loop (which is very // common), it is always guaranteed to dominate the exit blocks. Since this // is a common case, and can save some work, check it now. if (Inst.getParent() == CurLoop->getHeader()) // If there's a throw in the header block, we can't guarantee we'll reach // Inst. return !SafetyInfo->HeaderMayThrow; // Somewhere in this loop there is an instruction which may throw and make us // exit the loop. if (SafetyInfo->MayThrow) return false; // Get the exit blocks for the current loop. 
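// ---------------------------------------------------------------------------
// Editorial aside (illustrative sketch, not part of this change): why the
// phi-pruning loop above runs in reverse. Erasing entry k shifts every later
// entry down by one, so deleting back-to-front keeps the remaining indices
// valid. The same pattern with a std::vector (assumed non-empty, as a phi
// always has at least one incoming value):
#include <cstddef>
#include <vector>

static void keepOnlyFirst(std::vector<int> &V) {
  for (std::size_t E = V.size() - 1, I = 0; I != E; ++I)
    V.erase(V.begin() + (E - I)); // erase the last surviving entry first
}
// ---------------------------------------------------------------------------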
SmallVector<BasicBlock *, 8> ExitBlocks; CurLoop->getExitBlocks(ExitBlocks); // Verify that the block dominates each of the exit blocks of the loop. for (BasicBlock *ExitBlock : ExitBlocks) if (!DT->dominates(Inst.getParent(), ExitBlock)) return false; // As a degenerate case, if the loop is statically infinite then we haven't // proven anything since there are no exit blocks. if (ExitBlocks.empty()) return false; // FIXME: In general, we have to prove that the loop isn't an infinite loop. // See http://llvm.org/PR24078 . (The "ExitBlocks.empty()" check above is // just a special case of this.) return true; } Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) { // Only support loops with a unique exiting block, and a latch. if (!L->getExitingBlock()) return None; // Get the branch weights for the loop's backedge. BranchInst *LatchBR = dyn_cast<BranchInst>(L->getLoopLatch()->getTerminator()); if (!LatchBR || LatchBR->getNumSuccessors() != 2) return None; assert((LatchBR->getSuccessor(0) == L->getHeader() || LatchBR->getSuccessor(1) == L->getHeader()) && "At least one edge out of the latch must go to the header"); // To estimate the number of times the loop body was executed, we want to // know the number of times the backedge was taken, vs. the number of times // we exited the loop. uint64_t TrueVal, FalseVal; if (!LatchBR->extractProfMetadata(TrueVal, FalseVal)) return None; if (!TrueVal || !FalseVal) return 0; // Divide the count of the backedge by the count of the edge exiting the loop, // rounding to nearest. if (LatchBR->getSuccessor(0) == L->getHeader()) return (TrueVal + (FalseVal / 2)) / FalseVal; else return (FalseVal + (TrueVal / 2)) / TrueVal; } /// \brief Adds a 'fast' flag to floating point operations. static Value *addFastMathFlag(Value *V) { if (isa<FPMathOperator>(V)) { FastMathFlags Flags; Flags.setFast(); cast<Instruction>(V)->setFastMathFlags(Flags); } return V; } // Helper to generate a log2 shuffle reduction. Value * llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op, RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind, ArrayRef<Value *> RedOps) { unsigned VF = Src->getType()->getVectorNumElements(); // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles // and vector ops, reducing the set of values being computed by half each // round. assert(isPowerOf2_32(VF) && "Reduction emission only supported for pow2 vectors!"); Value *TmpVec = Src; SmallVector<Constant *, 32> ShuffleMask(VF, nullptr); for (unsigned i = VF; i != 1; i >>= 1) { // Move the upper half of the vector to the lower half. for (unsigned j = 0; j != i / 2; ++j) ShuffleMask[j] = Builder.getInt32(i / 2 + j); // Fill the rest of the mask with undef. std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), UndefValue::get(Builder.getInt32Ty())); Value *Shuf = Builder.CreateShuffleVector( TmpVec, UndefValue::get(TmpVec->getType()), ConstantVector::get(ShuffleMask), "rdx.shuf"); if (Op != Instruction::ICmp && Op != Instruction::FCmp) { // Floating point operations had to be 'fast' to enable the reduction. TmpVec = addFastMathFlag(Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf, "bin.rdx")); } else { assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid && "Invalid min/max"); TmpVec = RecurrenceDescriptor::createMinMaxOp(Builder, MinMaxKind, TmpVec, Shuf); } if (!RedOps.empty()) propagateIRFlags(TmpVec, RedOps); } // The result is in the first element of the vector.
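// ---------------------------------------------------------------------------
// Editorial aside (illustrative sketch, not part of this change): a scalar
// analogue of the log2 shuffle reduction above. Each round folds the upper
// half of the live values into the lower half, so a power-of-two VF takes
// log2(VF) rounds and the result lands in element 0 (hypothetical helper):
#include <cstddef>

static int reduceAdd(int *V, std::size_t VF) { // VF must be a power of two
  for (std::size_t I = VF; I != 1; I >>= 1)
    for (std::size_t J = 0; J != I / 2; ++J)
      V[J] += V[I / 2 + J]; // "move the upper half ... to the lower half"
  return V[0];
}
// ---------------------------------------------------------------------------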
return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0)); } /// Create a simple vector reduction specified by an opcode and some /// flags (if generating min/max reductions). Value *llvm::createSimpleTargetReduction( IRBuilder<> &Builder, const TargetTransformInfo *TTI, unsigned Opcode, Value *Src, TargetTransformInfo::ReductionFlags Flags, ArrayRef RedOps) { assert(isa(Src->getType()) && "Type must be a vector"); Value *ScalarUdf = UndefValue::get(Src->getType()->getVectorElementType()); std::function BuildFunc; using RD = RecurrenceDescriptor; RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid; // TODO: Support creating ordered reductions. FastMathFlags FMFFast; FMFFast.setFast(); switch (Opcode) { case Instruction::Add: BuildFunc = [&]() { return Builder.CreateAddReduce(Src); }; break; case Instruction::Mul: BuildFunc = [&]() { return Builder.CreateMulReduce(Src); }; break; case Instruction::And: BuildFunc = [&]() { return Builder.CreateAndReduce(Src); }; break; case Instruction::Or: BuildFunc = [&]() { return Builder.CreateOrReduce(Src); }; break; case Instruction::Xor: BuildFunc = [&]() { return Builder.CreateXorReduce(Src); }; break; case Instruction::FAdd: BuildFunc = [&]() { auto Rdx = Builder.CreateFAddReduce(ScalarUdf, Src); cast(Rdx)->setFastMathFlags(FMFFast); return Rdx; }; break; case Instruction::FMul: BuildFunc = [&]() { auto Rdx = Builder.CreateFMulReduce(ScalarUdf, Src); cast(Rdx)->setFastMathFlags(FMFFast); return Rdx; }; break; case Instruction::ICmp: if (Flags.IsMaxOp) { MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMax : RD::MRK_UIntMax; BuildFunc = [&]() { return Builder.CreateIntMaxReduce(Src, Flags.IsSigned); }; } else { MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMin : RD::MRK_UIntMin; BuildFunc = [&]() { return Builder.CreateIntMinReduce(Src, Flags.IsSigned); }; } break; case Instruction::FCmp: if (Flags.IsMaxOp) { MinMaxKind = RD::MRK_FloatMax; BuildFunc = [&]() { return Builder.CreateFPMaxReduce(Src, Flags.NoNaN); }; } else { MinMaxKind = RD::MRK_FloatMin; BuildFunc = [&]() { return Builder.CreateFPMinReduce(Src, Flags.NoNaN); }; } break; default: llvm_unreachable("Unhandled opcode"); break; } if (TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags)) return BuildFunc(); return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps); } /// Create a vector reduction using a given recurrence descriptor. Value *llvm::createTargetReduction(IRBuilder<> &B, const TargetTransformInfo *TTI, RecurrenceDescriptor &Desc, Value *Src, bool NoNaN) { // TODO: Support in-order reductions based on the recurrence descriptor. 
using RD = RecurrenceDescriptor; RD::RecurrenceKind RecKind = Desc.getRecurrenceKind(); TargetTransformInfo::ReductionFlags Flags; Flags.NoNaN = NoNaN; switch (RecKind) { case RD::RK_FloatAdd: return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags); case RD::RK_FloatMult: return createSimpleTargetReduction(B, TTI, Instruction::FMul, Src, Flags); case RD::RK_IntegerAdd: return createSimpleTargetReduction(B, TTI, Instruction::Add, Src, Flags); case RD::RK_IntegerMult: return createSimpleTargetReduction(B, TTI, Instruction::Mul, Src, Flags); case RD::RK_IntegerAnd: return createSimpleTargetReduction(B, TTI, Instruction::And, Src, Flags); case RD::RK_IntegerOr: return createSimpleTargetReduction(B, TTI, Instruction::Or, Src, Flags); case RD::RK_IntegerXor: return createSimpleTargetReduction(B, TTI, Instruction::Xor, Src, Flags); case RD::RK_IntegerMinMax: { RD::MinMaxRecurrenceKind MMKind = Desc.getMinMaxRecurrenceKind(); Flags.IsMaxOp = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_UIntMax); Flags.IsSigned = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_SIntMin); return createSimpleTargetReduction(B, TTI, Instruction::ICmp, Src, Flags); } case RD::RK_FloatMinMax: { Flags.IsMaxOp = Desc.getMinMaxRecurrenceKind() == RD::MRK_FloatMax; return createSimpleTargetReduction(B, TTI, Instruction::FCmp, Src, Flags); } default: llvm_unreachable("Unhandled RecKind"); } } void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) { auto *VecOp = dyn_cast<Instruction>(I); if (!VecOp) return; auto *Intersection = (OpValue == nullptr) ? dyn_cast<Instruction>(VL[0]) : dyn_cast<Instruction>(OpValue); if (!Intersection) return; const unsigned Opcode = Intersection->getOpcode(); VecOp->copyIRFlags(Intersection); for (auto *V : VL) { auto *Instr = dyn_cast<Instruction>(V); if (!Instr) continue; if (OpValue == nullptr || Opcode == Instr->getOpcode()) VecOp->andIRFlags(V); } } Index: head/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- head/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (revision 329982) +++ head/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (revision 329983) @@ -1,8826 +1,8836 @@ //===- LoopVectorize.cpp - A Loop Vectorizer ------------------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops // and generates target-independent LLVM-IR. // The vectorizer uses the TargetTransformInfo analysis to estimate the costs // of instructions in order to estimate the profitability of vectorization. // // The loop vectorizer combines consecutive loop iterations into a single // 'wide' iteration. After this transformation the index is incremented // by the SIMD vector width, and not by one. // // This pass has four parts: // 1. The main loop pass that drives the different parts. // 2. LoopVectorizationLegality - A unit that checks for the legality // of the vectorization. // 3. InnerLoopVectorizer - A unit that performs the actual // widening of instructions. // 4. LoopVectorizationCostModel - A unit that checks for the profitability // of vectorization. It decides on the optimal vector width, which // can be one, if vectorization is not profitable.
// //===----------------------------------------------------------------------===// // // The reduction-variable vectorization is based on the paper: // D. Nuzman and R. Henderson. Multi-platform Auto-vectorization. // // Variable uniformity checks are inspired by: // Karrenberg, R. and Hack, S. Whole Function Vectorization. // // The interleaved access vectorization is based on the paper: // Dorit Nuzman, Ira Rosen and Ayal Zaks. Auto-Vectorization of Interleaved // Data for SIMD // // Other ideas/concepts are from: // A. Zaks and D. Nuzman. Autovectorization in GCC-two years later. // // S. Maleki, Y. Gao, M. Garzaran, T. Wong and D. Padua. An Evaluation of // Vectorizing Compilers. // //===----------------------------------------------------------------------===// #include "llvm/Transforms/Vectorize/LoopVectorize.h" #include "VPlan.h" #include "VPlanBuilder.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/DemandedBits.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" #include "llvm/IR/Verifier.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/LoopVersioning.h" #include 
#include #include #include #include #include #include #include #include #include #include #include using namespace llvm; #define LV_NAME "loop-vectorize" #define DEBUG_TYPE LV_NAME STATISTIC(LoopsVectorized, "Number of loops vectorized"); STATISTIC(LoopsAnalyzed, "Number of loops analyzed for vectorization"); static cl::opt<bool> EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization.")); /// Loops with a known constant trip count below this number are vectorized only /// if no scalar iteration overheads are incurred. static cl::opt<unsigned> TinyTripCountVectorThreshold( "vectorizer-min-trip-count", cl::init(16), cl::Hidden, cl::desc("Loops with a constant trip count that is smaller than this " "value are vectorized only if no scalar iteration overheads " "are incurred.")); static cl::opt<bool> MaximizeBandwidth( "vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden, cl::desc("Maximize bandwidth when selecting vectorization factor which " "will be determined by the smallest type in loop.")); static cl::opt<bool> EnableInterleavedMemAccesses( "enable-interleaved-mem-accesses", cl::init(false), cl::Hidden, cl::desc("Enable vectorization on interleaved memory accesses in a loop")); /// Maximum factor for an interleaved memory access. static cl::opt<unsigned> MaxInterleaveGroupFactor( "max-interleave-group-factor", cl::Hidden, cl::desc("Maximum factor for an interleaved access group (default = 8)"), cl::init(8)); /// We don't interleave loops with a known constant trip count below this /// number. static const unsigned TinyTripCountInterleaveThreshold = 128; static cl::opt<unsigned> ForceTargetNumScalarRegs( "force-target-num-scalar-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of scalar registers.")); static cl::opt<unsigned> ForceTargetNumVectorRegs( "force-target-num-vector-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of vector registers.")); /// Maximum vectorization interleave count. static const unsigned MaxInterleaveFactor = 16; static cl::opt<unsigned> ForceTargetMaxScalarInterleaveFactor( "force-target-max-scalar-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for " "scalar loops.")); static cl::opt<unsigned> ForceTargetMaxVectorInterleaveFactor( "force-target-max-vector-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for " "vectorized loops.")); static cl::opt<unsigned> ForceTargetInstructionCost( "force-target-instruction-cost", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's expected cost for " "an instruction to a single constant value. Mostly " "useful for getting consistent testing.")); static cl::opt<unsigned> SmallLoopCost( "small-loop-cost", cl::init(20), cl::Hidden, cl::desc( "The cost of a loop that is considered 'small' by the interleaver.")); static cl::opt<bool> LoopVectorizeWithBlockFrequency( "loop-vectorize-with-block-frequency", cl::init(false), cl::Hidden, cl::desc("Enable the use of the block frequency analysis to access PGO " "heuristics minimizing code growth in cold regions and being more " "aggressive in hot regions.")); // Runtime interleave loops for load/store throughput. static cl::opt<bool> EnableLoadStoreRuntimeInterleave( "enable-loadstore-runtime-interleave", cl::init(true), cl::Hidden, cl::desc( "Enable runtime interleaving until load/store ports are saturated"));
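// Since the knobs above are ordinary cl::opt flags, they can be flipped when
// experimenting (the flag values below are illustrative, only the string
// literals in the declarations above are authoritative): directly with opt,
//   opt -loop-vectorize -vectorizer-min-trip-count=4 -small-loop-cost=30 in.ll
// or through the clang driver via -mllvm,
//   clang -O2 -mllvm -enable-interleaved-mem-accesses file.c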
/// The number of stores in a loop that are allowed to need predication. static cl::opt<unsigned> NumberOfStoresToPredicate( "vectorize-num-stores-pred", cl::init(1), cl::Hidden, cl::desc("Max number of stores to be predicated behind an if.")); static cl::opt<bool> EnableIndVarRegisterHeur( "enable-ind-var-reg-heur", cl::init(true), cl::Hidden, cl::desc("Count the induction variable only once when interleaving")); static cl::opt<bool> EnableCondStoresVectorization( "enable-cond-stores-vec", cl::init(true), cl::Hidden, cl::desc("Enable if predication of stores during vectorization.")); static cl::opt<unsigned> MaxNestedScalarReductionIC( "max-nested-scalar-reduction-interleave", cl::init(2), cl::Hidden, cl::desc("The maximum interleave count to use when interleaving a scalar " "reduction in a nested loop.")); static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold( "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum allowed number of runtime memory checks with a " "vectorize(enable) pragma.")); static cl::opt<unsigned> VectorizeSCEVCheckThreshold( "vectorize-scev-check-threshold", cl::init(16), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed.")); static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold( "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed with a " "vectorize(enable) pragma")); /// Create an analysis remark that explains why vectorization failed /// /// \p PassName is the name of the pass (e.g. can be AlwaysPrint). \p /// RemarkName is the identifier for the remark. If \p I is passed it is an /// instruction that prevents vectorization. Otherwise \p TheLoop is used for /// the location of the remark. \return the remark object that can be /// streamed to. static OptimizationRemarkAnalysis createMissedAnalysis(const char *PassName, StringRef RemarkName, Loop *TheLoop, Instruction *I = nullptr) { Value *CodeRegion = TheLoop->getHeader(); DebugLoc DL = TheLoop->getStartLoc(); if (I) { CodeRegion = I->getParent(); // If there is no debug location attached to the instruction, fall back to // using the loop's. if (I->getDebugLoc()) DL = I->getDebugLoc(); } OptimizationRemarkAnalysis R(PassName, RemarkName, DL, CodeRegion); R << "loop not vectorized: "; return R; } namespace { class LoopVectorizationLegality; class LoopVectorizationCostModel; class LoopVectorizationRequirements; } // end anonymous namespace /// Returns true if the given loop body has a cycle, excluding the loop /// itself. static bool hasCyclesInLoopBody(const Loop &L) { if (!L.empty()) return true; for (const auto &SCC : make_range(scc_iterator<Loop, LoopBodyTraits>::begin(L), scc_iterator<Loop, LoopBodyTraits>::end(L))) { if (SCC.size() > 1) { DEBUG(dbgs() << "LVL: Detected a cycle in the loop body:\n"); DEBUG(L.dump()); return true; } } return false; } /// A helper function for converting Scalar types to vector types. /// If the incoming type is void, we return void. If the VF is 1, we return /// the scalar type. static Type *ToVectorTy(Type *Scalar, unsigned VF) { if (Scalar->isVoidTy() || VF == 1) return Scalar; return VectorType::get(Scalar, VF); } // FIXME: The following helper functions have multiple implementations // in the project. They can be effectively organized in a common Load/Store // utilities unit. /// A helper function that returns the pointer operand of a load or store /// instruction. static Value *getPointerOperand(Value *I) { if (auto *LI = dyn_cast<LoadInst>(I)) return LI->getPointerOperand(); if (auto *SI = dyn_cast<StoreInst>(I)) return SI->getPointerOperand(); return nullptr; } /// A helper function that returns the type of loaded or stored value.
static Type *getMemInstValueType(Value *I) { assert((isa<LoadInst>(I) || isa<StoreInst>(I)) && "Expected Load or Store instruction"); if (auto *LI = dyn_cast<LoadInst>(I)) return LI->getType(); return cast<StoreInst>(I)->getValueOperand()->getType(); } /// A helper function that returns the alignment of load or store instruction. static unsigned getMemInstAlignment(Value *I) { assert((isa<LoadInst>(I) || isa<StoreInst>(I)) && "Expected Load or Store instruction"); if (auto *LI = dyn_cast<LoadInst>(I)) return LI->getAlignment(); return cast<StoreInst>(I)->getAlignment(); } /// A helper function that returns the address space of the pointer operand of /// load or store instruction. static unsigned getMemInstAddressSpace(Value *I) { assert((isa<LoadInst>(I) || isa<StoreInst>(I)) && "Expected Load or Store instruction"); if (auto *LI = dyn_cast<LoadInst>(I)) return LI->getPointerAddressSpace(); return cast<StoreInst>(I)->getPointerAddressSpace(); } /// A helper function that returns true if the given type is irregular. The /// type is irregular if its allocated size doesn't equal the store size of an /// element of the corresponding vector type at the given vectorization factor. /// For example, at VF = 4 an i1 has an alloc size of one byte, so an array of /// 4 x i1 occupies 4 bytes, while the store size of <4 x i1> is a single byte. static bool hasIrregularType(Type *Ty, const DataLayout &DL, unsigned VF) { // Determine if an array of VF elements of type Ty is "bitcast compatible" // with a vector. if (VF > 1) { auto *VectorTy = VectorType::get(Ty, VF); return VF * DL.getTypeAllocSize(Ty) != DL.getTypeStoreSize(VectorTy); } // If the vectorization factor is one, we just check if an array of type Ty // requires padding between elements. return DL.getTypeAllocSizeInBits(Ty) != DL.getTypeSizeInBits(Ty); } /// A helper function that returns the reciprocal of the block probability of /// predicated blocks. If we return X, we are assuming the predicated block /// will execute once for every X iterations of the loop header. /// /// TODO: We should use actual block probability here, if available. Currently, /// we always assume predicated blocks have a 50% chance of executing. static unsigned getReciprocalPredBlockProb() { return 2; } /// A helper function that adds a 'fast' flag to floating-point operations. static Value *addFastMathFlag(Value *V) { if (isa<FPMathOperator>(V)) { FastMathFlags Flags; Flags.setFast(); cast<Instruction>(V)->setFastMathFlags(Flags); } return V; } /// A helper function that returns an integer or floating-point constant with /// value C. static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) { return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C) : ConstantFP::get(Ty, C); }
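// Illustrative sketch of the structure produced for a loop vectorized with
// VF = 4 and UF = 1 (see createVectorizedLoopSkeleton below): the trip count
// is rounded down to a multiple of VF * UF for the vector body, and the
// remaining iterations run in a scalar epilogue:
//
//   for (i = 0; i < n - n % 4; i += 4) // vector.body: 4-lane wide IR
//     A[i:i+3] = B[i:i+3] + C[i:i+3];
//   for (; i < n; ++i)                 // scalar epilogue (remainder)
//     A[i] = B[i] + C[i];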
namespace llvm { /// InnerLoopVectorizer vectorizes loops which contain only one basic /// block to a specified vectorization factor (VF). /// This class performs the widening of scalars into vectors, or multiple /// scalars. This class also implements the following features: /// * It inserts an epilogue loop for handling loops that don't have iteration /// counts that are known to be a multiple of the vectorization factor. /// * It handles the code generation for reduction variables. /// * Scalarization (implementation using scalars) of un-vectorizable /// instructions. /// InnerLoopVectorizer does not perform any vectorization-legality /// checks, and relies on the caller to check for the different legality /// aspects. The InnerLoopVectorizer relies on the /// LoopVectorizationLegality class to provide information about the induction /// and reduction variables that were found to a given vectorization factor. class InnerLoopVectorizer { public: InnerLoopVectorizer(Loop *OrigLoop, PredicatedScalarEvolution &PSE, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, unsigned VecWidth, unsigned UnrollFactor, LoopVectorizationLegality *LVL, LoopVectorizationCostModel *CM) : OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TLI(TLI), TTI(TTI), AC(AC), ORE(ORE), VF(VecWidth), UF(UnrollFactor), Builder(PSE.getSE()->getContext()), VectorLoopValueMap(UnrollFactor, VecWidth), Legal(LVL), Cost(CM) {} virtual ~InnerLoopVectorizer() = default; /// Create a new empty loop. Unlink the old loop and connect the new one. /// Return the pre-header block of the new loop. BasicBlock *createVectorizedLoopSkeleton(); /// Widen a single instruction within the innermost loop. void widenInstruction(Instruction &I); /// Fix the vectorized code, taking care of header phis, live-outs, and more. void fixVectorizedLoop(); // Return true if any runtime check is added. bool areSafetyChecksAdded() { return AddedSafetyChecks; } /// A type for vectorized values in the new loop. Each value from the /// original loop, when vectorized, is represented by UF vector values in the /// new unrolled loop, where UF is the unroll factor. using VectorParts = SmallVector<Value *, 2>; /// Vectorize a single PHINode in a block. This method handles the induction /// variable canonicalization. It supports both VF = 1 for unrolled loops and /// arbitrary length vectors. void widenPHIInstruction(Instruction *PN, unsigned UF, unsigned VF); /// A helper function to scalarize a single Instruction in the innermost loop. /// Generates a sequence of scalar instances for each lane between \p MinLane /// and \p MaxLane, times each part between \p MinPart and \p MaxPart, /// inclusive. void scalarizeInstruction(Instruction *Instr, const VPIteration &Instance, bool IfPredicateInstr); /// Widen an integer or floating-point induction variable \p IV. If \p Trunc /// is provided, the integer induction variable will first be truncated to /// the corresponding type. void widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc = nullptr); /// getOrCreateVectorValue and getOrCreateScalarValue coordinate to generate a /// vector or scalar value on-demand if one is not yet available. When /// vectorizing a loop, we visit the definition of an instruction before its /// uses. When visiting the definition, we either vectorize or scalarize the /// instruction, creating an entry for it in the corresponding map. (In some /// cases, such as induction variables, we will create both vector and scalar /// entries.) Then, as we encounter uses of the definition, we derive values /// for each scalar or vector use unless such a value is already available. /// For example, if we scalarize a definition and one of its uses is vector, /// we build the required vector on-demand with an insertelement sequence /// when visiting the use. Otherwise, if the use is scalar, we can use the /// existing scalar definition. /// /// Return a value in the new loop corresponding to \p V from the original /// loop at unroll index \p Part. If the value has already been vectorized, /// the corresponding vector entry in VectorLoopValueMap is returned. If, /// however, the value has a scalar entry in VectorLoopValueMap, we construct /// a new vector value on-demand by inserting the scalar values into a vector /// with an insertelement sequence.
If the value has been neither vectorized /// nor scalarized, it must be loop invariant, so we simply broadcast the /// value into a vector. Value *getOrCreateVectorValue(Value *V, unsigned Part); /// Return a value in the new loop corresponding to \p V from the original /// loop at unroll and vector indices \p Instance. If the value has been /// vectorized but not scalarized, the necessary extractelement instruction /// will be generated. Value *getOrCreateScalarValue(Value *V, const VPIteration &Instance); /// Construct the vector value of a scalarized value \p V one lane at a time. void packScalarIntoVectorValue(Value *V, const VPIteration &Instance); /// Try to vectorize the interleaved access group that \p Instr belongs to. void vectorizeInterleaveGroup(Instruction *Instr); /// Vectorize Load and Store instructions, optionally masking the vector /// operations if \p BlockInMask is non-null. void vectorizeMemoryInstruction(Instruction *Instr, VectorParts *BlockInMask = nullptr); /// \brief Set the debug location in the builder using the debug location in /// the instruction. void setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr); protected: friend class LoopVectorizationPlanner; /// A small list of PHINodes. using PhiVector = SmallVector<PHINode *, 4>; /// A type for scalarized values in the new loop. Each value from the /// original loop, when scalarized, is represented by UF x VF scalar values /// in the new unrolled loop, where UF is the unroll factor and VF is the /// vectorization factor. using ScalarParts = SmallVector<SmallVector<Value *, 4>, 2>; /// Set up the values of the IVs correctly when exiting the vector loop. void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II, Value *CountRoundDown, Value *EndValue, BasicBlock *MiddleBlock); /// Create a new induction variable inside L. PHINode *createInductionVariable(Loop *L, Value *Start, Value *End, Value *Step, Instruction *DL); /// Handle all cross-iteration phis in the header. void fixCrossIterationPHIs(); /// Fix a first-order recurrence. This is the second phase of vectorizing /// this phi node. void fixFirstOrderRecurrence(PHINode *Phi); /// Fix a reduction cross-iteration phi. This is the second phase of /// vectorizing this phi node. void fixReduction(PHINode *Phi); /// \brief The Loop exit block may have single value PHI nodes with some /// incoming value. While vectorizing we only handled real values /// that were defined inside the loop and we should have one value for /// each predecessor of its parent basic block. See PR14725. void fixLCSSAPHIs(); /// Iteratively sink the scalarized operands of a predicated instruction into /// the block that was created for it. void sinkScalarOperands(Instruction *PredInst); /// Shrinks vector element sizes to the smallest bitwidth they can be legally /// represented as. void truncateToMinimalBitwidths(); /// Insert the new loop to the loop hierarchy and pass manager /// and update the analysis passes. void updateAnalysis(); /// Create a broadcast instruction. This method generates a broadcast /// instruction (shuffle) for loop invariant values and for the induction /// value. If this is the induction variable then we extend it to N, N+1, ... /// this is needed because each iteration in the loop corresponds to a SIMD /// element. virtual Value *getBroadcastInstrs(Value *V); /// This function adds (StartIdx, StartIdx + Step, StartIdx + 2*Step, ...) /// to each vector element of Val. The sequence starts at StartIndex. /// \p Opcode is relevant for FP induction variable.
virtual Value *getStepVector(Value *Val, int StartIdx, Value *Step, Instruction::BinaryOps Opcode = Instruction::BinaryOpsEnd); /// Compute scalar induction steps. \p ScalarIV is the scalar induction /// variable on which to base the steps, \p Step is the size of the step, and /// \p EntryVal is the value from the original loop that maps to the steps. /// Note that \p EntryVal doesn't have to be an induction variable (e.g., it /// can be a truncate instruction). void buildScalarSteps(Value *ScalarIV, Value *Step, Value *EntryVal, const InductionDescriptor &ID); /// Create a vector induction phi node based on an existing scalar one. \p /// EntryVal is the value from the original loop that maps to the vector phi /// node, and \p Step is the loop-invariant step. If \p EntryVal is a /// truncate instruction, instead of widening the original IV, we widen a /// version of the IV truncated to \p EntryVal's type. void createVectorIntOrFpInductionPHI(const InductionDescriptor &II, Value *Step, Instruction *EntryVal); /// Returns true if an instruction \p I should be scalarized instead of /// vectorized for the chosen vectorization factor. bool shouldScalarizeInstruction(Instruction *I) const; /// Returns true if we should generate a scalar version of \p IV. bool needsScalarInduction(Instruction *IV) const; /// If there is a cast involved in the induction variable \p ID, which should /// be ignored in the vectorized loop body, this function records the /// VectorLoopValue of the respective Phi also as the VectorLoopValue of the /// cast. We had already proved that the casted Phi is equal to the uncasted /// Phi in the vectorized loop (under a runtime guard), and therefore /// there is no need to vectorize the cast - the same value can be used in the /// vector loop for both the Phi and the cast. /// If \p VectorLoopValue is a scalarized value, \p Lane is also specified, /// Otherwise, \p VectorLoopValue is a widened/vectorized value. void recordVectorLoopValueForInductionCast (const InductionDescriptor &ID, Value *VectorLoopValue, unsigned Part, unsigned Lane = UINT_MAX); /// Generate a shuffle sequence that will reverse the vector Vec. virtual Value *reverseVector(Value *Vec); /// Returns (and creates if needed) the original loop trip count. Value *getOrCreateTripCount(Loop *NewLoop); /// Returns (and creates if needed) the trip count of the widened loop. Value *getOrCreateVectorTripCount(Loop *NewLoop); /// Returns a bitcasted value to the requested vector type. /// Also handles bitcasts of vector <-> vector types. Value *createBitOrPointerCast(Value *V, VectorType *DstVTy, const DataLayout &DL); /// Emit a bypass check to see if the vector trip count is zero, including if /// it overflows. void emitMinimumIterationCountCheck(Loop *L, BasicBlock *Bypass); /// Emit a bypass check to see if all of the SCEV assumptions we've /// had to make are correct. void emitSCEVChecks(Loop *L, BasicBlock *Bypass); /// Emit bypass checks to check any memory assumptions we may have made. void emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass); /// Add additional metadata to \p To that was not present on \p Orig. /// /// Currently this is used to add the noalias annotations based on the /// inserted memchecks. Use this for instructions that are *cloned* into the /// vector loop. void addNewMetadata(Instruction *To, const Instruction *Orig); /// Add metadata from one instruction to another. /// /// This includes both the original MDs from \p From and additional ones (\see /// addNewMetadata). 
Use this for *newly created* instructions in the vector /// loop. void addMetadata(Instruction *To, Instruction *From); /// \brief Similar to the previous function but it adds the metadata to a /// vector of instructions. void addMetadata(ArrayRef<Value *> To, Instruction *From); /// The original loop. Loop *OrigLoop; /// A wrapper around ScalarEvolution used to add runtime SCEV checks. Applies /// dynamic knowledge to simplify SCEV expressions and converts them to a /// more usable form. PredicatedScalarEvolution &PSE; /// Loop Info. LoopInfo *LI; /// Dominator Tree. DominatorTree *DT; /// Alias Analysis. AliasAnalysis *AA; /// Target Library Info. const TargetLibraryInfo *TLI; /// Target Transform Info. const TargetTransformInfo *TTI; /// Assumption Cache. AssumptionCache *AC; /// Interface to emit optimization remarks. OptimizationRemarkEmitter *ORE; /// \brief LoopVersioning. It's only set up (non-null) if memchecks were /// used. /// /// This is currently only used to add no-alias metadata based on the /// memchecks. The actual versioning is performed manually. std::unique_ptr<LoopVersioning> LVer; /// The vectorization SIMD factor to use. Each vector will have this many /// vector elements. unsigned VF; /// The vectorization unroll factor to use. Each scalar is vectorized to this /// many different vector instructions. unsigned UF; /// The builder that we use. IRBuilder<> Builder; // --- Vectorization state --- /// The vector-loop preheader. BasicBlock *LoopVectorPreHeader; /// The scalar-loop preheader. BasicBlock *LoopScalarPreHeader; /// Middle Block between the vector and the scalar. BasicBlock *LoopMiddleBlock; /// The ExitBlock of the scalar loop. BasicBlock *LoopExitBlock; /// The vector loop body. BasicBlock *LoopVectorBody; /// The scalar loop body. BasicBlock *LoopScalarBody; /// A list of all bypass blocks. The first block is the entry of the loop. SmallVector<BasicBlock *, 4> LoopBypassBlocks; /// The new Induction variable which was added to the new block. PHINode *Induction = nullptr; /// The induction variable of the old basic block. PHINode *OldInduction = nullptr; /// Maps values from the original loop to their corresponding values in the /// vectorized loop. A key value can map to either vector values, scalar /// values or both kinds of values, depending on whether the key was /// vectorized and scalarized. VectorizerValueMap VectorLoopValueMap; /// Store instructions that were predicated. SmallVector<Instruction *, 4> PredicatedInstructions; /// Trip count of the original loop. Value *TripCount = nullptr; /// Trip count of the widened loop (TripCount - TripCount % (VF*UF)) Value *VectorTripCount = nullptr; /// The legality analysis. LoopVectorizationLegality *Legal; /// The profitability analysis. LoopVectorizationCostModel *Cost; // Record whether runtime checks are added. bool AddedSafetyChecks = false; // Holds the end values for each induction variable. We save the end values // so we can later fix-up the external users of the induction variables.
DenseMap<PHINode *, Value *> IVEndValues; }; class InnerLoopUnroller : public InnerLoopVectorizer { public: InnerLoopUnroller(Loop *OrigLoop, PredicatedScalarEvolution &PSE, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, unsigned UnrollFactor, LoopVectorizationLegality *LVL, LoopVectorizationCostModel *CM) : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE, 1, UnrollFactor, LVL, CM) {} private: Value *getBroadcastInstrs(Value *V) override; Value *getStepVector(Value *Val, int StartIdx, Value *Step, Instruction::BinaryOps Opcode = Instruction::BinaryOpsEnd) override; Value *reverseVector(Value *Vec) override; }; } // end namespace llvm /// \brief Look for a meaningful debug location on the instruction or its /// operands. static Instruction *getDebugLocFromInstOrOperands(Instruction *I) { if (!I) return I; DebugLoc Empty; if (I->getDebugLoc() != Empty) return I; for (User::op_iterator OI = I->op_begin(), OE = I->op_end(); OI != OE; ++OI) { if (Instruction *OpInst = dyn_cast<Instruction>(*OI)) if (OpInst->getDebugLoc() != Empty) return OpInst; } return I; } void InnerLoopVectorizer::setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr) { if (const Instruction *Inst = dyn_cast_or_null<Instruction>(Ptr)) { const DILocation *DIL = Inst->getDebugLoc(); if (DIL && Inst->getFunction()->isDebugInfoForProfiling() && !isa<DbgInfoIntrinsic>(Inst)) B.SetCurrentDebugLocation(DIL->cloneWithDuplicationFactor(UF * VF)); else B.SetCurrentDebugLocation(DIL); } else B.SetCurrentDebugLocation(DebugLoc()); } #ifndef NDEBUG /// \return string containing a file name and a line # for the given loop. static std::string getDebugLocString(const Loop *L) { std::string Result; if (L) { raw_string_ostream OS(Result); if (const DebugLoc LoopDbgLoc = L->getStartLoc()) LoopDbgLoc.print(OS); else // Just print the module name. OS << L->getHeader()->getParent()->getParent()->getModuleIdentifier(); OS.flush(); } return Result; } #endif void InnerLoopVectorizer::addNewMetadata(Instruction *To, const Instruction *Orig) { // If the loop was versioned with memchecks, add the corresponding no-alias // metadata. if (LVer && (isa<LoadInst>(Orig) || isa<StoreInst>(Orig))) LVer->annotateInstWithNoAlias(To, Orig); } void InnerLoopVectorizer::addMetadata(Instruction *To, Instruction *From) { propagateMetadata(To, From); addNewMetadata(To, From); } void InnerLoopVectorizer::addMetadata(ArrayRef<Value *> To, Instruction *From) { for (Value *V : To) { if (Instruction *I = dyn_cast<Instruction>(V)) addMetadata(I, From); } } namespace llvm { /// \brief The group of interleaved loads/stores sharing the same stride and /// close to each other. /// /// Each member in this group has an index starting from 0, and the largest /// index should be less than the interleave factor, which is equal to the /// absolute value of the access's stride. /// /// E.g. An interleaved load group of factor 4: /// for (unsigned i = 0; i < 1024; i+=4) { /// a = A[i]; // Member of index 0 /// b = A[i+1]; // Member of index 1 /// d = A[i+3]; // Member of index 3 /// ... /// } /// /// An interleaved store group of factor 4: /// for (unsigned i = 0; i < 1024; i+=4) { /// ... /// A[i] = a; // Member of index 0 /// A[i+1] = b; // Member of index 1 /// A[i+2] = c; // Member of index 2 /// A[i+3] = d; // Member of index 3 /// } /// /// Note: the interleaved load group could have gaps (missing members), but /// the interleaved store group doesn't allow gaps.
class InterleaveGroup { public: InterleaveGroup(Instruction *Instr, int Stride, unsigned Align) : Align(Align), InsertPos(Instr) { assert(Align && "The alignment should be non-zero"); Factor = std::abs(Stride); assert(Factor > 1 && "Invalid interleave factor"); Reverse = Stride < 0; Members[0] = Instr; } bool isReverse() const { return Reverse; } unsigned getFactor() const { return Factor; } unsigned getAlignment() const { return Align; } unsigned getNumMembers() const { return Members.size(); } /// \brief Try to insert a new member \p Instr with index \p Index and /// alignment \p NewAlign. The index is related to the leader and it could be /// negative if it is the new leader. /// /// \returns false if the instruction doesn't belong to the group. bool insertMember(Instruction *Instr, int Index, unsigned NewAlign) { assert(NewAlign && "The new member's alignment should be non-zero"); int Key = Index + SmallestKey; // Skip if there is already a member with the same index. if (Members.count(Key)) return false; if (Key > LargestKey) { // The largest index is always less than the interleave factor. if (Index >= static_cast<int>(Factor)) return false; LargestKey = Key; } else if (Key < SmallestKey) { // The distance between the largest and smallest keys must stay within // the interleave factor. if (LargestKey - Key >= static_cast<int>(Factor)) return false; SmallestKey = Key; } // It's always safe to select the minimum alignment. Align = std::min(Align, NewAlign); Members[Key] = Instr; return true; } /// \brief Get the member with the given index \p Index /// /// \returns nullptr if it contains no such member. Instruction *getMember(unsigned Index) const { int Key = SmallestKey + Index; if (!Members.count(Key)) return nullptr; return Members.find(Key)->second; } /// \brief Get the index for the given member. Unlike the key in the member /// map, the index starts from 0. unsigned getIndex(Instruction *Instr) const { for (auto I : Members) if (I.second == Instr) return I.first - SmallestKey; llvm_unreachable("InterleaveGroup contains no such member"); } Instruction *getInsertPos() const { return InsertPos; } void setInsertPos(Instruction *Inst) { InsertPos = Inst; } /// Add metadata (e.g. alias info) from the instructions in this group to \p /// NewInst. /// /// FIXME: this function currently does not add noalias metadata the way /// addNewMetadata does. To do that we need to compute the intersection of the /// noalias info from all members. void addMetadata(Instruction *NewInst) const { SmallVector<Value *, 4> VL; std::transform(Members.begin(), Members.end(), std::back_inserter(VL), [](std::pair<int, Instruction *> p) { return p.second; }); propagateMetadata(NewInst, VL); } private: unsigned Factor; // Interleave Factor. bool Reverse; unsigned Align; DenseMap<int, Instruction *> Members; int SmallestKey = 0; int LargestKey = 0; // To avoid breaking dependences, vectorized instructions of an interleave // group should be inserted at either the first load or the last store in // program order. // // E.g. %even = load i32 // Insert Position // %add = add i32 %even // Use of %even // %odd = load i32 // // store i32 %even // %odd = add i32 // Def of %odd // store i32 %odd // Insert Position Instruction *InsertPos; }; } // end namespace llvm
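// For illustration, a factor-2 load group such as {even = A[i]; odd = A[i+1];}
// vectorized at VF = 4 becomes one wide load followed by two shufflevectors
// that de-interleave the lanes (IR sketched with abbreviated mask syntax):
//   %wide = load <8 x i32>, <8 x i32>* %ptr
//   %even = shufflevector <8 x i32> %wide, <8 x i32> undef, <0, 2, 4, 6>
//   %odd  = shufflevector <8 x i32> %wide, <8 x i32> undef, <1, 3, 5, 7>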
namespace { /// \brief Drive the analysis of interleaved memory accesses in the loop. /// /// Use this class to analyze interleaved accesses only when we can vectorize /// a loop. Otherwise it's meaningless to do analysis as the vectorization /// on interleaved accesses is unsafe. /// /// The analysis collects interleave groups and records the relationships /// between the members and their groups in a map. class InterleavedAccessInfo { public: InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L, DominatorTree *DT, LoopInfo *LI) : PSE(PSE), TheLoop(L), DT(DT), LI(LI) {} ~InterleavedAccessInfo() { SmallSet<InterleaveGroup *, 4> DelSet; // Avoid releasing a pointer twice. for (auto &I : InterleaveGroupMap) DelSet.insert(I.second); for (auto *Ptr : DelSet) delete Ptr; } /// \brief Analyze the interleaved accesses and collect them in interleave /// groups. Substitute symbolic strides using \p Strides. void analyzeInterleaving(const ValueToValueMap &Strides); /// \brief Check if \p Instr belongs to any interleave group. bool isInterleaved(Instruction *Instr) const { return InterleaveGroupMap.count(Instr); } /// \brief Get the interleave group that \p Instr belongs to. /// /// \returns nullptr if \p Instr does not belong to any group. InterleaveGroup *getInterleaveGroup(Instruction *Instr) const { if (InterleaveGroupMap.count(Instr)) return InterleaveGroupMap.find(Instr)->second; return nullptr; } /// \brief Returns true if an interleaved group that may access memory /// out-of-bounds requires a scalar epilogue iteration for correctness. bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; } /// \brief Initialize the LoopAccessInfo used for dependence checking. void setLAI(const LoopAccessInfo *Info) { LAI = Info; } private: /// A wrapper around ScalarEvolution, used to add runtime SCEV checks. /// Simplifies SCEV expressions in the context of existing SCEV assumptions. /// The interleaved access analysis can also add new predicates (for example /// by versioning strides of pointers). PredicatedScalarEvolution &PSE; Loop *TheLoop; DominatorTree *DT; LoopInfo *LI; const LoopAccessInfo *LAI = nullptr; /// True if the loop may contain non-reversed interleaved groups with /// out-of-bounds accesses. We ensure we don't speculatively access memory /// out-of-bounds by executing at least one scalar epilogue iteration. bool RequiresScalarEpilogue = false; /// Holds the relationships between the members and the interleave group. DenseMap<Instruction *, InterleaveGroup *> InterleaveGroupMap; /// Holds dependences among the memory accesses in the loop. It maps a source /// access to a set of dependent sink accesses. DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences; /// \brief The descriptor for a strided memory access. struct StrideDescriptor { StrideDescriptor() = default; StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size, unsigned Align) : Stride(Stride), Scev(Scev), Size(Size), Align(Align) {} // The access's stride. It is negative for a reverse access. int64_t Stride = 0; // The scalar expression of this access. const SCEV *Scev = nullptr; // The size of the memory object. uint64_t Size = 0; // The alignment of this access. unsigned Align = 0; }; /// \brief A type for holding instructions and their stride descriptors. using StrideEntry = std::pair<Instruction *, StrideDescriptor>; /// \brief Create a new interleave group with the given instruction \p Instr, /// stride \p Stride and alignment \p Align. /// /// \returns the newly created interleave group. InterleaveGroup *createInterleaveGroup(Instruction *Instr, int Stride, unsigned Align) { assert(!InterleaveGroupMap.count(Instr) && "Already in an interleaved access group"); InterleaveGroupMap[Instr] = new InterleaveGroup(Instr, Stride, Align); return InterleaveGroupMap[Instr]; } /// \brief Release the group and remove all the relationships.
void releaseGroup(InterleaveGroup *Group) { for (unsigned i = 0; i < Group->getFactor(); i++) if (Instruction *Member = Group->getMember(i)) InterleaveGroupMap.erase(Member); delete Group; } /// \brief Collect all the accesses with a constant stride in program order. void collectConstStrideAccesses( MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo, const ValueToValueMap &Strides); /// \brief Returns true if \p Stride is allowed in an interleaved group. static bool isStrided(int Stride) { unsigned Factor = std::abs(Stride); return Factor >= 2 && Factor <= MaxInterleaveGroupFactor; } /// \brief Returns true if \p BB is a predicated block. bool isPredicated(BasicBlock *BB) const { return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT); } /// \brief Returns true if LoopAccessInfo can be used for dependence queries. bool areDependencesValid() const { return LAI && LAI->getDepChecker().getDependences(); } /// \brief Returns true if memory accesses \p A and \p B can be reordered, if /// necessary, when constructing interleaved groups. /// /// \p A must precede \p B in program order. We return false if reordering is /// not necessary or is prevented because \p A and \p B may be dependent. bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A, StrideEntry *B) const { // Code motion for interleaved accesses can potentially hoist strided loads // and sink strided stores. The code below checks the legality of the // following two conditions: // // 1. Potentially moving a strided load (B) before any store (A) that // precedes B, or // // 2. Potentially moving a strided store (A) after any load or store (B) // that A precedes. // // It's legal to reorder A and B if we know there isn't a dependence from A // to B. Note that this determination is conservative since some // dependences could potentially be reordered safely. // A is potentially the source of a dependence. auto *Src = A->first; auto SrcDes = A->second; // B is potentially the sink of a dependence. auto *Sink = B->first; auto SinkDes = B->second; // Code motion for interleaved accesses can't violate WAR dependences. // Thus, reordering is legal if the source isn't a write. if (!Src->mayWriteToMemory()) return true; // At least one of the accesses must be strided. if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride)) return true; // If dependence information is not available from LoopAccessInfo, // conservatively assume the instructions can't be reordered. if (!areDependencesValid()) return false; // If we know there is a dependence from source to sink, assume the // instructions can't be reordered. Otherwise, reordering is legal. return !Dependences.count(Src) || !Dependences.lookup(Src).count(Sink); } /// \brief Collect the dependences from LoopAccessInfo. /// /// We process the dependences once during the interleaved access analysis to /// enable constant-time dependence queries. void collectDependences() { if (!areDependencesValid()) return; auto *Deps = LAI->getDepChecker().getDependences(); for (auto Dep : *Deps) Dependences[Dep.getSource(*LAI)].insert(Dep.getDestination(*LAI)); } }; /// Utility class for getting and setting loop vectorizer hints in the form /// of loop metadata. /// This class keeps a number of loop annotations locally (as member variables) /// and can, upon request, write them back as metadata on the loop. It will /// initially scan the loop for existing metadata, and will update the local /// values based on information in the loop.
/// We cannot write all values to metadata, as the mere presence of some info, /// for example 'force', means a decision has been made. So, we need to be /// careful NOT to add them if the user hasn't specifically asked so. class LoopVectorizeHints { enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE, HK_ISVECTORIZED }; /// Hint - associates name and validation with the hint value. struct Hint { const char *Name; unsigned Value; // This may have to change for non-numeric values. HintKind Kind; Hint(const char *Name, unsigned Value, HintKind Kind) : Name(Name), Value(Value), Kind(Kind) {} bool validate(unsigned Val) { switch (Kind) { case HK_WIDTH: return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth; case HK_UNROLL: return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor; case HK_FORCE: return (Val <= 1); case HK_ISVECTORIZED: return (Val==0 || Val==1); } return false; } }; /// Vectorization width. Hint Width; /// Vectorization interleave factor. Hint Interleave; /// Vectorization forced Hint Force; /// Already Vectorized Hint IsVectorized; /// Return the loop metadata prefix. static StringRef Prefix() { return "llvm.loop."; } /// True if there is any unsafe math in the loop. bool PotentiallyUnsafe = false; public: enum ForceKind { FK_Undefined = -1, ///< Not selected. FK_Disabled = 0, ///< Forcing disabled. FK_Enabled = 1, ///< Forcing enabled. }; LoopVectorizeHints(const Loop *L, bool DisableInterleaving, OptimizationRemarkEmitter &ORE) : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH), Interleave("interleave.count", DisableInterleaving, HK_UNROLL), Force("vectorize.enable", FK_Undefined, HK_FORCE), IsVectorized("isvectorized", 0, HK_ISVECTORIZED), TheLoop(L), ORE(ORE) { // Populate values with existing loop metadata. getHintsFromMetadata(); // force-vector-interleave overrides DisableInterleaving. if (VectorizerParams::isInterleaveForced()) Interleave.Value = VectorizerParams::VectorizationInterleave; if (IsVectorized.Value != 1) // If the vectorization width and interleaving count are both 1 then // consider the loop to have been already vectorized because there's // nothing more that we can do. IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1; DEBUG(if (DisableInterleaving && Interleave.Value == 1) dbgs() << "LV: Interleaving disabled by the pass manager\n"); } /// Mark the loop L as already vectorized by setting the width to 1. void setAlreadyVectorized() { IsVectorized.Value = 1; Hint Hints[] = {IsVectorized}; writeHintsToMetadata(Hints); } bool allowVectorization(Function *F, Loop *L, bool AlwaysVectorize) const { if (getForce() == LoopVectorizeHints::FK_Disabled) { DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n"); emitRemarkWithHints(); return false; } if (!AlwaysVectorize && getForce() != LoopVectorizeHints::FK_Enabled) { DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n"); emitRemarkWithHints(); return false; } if (getIsVectorized() == 1) { DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n"); // FIXME: Add interleave.disable metadata. This will allow // vectorize.disable to be used without disabling the pass and errors // to differentiate between disabled vectorization and a width of 1. 
ORE.emit([&]() { return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(), "AllDisabled", L->getStartLoc(), L->getHeader()) << "loop not vectorized: vectorization and interleaving are " "explicitly disabled, or the loop has already been " "vectorized"; }); return false; } return true; } /// Dumps all the hint information. void emitRemarkWithHints() const { using namespace ore; ORE.emit([&]() { if (Force.Value == LoopVectorizeHints::FK_Disabled) return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled", TheLoop->getStartLoc(), TheLoop->getHeader()) << "loop not vectorized: vectorization is explicitly disabled"; else { OptimizationRemarkMissed R(LV_NAME, "MissedDetails", TheLoop->getStartLoc(), TheLoop->getHeader()); R << "loop not vectorized"; if (Force.Value == LoopVectorizeHints::FK_Enabled) { R << " (Force=" << NV("Force", true); if (Width.Value != 0) R << ", Vector Width=" << NV("VectorWidth", Width.Value); if (Interleave.Value != 0) R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value); R << ")"; } return R; } }); } unsigned getWidth() const { return Width.Value; } unsigned getInterleave() const { return Interleave.Value; } unsigned getIsVectorized() const { return IsVectorized.Value; } enum ForceKind getForce() const { return (ForceKind)Force.Value; } /// \brief If hints are provided that force vectorization, use the AlwaysPrint /// pass name to force the frontend to print the diagnostic. const char *vectorizeAnalysisPassName() const { if (getWidth() == 1) return LV_NAME; if (getForce() == LoopVectorizeHints::FK_Disabled) return LV_NAME; if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0) return LV_NAME; return OptimizationRemarkAnalysis::AlwaysPrint; } bool allowReordering() const { // When loop hints that enable vectorization are provided, we allow the // vectorizer to change the order of operations given by the scalar loop. // This is not enabled by default because it can be unsafe or inefficient. // For example, reordering floating-point operations will change the way // round-off error accumulates in the loop. return getForce() == LoopVectorizeHints::FK_Enabled || getWidth() > 1; } bool isPotentiallyUnsafe() const { // Avoid FP vectorization if the target is unsure about proper support. // This may be related to the SIMD unit in the target not handling // IEEE 754 FP ops properly, or bad single-to-double promotions. // Otherwise, a sequence of vectorized loops, even without reduction, // could lead to different end results on the destination vectors. return getForce() != LoopVectorizeHints::FK_Enabled && PotentiallyUnsafe; } void setPotentiallyUnsafe() { PotentiallyUnsafe = true; } private: /// Find hints specified in the loop metadata and update local values. void getHintsFromMetadata() { MDNode *LoopID = TheLoop->getLoopID(); if (!LoopID) return; // First operand should refer to the loop id itself. assert(LoopID->getNumOperands() > 0 && "requires at least one operand"); assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { const MDString *S = nullptr; SmallVector<Metadata *, 4> Args;
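// For reference, the loop metadata walked here typically has the following
// shape in the IR (illustrative):
//   br i1 %exitcond, label %exit, label %loop, !llvm.loop !0
//   !0 = distinct !{!0, !1, !2}
//   !1 = !{!"llvm.loop.vectorize.width", i32 4}
//   !2 = !{!"llvm.loop.vectorize.enable", i1 1}
// Operand 0 of !0 is the self-reference asserted on above; each remaining
// operand is either an MDString or an MDNode whose first operand names the
// hint.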
// The expected hint is either a MDString or a MDNode with the first // operand a MDString. if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) { if (!MD || MD->getNumOperands() == 0) continue; S = dyn_cast<MDString>(MD->getOperand(0)); for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i) Args.push_back(MD->getOperand(i)); } else { S = dyn_cast<MDString>(LoopID->getOperand(i)); assert(Args.size() == 0 && "too many arguments for MDString"); } if (!S) continue; // Check if the hint starts with the loop metadata prefix. StringRef Name = S->getString(); if (Args.size() == 1) setHint(Name, Args[0]); } } /// Check a string hint with one operand and set the value if valid. void setHint(StringRef Name, Metadata *Arg) { if (!Name.startswith(Prefix())) return; Name = Name.substr(Prefix().size(), StringRef::npos); const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg); if (!C) return; unsigned Val = C->getZExtValue(); Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized}; for (auto H : Hints) { if (Name == H->Name) { if (H->validate(Val)) H->Value = Val; else DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n"); break; } } } /// Create a new hint from name / value pair. MDNode *createHintMetadata(StringRef Name, unsigned V) const { LLVMContext &Context = TheLoop->getHeader()->getContext(); Metadata *MDs[] = {MDString::get(Context, Name), ConstantAsMetadata::get( ConstantInt::get(Type::getInt32Ty(Context), V))}; return MDNode::get(Context, MDs); } /// Matches metadata with hint name. bool matchesHintMetadataName(MDNode *Node, ArrayRef<Hint> HintTypes) { MDString *Name = dyn_cast<MDString>(Node->getOperand(0)); if (!Name) return false; for (auto H : HintTypes) if (Name->getString().endswith(H.Name)) return true; return false; } /// Sets current hints into loop metadata, keeping other values intact. void writeHintsToMetadata(ArrayRef<Hint> HintTypes) { if (HintTypes.empty()) return; // Reserve the first element to LoopID (see below). SmallVector<Metadata *, 4> MDs(1); // If the loop already has metadata, then ignore the existing operands. MDNode *LoopID = TheLoop->getLoopID(); if (LoopID) { for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { MDNode *Node = cast<MDNode>(LoopID->getOperand(i)); // If node in update list, ignore old value. if (!matchesHintMetadataName(Node, HintTypes)) MDs.push_back(Node); } } // Now, add the missing hints. for (auto H : HintTypes) MDs.push_back(createHintMetadata(Twine(Prefix(), H.Name).str(), H.Value)); // Replace current metadata node with new one. LLVMContext &Context = TheLoop->getHeader()->getContext(); MDNode *NewLoopID = MDNode::get(Context, MDs); // Set operand 0 to refer to the loop id itself. NewLoopID->replaceOperandWith(0, NewLoopID); TheLoop->setLoopID(NewLoopID); } /// The loop these hints belong to. const Loop *TheLoop; /// Interface to emit optimization remarks.
OptimizationRemarkEmitter &ORE; }; } // end anonymous namespace static void emitMissedWarning(Function *F, Loop *L, const LoopVectorizeHints &LH, OptimizationRemarkEmitter *ORE) { LH.emitRemarkWithHints(); if (LH.getForce() == LoopVectorizeHints::FK_Enabled) { if (LH.getWidth() != 1) ORE->emit(DiagnosticInfoOptimizationFailure( DEBUG_TYPE, "FailedRequestedVectorization", L->getStartLoc(), L->getHeader()) << "loop not vectorized: " << "failed explicitly specified loop vectorization"); else if (LH.getInterleave() != 1) ORE->emit(DiagnosticInfoOptimizationFailure( DEBUG_TYPE, "FailedRequestedInterleaving", L->getStartLoc(), L->getHeader()) << "loop not interleaved: " << "failed explicitly specified loop interleaving"); } } namespace { /// LoopVectorizationLegality checks if it is legal to vectorize a loop, and /// to what vectorization factor. /// This class does not look at the profitability of vectorization, only the /// legality. This class has two main kinds of checks: /// * Memory checks - The code in canVectorizeMemory checks if vectorization /// will change the order of memory accesses in a way that will change the /// correctness of the program. /// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory /// checks for a number of different conditions, such as the availability of a /// single induction variable, that all types are supported and vectorizable, /// etc. This code reflects the capabilities of InnerLoopVectorizer. /// This class is also used by InnerLoopVectorizer for identifying /// induction variables and the different reduction variables. class LoopVectorizationLegality { public: LoopVectorizationLegality( Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT, TargetLibraryInfo *TLI, AliasAnalysis *AA, Function *F, const TargetTransformInfo *TTI, std::function<const LoopAccessInfo &(Loop &)> *GetLAA, LoopInfo *LI, OptimizationRemarkEmitter *ORE, LoopVectorizationRequirements *R, - LoopVectorizeHints *H) + LoopVectorizeHints *H, DemandedBits *DB, AssumptionCache *AC) : TheLoop(L), PSE(PSE), TLI(TLI), TTI(TTI), DT(DT), GetLAA(GetLAA), - ORE(ORE), InterleaveInfo(PSE, L, DT, LI), Requirements(R), Hints(H) {} + ORE(ORE), InterleaveInfo(PSE, L, DT, LI), Requirements(R), Hints(H), + DB(DB), AC(AC) {} /// ReductionList contains the reduction descriptors for all /// of the reductions that were found in the loop. using ReductionList = DenseMap<PHINode *, RecurrenceDescriptor>; /// InductionList saves induction variables and maps them to the /// induction descriptor. using InductionList = MapVector<PHINode *, InductionDescriptor>; /// RecurrenceSet contains the phi nodes that are recurrences other than /// inductions and reductions. using RecurrenceSet = SmallPtrSet<const PHINode *, 8>; /// Returns true if it is legal to vectorize this loop. /// This does not mean that it is profitable to vectorize this /// loop, only that it is legal to do so. bool canVectorize(); /// Returns the primary induction variable. PHINode *getPrimaryInduction() { return PrimaryInduction; } /// Returns the reduction variables found in the loop. ReductionList *getReductionVars() { return &Reductions; } /// Returns the induction variables found in the loop. InductionList *getInductionVars() { return &Inductions; } /// Return the first-order recurrences found in the loop. RecurrenceSet *getFirstOrderRecurrences() { return &FirstOrderRecurrences; } /// Return the set of instructions to sink to handle first-order recurrences. DenseMap<Instruction *, Instruction *> &getSinkAfter() { return SinkAfter; }
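// Illustrative example of a first-order recurrence: in
//   t = 0; for (i = 0; i < n; ++i) { b[i] = t + a[i]; t = a[i]; }
// the use of t in iteration i reads the a[i] produced in iteration i-1, so
// the corresponding phi is recorded in FirstOrderRecurrences, and some users
// may have to be sunk past the producer (tracked by the SinkAfter map) to
// keep the vectorized def-use order legal.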
/// Returns the widest induction type. Type *getWidestInductionType() { return WidestIndTy; } /// Returns True if V is a Phi node of an induction variable in this loop. bool isInductionPhi(const Value *V); /// Returns True if V is a cast that is part of an induction def-use chain, /// and had been proven to be redundant under a runtime guard (in other /// words, the cast has the same SCEV expression as the induction phi). bool isCastedInductionVariable(const Value *V); /// Returns True if V can be considered as an induction variable in this /// loop. V can be the induction phi, or some redundant cast in the def-use /// chain of the induction phi. bool isInductionVariable(const Value *V); /// Returns True if PN is a reduction variable in this loop. bool isReductionVariable(PHINode *PN) { return Reductions.count(PN); } /// Returns True if Phi is a first-order recurrence in this loop. bool isFirstOrderRecurrence(const PHINode *Phi); /// Return true if the block BB needs to be predicated in order for the loop /// to be vectorized. bool blockNeedsPredication(BasicBlock *BB); /// Check if this pointer is consecutive when vectorizing. This happens /// when the last index of the GEP is the induction variable, or that the /// pointer itself is an induction variable. /// This check allows us to vectorize A[idx] into a wide load/store. /// Returns: /// 0 - Stride is unknown or non-consecutive. /// 1 - Address is consecutive. /// -1 - Address is consecutive, and decreasing. /// NOTE: This method must only be used before modifying the original scalar /// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965). int isConsecutivePtr(Value *Ptr); /// Returns true if the value V is uniform within the loop. bool isUniform(Value *V); /// Returns the information that we collected about runtime memory check. const RuntimePointerChecking *getRuntimePointerChecking() const { return LAI->getRuntimePointerChecking(); } const LoopAccessInfo *getLAI() const { return LAI; } /// \brief Check if \p Instr belongs to any interleaved access group. bool isAccessInterleaved(Instruction *Instr) { return InterleaveInfo.isInterleaved(Instr); } /// \brief Get the interleaved access group that \p Instr belongs to. const InterleaveGroup *getInterleavedAccessGroup(Instruction *Instr) { return InterleaveInfo.getInterleaveGroup(Instr); } /// \brief Returns true if an interleaved group requires a scalar iteration /// to handle accesses with gaps. bool requiresScalarEpilogue() const { return InterleaveInfo.requiresScalarEpilogue(); } unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); } uint64_t getMaxSafeRegisterWidth() const { return LAI->getDepChecker().getMaxSafeRegisterWidth(); } bool hasStride(Value *V) { return LAI->hasStride(V); } /// Returns true if the target machine supports masked store operation /// for the given \p DataType and kind of access to \p Ptr. bool isLegalMaskedStore(Type *DataType, Value *Ptr) { return isConsecutivePtr(Ptr) && TTI->isLegalMaskedStore(DataType); } /// Returns true if the target machine supports masked load operation /// for the given \p DataType and kind of access to \p Ptr. bool isLegalMaskedLoad(Type *DataType, Value *Ptr) { return isConsecutivePtr(Ptr) && TTI->isLegalMaskedLoad(DataType); } /// Returns true if the target machine supports masked scatter operation /// for the given \p DataType. bool isLegalMaskedScatter(Type *DataType) { return TTI->isLegalMaskedScatter(DataType); } /// Returns true if the target machine supports masked gather operation /// for the given \p DataType.
bool isLegalMaskedGather(Type *DataType) { return TTI->isLegalMaskedGather(DataType); } /// Returns true if the target machine can represent \p V as a masked gather /// or scatter operation. bool isLegalGatherOrScatter(Value *V) { auto *LI = dyn_cast<LoadInst>(V); auto *SI = dyn_cast<StoreInst>(V); if (!LI && !SI) return false; auto *Ptr = getPointerOperand(V); auto *Ty = cast<PointerType>(Ptr->getType())->getElementType(); return (LI && isLegalMaskedGather(Ty)) || (SI && isLegalMaskedScatter(Ty)); } /// Returns true if vector representation of the instruction \p I /// requires mask. bool isMaskRequired(const Instruction *I) { return (MaskedOp.count(I) != 0); } unsigned getNumStores() const { return LAI->getNumStores(); } unsigned getNumLoads() const { return LAI->getNumLoads(); } unsigned getNumPredStores() const { return NumPredStores; } /// Returns true if \p I is an instruction that will be scalarized with /// predication. Such instructions include conditional stores and /// instructions that may divide by zero. bool isScalarWithPredication(Instruction *I); /// Returns true if \p I is a memory instruction with consecutive memory /// access that can be widened. bool memoryInstructionCanBeWidened(Instruction *I, unsigned VF = 1); // Returns true if the NoNaN attribute is set on the function. bool hasFunNoNaNAttr() const { return HasFunNoNaNAttr; } private: /// Check if a single basic block loop is vectorizable. /// At this point we know that this is a loop with a constant trip count /// and we only need to check individual instructions. bool canVectorizeInstrs(); /// When we vectorize loops we may change the order in which /// we read and write from memory. This method checks if it is /// legal to vectorize the code, considering only memory constraints. /// Returns true if the loop is vectorizable. bool canVectorizeMemory(); /// Return true if we can vectorize this loop using the IF-conversion /// transformation. bool canVectorizeWithIfConvert(); /// Return true if all of the instructions in the block can be speculatively /// executed. \p SafePtrs is a list of addresses that are known to be legal /// and we know that we can read from them without segfault. bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs); /// Updates the vectorization state by adding \p Phi to the inductions list. /// This can set \p Phi as the main induction of the loop if \p Phi is a /// better choice for the main induction than the existing one. void addInductionPhi(PHINode *Phi, const InductionDescriptor &ID, SmallPtrSetImpl<Value *> &AllowedExit); /// Create an analysis remark that explains why vectorization failed /// /// \p RemarkName is the identifier for the remark. If \p I is passed it is /// an instruction that prevents vectorization. Otherwise the loop is used /// for the location of the remark. \return the remark object that can be /// streamed to. OptimizationRemarkAnalysis createMissedAnalysis(StringRef RemarkName, Instruction *I = nullptr) const { return ::createMissedAnalysis(Hints->vectorizeAnalysisPassName(), RemarkName, TheLoop, I); } /// \brief If an access has a symbolic stride, this maps the pointer value to /// the stride symbol. const ValueToValueMap *getSymbolicStrides() { // FIXME: Currently, the set of symbolic strides is sometimes queried before // it's collected. This happens from canVectorizeWithIfConvert, when the // pointer is checked to reference consecutive elements suitable for a // masked access. return LAI ? &LAI->getSymbolicStrides() : nullptr; } unsigned NumPredStores = 0;
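// Illustrative example of a predicated store: in
//   for (i = 0; i < n; ++i) if (c[i]) a[i] = 0;
// the store executes conditionally, so it is counted in NumPredStores above
// and must either be emitted as a masked store or be scalarized and placed
// behind a branch; see also the vectorize-num-stores-pred flag earlier in
// this file.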
  /// The loop that we evaluate.
  Loop *TheLoop;

  /// A wrapper around ScalarEvolution used to add runtime SCEV checks.
  /// Applies dynamic knowledge to simplify SCEV expressions in the context
  /// of existing SCEV assumptions. The analysis will also add a minimal set
  /// of new predicates if this is required to enable vectorization and
  /// unrolling.
  PredicatedScalarEvolution &PSE;

  /// Target Library Info.
  TargetLibraryInfo *TLI;

  /// Target Transform Info.
  const TargetTransformInfo *TTI;

  /// Dominator Tree.
  DominatorTree *DT;

  // LoopAccess analysis.
  std::function<const LoopAccessInfo &(Loop &)> *GetLAA;

  // And the loop-accesses info corresponding to this loop. This pointer is
  // null until canVectorizeMemory sets it up.
  const LoopAccessInfo *LAI = nullptr;

  /// Interface to emit optimization remarks.
  OptimizationRemarkEmitter *ORE;

  /// The interleave access information contains groups of interleaved accesses
  /// with the same stride and close to each other.
  InterleavedAccessInfo InterleaveInfo;

  // --- vectorization state --- //

  /// Holds the primary induction variable. This is the counter of the
  /// loop.
  PHINode *PrimaryInduction = nullptr;

  /// Holds the reduction variables.
  ReductionList Reductions;

  /// Holds all of the induction variables that we found in the loop.
  /// Notice that inductions don't need to start at zero and that induction
  /// variables can be pointers.
  InductionList Inductions;

  /// Holds all the casts that participate in the update chain of the induction
  /// variables, and that have been proven to be redundant (possibly under a
  /// runtime guard). These casts can be ignored when creating the vectorized
  /// loop body.
  SmallPtrSet<Instruction *, 4> InductionCastsToIgnore;

  /// Holds the phi nodes that are first-order recurrences.
  RecurrenceSet FirstOrderRecurrences;

  /// Holds instructions that need to sink past other instructions to handle
  /// first-order recurrences.
  DenseMap<Instruction *, Instruction *> SinkAfter;

  /// Holds the widest induction type encountered.
  Type *WidestIndTy = nullptr;

  /// Allowed outside users. This holds the induction and reduction
  /// vars which can be accessed from outside the loop.
  SmallPtrSet<Value *, 4> AllowedExit;

  /// Can we assume the absence of NaNs.
  bool HasFunNoNaNAttr = false;

  /// Vectorization requirements that will go through late-evaluation.
  LoopVectorizationRequirements *Requirements;

  /// Used to emit an analysis of any legality issues.
  LoopVectorizeHints *Hints;

+  /// The demanded bits analysis is used to compute the minimum type size in
+  /// which a reduction can be computed.
+  DemandedBits *DB;
+
+  /// The assumption cache analysis is used to compute the minimum type size in
+  /// which a reduction can be computed.
+  AssumptionCache *AC;
+
  /// While vectorizing these instructions we have to generate a
  /// call to the appropriate masked intrinsic
  SmallPtrSet<const Instruction *, 8> MaskedOp;
};
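// Illustrative note (assumed example, not from this change): a first-order
// recurrence, as recognized by isFirstOrderRecurrence() above, is a phi whose
// value in iteration i depends on a value computed in iteration i-1, e.g.:
//   t = init;
//   for (i = 0; i < n; ++i) { out[i] = t; t = in[i]; }
// Vectorizing it requires combining each vector of 't' values with the last
// element of the previous iteration's vector, which is why such phis are
// tracked separately from inductions and reductions.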
/// LoopVectorizationCostModel - estimates the expected speedups due to
/// vectorization.
/// In many cases vectorization is not profitable. This can happen because of
/// a number of reasons. In this class we mainly attempt to predict the
/// expected speedup/slowdowns due to the supported instruction set. We use the
/// TargetTransformInfo to query the different backends for the cost of
/// different operations.
class LoopVectorizationCostModel {
public:
  LoopVectorizationCostModel(Loop *L, PredicatedScalarEvolution &PSE,
                             LoopInfo *LI, LoopVectorizationLegality *Legal,
                             const TargetTransformInfo &TTI,
                             const TargetLibraryInfo *TLI, DemandedBits *DB,
                             AssumptionCache *AC,
                             OptimizationRemarkEmitter *ORE, const Function *F,
                             const LoopVectorizeHints *Hints)
      : TheLoop(L), PSE(PSE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB),
        AC(AC), ORE(ORE), TheFunction(F), Hints(Hints) {}

  /// \return An upper bound for the vectorization factor, or None if
  /// vectorization should be avoided up front.
  Optional<unsigned> computeMaxVF(bool OptForSize);

  /// Information about vectorization costs.
  struct VectorizationFactor {
    // Vector width with best cost
    unsigned Width;
    // Cost of the loop with that width
    unsigned Cost;
  };

  /// \return The most profitable vectorization factor and the cost of that VF.
  /// This method checks every power of two up to MaxVF. If UserVF is not ZERO
  /// then this vectorization factor will be selected if vectorization is
  /// possible.
  VectorizationFactor selectVectorizationFactor(unsigned MaxVF);

  /// Setup cost-based decisions for user vectorization factor.
  void selectUserVectorizationFactor(unsigned UserVF) {
    collectUniformsAndScalars(UserVF);
    collectInstsToScalarize(UserVF);
  }

  /// \return The size (in bits) of the smallest and widest types in the code
  /// that needs to be vectorized. We ignore values that remain scalar such as
  /// 64 bit loop indices.
  std::pair<unsigned, unsigned> getSmallestAndWidestTypes();

  /// \return The desired interleave count.
  /// If interleave count has been specified by metadata it will be returned.
  /// Otherwise, the interleave count is computed and returned. VF and LoopCost
  /// are the selected vectorization factor and the cost of the selected VF.
  unsigned selectInterleaveCount(bool OptForSize, unsigned VF,
                                 unsigned LoopCost);

  /// Memory access instruction may be vectorized in more than one way.
  /// Form of instruction after vectorization depends on cost.
  /// This function takes cost-based decisions for Load/Store instructions
  /// and collects them in a map. This decisions map is used for building
  /// the lists of loop-uniform and loop-scalar instructions.
  /// The calculated cost is saved with widening decision in order to
  /// avoid redundant calculations.
  void setCostBasedWideningDecision(unsigned VF);

  /// \brief A struct that represents some properties of the register usage
  /// of a loop.
  struct RegisterUsage {
    /// Holds the number of loop invariant values that are used in the loop.
    unsigned LoopInvariantRegs;
    /// Holds the maximum number of concurrent live intervals in the loop.
    unsigned MaxLocalUsers;
    /// Holds the number of instructions in the loop.
    unsigned NumInstructions;
  };

  /// \return Returns information about the register usages of the loop for the
  /// given vectorization factors.
  SmallVector<RegisterUsage, 8> calculateRegisterUsage(ArrayRef<unsigned> VFs);

  /// Collect values we want to ignore in the cost model.
  void collectValuesToIgnore();

  /// \returns The smallest bitwidth each instruction can be represented with.
  /// The vector equivalents of these instructions should be truncated to this
  /// type.
  const MapVector<Instruction *, uint64_t> &getMinimalBitwidths() const {
    return MinBWs;
  }
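  // Illustrative note (assumed example, not from this change): the minimal
  // bitwidth map lets narrow computations stay narrow. If an i8 value is
  // zero-extended to i32, combined with other i8-derived values, and only the
  // low 8 bits of the result are ever used, MinBWs can record 8 for those
  // instructions so the vector code operates on <N x i8> instead of <N x i32>.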
  /// \returns True if it is more profitable to scalarize instruction \p I for
  /// vectorization factor \p VF.
  bool isProfitableToScalarize(Instruction *I, unsigned VF) const {
    assert(VF > 1 && "Profitable to scalarize relevant only for VF > 1.");
    auto Scalars = InstsToScalarize.find(VF);
    assert(Scalars != InstsToScalarize.end() &&
           "VF not yet analyzed for scalarization profitability");
    return Scalars->second.count(I);
  }

  /// Returns true if \p I is known to be uniform after vectorization.
  bool isUniformAfterVectorization(Instruction *I, unsigned VF) const {
    if (VF == 1)
      return true;
    assert(Uniforms.count(VF) && "VF not yet analyzed for uniformity");
    auto UniformsPerVF = Uniforms.find(VF);
    return UniformsPerVF->second.count(I);
  }

  /// Returns true if \p I is known to be scalar after vectorization.
  bool isScalarAfterVectorization(Instruction *I, unsigned VF) const {
    if (VF == 1)
      return true;
    assert(Scalars.count(VF) && "Scalar values are not calculated for VF");
    auto ScalarsPerVF = Scalars.find(VF);
    return ScalarsPerVF->second.count(I);
  }

  /// \returns True if instruction \p I can be truncated to a smaller bitwidth
  /// for vectorization factor \p VF.
  bool canTruncateToMinimalBitwidth(Instruction *I, unsigned VF) const {
    return VF > 1 && MinBWs.count(I) && !isProfitableToScalarize(I, VF) &&
           !isScalarAfterVectorization(I, VF);
  }

  /// Decision that was taken during cost calculation for memory instruction.
  enum InstWidening {
    CM_Unknown,
    CM_Widen,         // For consecutive accesses with stride +1.
    CM_Widen_Reverse, // For consecutive accesses with stride -1.
    CM_Interleave,
    CM_GatherScatter,
    CM_Scalarize
  };

  /// Save vectorization decision \p W and \p Cost taken by the cost model for
  /// instruction \p I and vector width \p VF.
  void setWideningDecision(Instruction *I, unsigned VF, InstWidening W,
                           unsigned Cost) {
    assert(VF >= 2 && "Expected VF >=2");
    WideningDecisions[std::make_pair(I, VF)] = std::make_pair(W, Cost);
  }

  /// Save vectorization decision \p W and \p Cost taken by the cost model for
  /// interleaving group \p Grp and vector width \p VF.
  void setWideningDecision(const InterleaveGroup *Grp, unsigned VF,
                           InstWidening W, unsigned Cost) {
    assert(VF >= 2 && "Expected VF >=2");
    /// Broadcast this decision to all instructions inside the group.
    /// But the cost will be assigned to one instruction only.
    for (unsigned i = 0; i < Grp->getFactor(); ++i) {
      if (auto *I = Grp->getMember(i)) {
        if (Grp->getInsertPos() == I)
          WideningDecisions[std::make_pair(I, VF)] = std::make_pair(W, Cost);
        else
          WideningDecisions[std::make_pair(I, VF)] = std::make_pair(W, 0);
      }
    }
  }

  /// Return the cost model decision for the given instruction \p I and vector
  /// width \p VF. Return CM_Unknown if this instruction did not pass
  /// through the cost modeling.
  InstWidening getWideningDecision(Instruction *I, unsigned VF) {
    assert(VF >= 2 && "Expected VF >=2");
    std::pair<Instruction *, unsigned> InstOnVF = std::make_pair(I, VF);
    auto Itr = WideningDecisions.find(InstOnVF);
    if (Itr == WideningDecisions.end())
      return CM_Unknown;
    return Itr->second.first;
  }

  /// Return the vectorization cost for the given instruction \p I and vector
  /// width \p VF.
  unsigned getWideningCost(Instruction *I, unsigned VF) {
    assert(VF >= 2 && "Expected VF >=2");
    std::pair<Instruction *, unsigned> InstOnVF = std::make_pair(I, VF);
    assert(WideningDecisions.count(InstOnVF) && "The cost is not calculated");
    return WideningDecisions[InstOnVF].second;
  }
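  // Illustrative note (assumed examples, not from this change): for a loop
  // over i, an access A[i] typically gets CM_Widen (stride +1), A[n - i]
  // gets CM_Widen_Reverse (stride -1), A[B[i]] may get CM_GatherScatter on
  // targets with gather support, and a store behind a varying predicate may
  // get CM_Scalarize.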
  /// Return True if instruction \p I is an optimizable truncate whose operand
  /// is an induction variable. Such a truncate will be removed by adding a new
  /// induction variable with the destination type.
  bool isOptimizableIVTruncate(Instruction *I, unsigned VF) {
    // If the instruction is not a truncate, return false.
    auto *Trunc = dyn_cast<TruncInst>(I);
    if (!Trunc)
      return false;

    // Get the source and destination types of the truncate.
    Type *SrcTy = ToVectorTy(cast<CastInst>(I)->getSrcTy(), VF);
    Type *DestTy = ToVectorTy(cast<CastInst>(I)->getDestTy(), VF);

    // If the truncate is free for the given types, return false. Replacing a
    // free truncate with an induction variable would add an induction variable
    // update instruction to each iteration of the loop. We exclude from this
    // check the primary induction variable since it will need an update
    // instruction regardless.
    Value *Op = Trunc->getOperand(0);
    if (Op != Legal->getPrimaryInduction() && TTI.isTruncateFree(SrcTy, DestTy))
      return false;

    // If the truncated value is not an induction variable, return false.
    return Legal->isInductionPhi(Op);
  }

  /// Collects the instructions to scalarize for each predicated instruction in
  /// the loop.
  void collectInstsToScalarize(unsigned VF);

  /// Collect Uniform and Scalar values for the given \p VF.
  /// The sets depend on CM decision for Load/Store instructions
  /// that may be vectorized as interleave, gather-scatter or scalarized.
  void collectUniformsAndScalars(unsigned VF) {
    // Do the analysis once.
    if (VF == 1 || Uniforms.count(VF))
      return;
    setCostBasedWideningDecision(VF);
    collectLoopUniforms(VF);
    collectLoopScalars(VF);
  }

private:
  /// \return An upper bound for the vectorization factor, larger than zero.
  /// One is returned if vectorization should best be avoided due to cost.
  unsigned computeFeasibleMaxVF(bool OptForSize, unsigned ConstTripCount);

  /// The vectorization cost is a combination of the cost itself and a boolean
  /// indicating whether any of the contributing operations will actually
  /// operate on vector values after type legalization in the backend. If this
  /// latter value is false, then all operations will be scalarized (i.e. no
  /// vectorization has actually taken place).
  using VectorizationCostTy = std::pair<unsigned, bool>;

  /// Returns the expected execution cost. The unit of the cost does
  /// not matter because we use the 'cost' units to compare different
  /// vector widths. The cost that is returned is *not* normalized by
  /// the factor width.
  VectorizationCostTy expectedCost(unsigned VF);

  /// Returns the execution time cost of an instruction for a given vector
  /// width. Vector width of one means scalar.
  VectorizationCostTy getInstructionCost(Instruction *I, unsigned VF);

  /// The cost-computation logic from getInstructionCost which provides
  /// the vector type as an output parameter.
  unsigned getInstructionCost(Instruction *I, unsigned VF, Type *&VectorTy);

  /// Calculate vectorization cost of memory instruction \p I.
  unsigned getMemoryInstructionCost(Instruction *I, unsigned VF);

  /// The cost computation for scalarized memory instruction.
  unsigned getMemInstScalarizationCost(Instruction *I, unsigned VF);

  /// The cost computation for interleaving group of memory instructions.
  unsigned getInterleaveGroupCost(Instruction *I, unsigned VF);

  /// The cost computation for Gather/Scatter instruction.
  unsigned getGatherScatterCost(Instruction *I, unsigned VF);

  /// The cost computation for widening instruction \p I with consecutive
  /// memory access.
  unsigned getConsecutiveMemOpCost(Instruction *I, unsigned VF);
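  // Illustrative note (assumed example, not from this change): a single
  // strided access such as A[2*i] can be priced along several of the paths
  // above -- as part of an interleave group (load a wide vector, shuffle out
  // the even lanes), as a gather, or scalarized into VF scalar loads -- and
  // setCostBasedWideningDecision records whichever alternative is cheapest.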
  /// The cost calculation for Load instruction \p I with uniform pointer -
  /// scalar load + broadcast.
  unsigned getUniformMemOpCost(Instruction *I, unsigned VF);

  /// Returns whether the instruction is a load or store and will be emitted
  /// as a vector operation.
  bool isConsecutiveLoadOrStore(Instruction *I);

  /// Create an analysis remark that explains why vectorization failed
  ///
  /// \p RemarkName is the identifier for the remark. \return the remark object
  /// that can be streamed to.
  OptimizationRemarkAnalysis createMissedAnalysis(StringRef RemarkName) {
    return ::createMissedAnalysis(Hints->vectorizeAnalysisPassName(),
                                  RemarkName, TheLoop);
  }

  /// Map of scalar integer values to the smallest bitwidth they can be legally
  /// represented as. The vector equivalents of these values should be truncated
  /// to this type.
  MapVector<Instruction *, uint64_t> MinBWs;

  /// A type representing the costs for instructions if they were to be
  /// scalarized rather than vectorized. The entries are Instruction-Cost
  /// pairs.
  using ScalarCostsTy = DenseMap<Instruction *, unsigned>;

  /// A set containing all BasicBlocks that are known to be present after
  /// vectorization as a predicated block.
  SmallPtrSet<BasicBlock *, 4> PredicatedBBsAfterVectorization;

  /// A map holding scalar costs for different vectorization factors. The
  /// presence of a cost for an instruction in the mapping indicates that the
  /// instruction will be scalarized when vectorizing with the associated
  /// vectorization factor. The entries are VF-ScalarCostTy pairs.
  DenseMap<unsigned, ScalarCostsTy> InstsToScalarize;

  /// Holds the instructions known to be uniform after vectorization.
  /// The data is collected per VF.
  DenseMap<unsigned, SmallPtrSet<Instruction *, 4>> Uniforms;

  /// Holds the instructions known to be scalar after vectorization.
  /// The data is collected per VF.
  DenseMap<unsigned, SmallPtrSet<Instruction *, 4>> Scalars;

  /// Holds the instructions (address computations) that are forced to be
  /// scalarized.
  DenseMap<unsigned, SmallPtrSet<Instruction *, 4>> ForcedScalars;

  /// Returns the expected difference in cost from scalarizing the expression
  /// feeding a predicated instruction \p PredInst. The instructions to
  /// scalarize and their scalar costs are collected in \p ScalarCosts. A
  /// non-negative return value implies the expression will be scalarized.
  /// Currently, only single-use chains are considered for scalarization.
  int computePredInstDiscount(Instruction *PredInst, ScalarCostsTy &ScalarCosts,
                              unsigned VF);

  /// Collect the instructions that are uniform after vectorization. An
  /// instruction is uniform if we represent it with a single scalar value in
  /// the vectorized loop corresponding to each vector iteration. Examples of
  /// uniform instructions include pointer operands of consecutive or
  /// interleaved memory accesses. Note that although uniformity implies an
  /// instruction will be scalar, the reverse is not true. In general, a
  /// scalarized instruction will be represented by VF scalar values in the
  /// vectorized loop, each corresponding to an iteration of the original
  /// scalar loop.
  void collectLoopUniforms(unsigned VF);

  /// Collect the instructions that are scalar after vectorization. An
  /// instruction is scalar if it is known to be uniform or will be scalarized
  /// during vectorization. Non-uniform scalarized instructions will be
  /// represented by VF values in the vectorized loop, each corresponding to an
  /// iteration of the original scalar loop.
  void collectLoopScalars(unsigned VF);

  /// Keeps cost model vectorization decision and cost for instructions.
  /// Right now it is used for memory instructions only.
  using DecisionList = DenseMap<std::pair<Instruction *, unsigned>,
                                std::pair<InstWidening, unsigned>>;

  DecisionList WideningDecisions;

public:
  /// The loop that we evaluate.
  Loop *TheLoop;
  /// Predicated scalar evolution analysis.
  PredicatedScalarEvolution &PSE;

  /// Loop Info analysis.
  LoopInfo *LI;

  /// Vectorization legality.
  LoopVectorizationLegality *Legal;

  /// Vector target information.
  const TargetTransformInfo &TTI;

  /// Target Library Info.
  const TargetLibraryInfo *TLI;

  /// Demanded bits analysis.
  DemandedBits *DB;

  /// Assumption cache.
  AssumptionCache *AC;

  /// Interface to emit optimization remarks.
  OptimizationRemarkEmitter *ORE;

  const Function *TheFunction;

  /// Loop Vectorize Hint.
  const LoopVectorizeHints *Hints;

  /// Values to ignore in the cost model.
  SmallPtrSet<const Value *, 16> ValuesToIgnore;

  /// Values to ignore in the cost model when VF > 1.
  SmallPtrSet<const Value *, 16> VecValuesToIgnore;
};

} // end anonymous namespace

namespace llvm {

/// LoopVectorizationPlanner - drives the vectorization process after having
/// passed Legality checks.
/// The planner builds and optimizes the Vectorization Plans which record the
/// decisions on how to vectorize the given loop. In particular, they represent
/// the control-flow of the vectorized version, the replication of instructions
/// that are to be scalarized, and interleave access groups.
class LoopVectorizationPlanner {
  /// The loop that we evaluate.
  Loop *OrigLoop;

  /// Loop Info analysis.
  LoopInfo *LI;

  /// Target Library Info.
  const TargetLibraryInfo *TLI;

  /// Target Transform Info.
  const TargetTransformInfo *TTI;

  /// The legality analysis.
  LoopVectorizationLegality *Legal;

  /// The profitability analysis.
  LoopVectorizationCostModel &CM;

  using VPlanPtr = std::unique_ptr<VPlan>;

  SmallVector<VPlanPtr, 4> VPlans;

  /// This class is used to enable the VPlan to invoke a method of ILV. This is
  /// needed until the method is refactored out of ILV and becomes reusable.
  struct VPCallbackILV : public VPCallback {
    InnerLoopVectorizer &ILV;

    VPCallbackILV(InnerLoopVectorizer &ILV) : ILV(ILV) {}

    Value *getOrCreateVectorValues(Value *V, unsigned Part) override {
      return ILV.getOrCreateVectorValue(V, Part);
    }
  };

  /// A builder used to construct the current plan.
  VPBuilder Builder;

  /// When we if-convert we need to create edge masks. We have to cache values
  /// so that we don't end up with exponential recursion/IR. Note that
  /// if-conversion currently takes place during VPlan-construction, so these
  /// caches are only used at that stage.
  using EdgeMaskCacheTy =
      DenseMap<std::pair<BasicBlock *, BasicBlock *>, VPValue *>;
  using BlockMaskCacheTy = DenseMap<BasicBlock *, VPValue *>;
  EdgeMaskCacheTy EdgeMaskCache;
  BlockMaskCacheTy BlockMaskCache;

  unsigned BestVF = 0;
  unsigned BestUF = 0;

public:
  LoopVectorizationPlanner(Loop *L, LoopInfo *LI, const TargetLibraryInfo *TLI,
                           const TargetTransformInfo *TTI,
                           LoopVectorizationLegality *Legal,
                           LoopVectorizationCostModel &CM)
      : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM) {}

  /// Plan how to best vectorize, return the best VF and its cost.
  LoopVectorizationCostModel::VectorizationFactor plan(bool OptForSize,
                                                       unsigned UserVF);

  /// Finalize the best decision and dispose of all other VPlans.
  void setBestPlan(unsigned VF, unsigned UF);

  /// Generate the IR code for the body of the vectorized loop according to the
  /// best selected VPlan.
  void executePlan(InnerLoopVectorizer &LB, DominatorTree *DT);

  void printPlans(raw_ostream &O) {
    for (const auto &Plan : VPlans)
      O << *Plan;
  }

protected:
  /// Collect the instructions from the original loop that would be trivially
  /// dead in the vectorized loop if generated.
  void collectTriviallyDeadInstructions(
      SmallPtrSetImpl<Instruction *> &DeadInstructions);
  /// A range of powers-of-2 vectorization factors with fixed start and
  /// adjustable end. The range includes start and excludes end, e.g.,:
  /// [1, 9) = {1, 2, 4, 8}
  struct VFRange {
    // A power of 2.
    const unsigned Start;

    // Need not be a power of 2. If End <= Start range is empty.
    unsigned End;
  };

  /// Test a \p Predicate on a \p Range of VF's. Return the value of applying
  /// \p Predicate on Range.Start, possibly decreasing Range.End such that the
  /// returned value holds for the entire \p Range.
  bool getDecisionAndClampRange(const std::function<bool(unsigned)> &Predicate,
                                VFRange &Range);

  /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
  /// according to the information gathered by Legal when it checked if it is
  /// legal to vectorize the loop.
  void buildVPlans(unsigned MinVF, unsigned MaxVF);

private:
  /// A helper function that computes the predicate of the block BB, assuming
  /// that the header block of the loop is set to True. It returns the *entry*
  /// mask for the block BB.
  VPValue *createBlockInMask(BasicBlock *BB, VPlanPtr &Plan);

  /// A helper function that computes the predicate of the edge between SRC
  /// and DST.
  VPValue *createEdgeMask(BasicBlock *Src, BasicBlock *Dst, VPlanPtr &Plan);

  /// Check if \I belongs to an Interleave Group within the given VF \p Range,
  /// \return true in the first returned value if so and false otherwise.
  /// Build a new VPInterleaveGroup Recipe if \I is the primary member of an IG
  /// for \p Range.Start, and provide it as the second returned value.
  /// Note that if \I is an adjunct member of an IG for \p Range.Start, the
  /// \return value is <true, nullptr>, as it is handled by another recipe.
  /// \p Range.End may be decreased to ensure same decision from \p Range.Start
  /// to \p Range.End.
  VPInterleaveRecipe *tryToInterleaveMemory(Instruction *I, VFRange &Range);

  // Check if \I is a memory instruction to be widened for \p Range.Start and
  // potentially masked. Such instructions are handled by a recipe that takes an
  // additional VPInstruction for the mask.
  VPWidenMemoryInstructionRecipe *tryToWidenMemory(Instruction *I,
                                                   VFRange &Range,
                                                   VPlanPtr &Plan);

  /// Check if an induction recipe should be constructed for \I within the given
  /// VF \p Range. If so build and return it. If not, return null. \p Range.End
  /// may be decreased to ensure same decision from \p Range.Start to
  /// \p Range.End.
  VPWidenIntOrFpInductionRecipe *tryToOptimizeInduction(Instruction *I,
                                                        VFRange &Range);

  /// Handle non-loop phi nodes. Currently all such phi nodes are turned into
  /// a sequence of select instructions as the vectorizer currently performs
  /// full if-conversion.
  VPBlendRecipe *tryToBlend(Instruction *I, VPlanPtr &Plan);

  /// Check if \p I can be widened within the given VF \p Range. If \p I can be
  /// widened for \p Range.Start, check if the last recipe of \p VPBB can be
  /// extended to include \p I or else build a new VPWidenRecipe for it and
  /// append it to \p VPBB. Return true if \p I can be widened for Range.Start,
  /// false otherwise. Range.End may be decreased to ensure same decision from
  /// \p Range.Start to \p Range.End.
  bool tryToWiden(Instruction *I, VPBasicBlock *VPBB, VFRange &Range);
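  // Illustrative note (assumed example, not from this change): when
  // buildVPlans(1, 8) considers the range [1, 9) and some per-VF predicate
  // (say, "this truncate is an optimizable IV truncate") holds for VF = 1 and
  // 2 but not for VF = 4, getDecisionAndClampRange clamps the range to [1, 4)
  // so one VPlan covers {1, 2}, and a fresh range starting at 4 is planned
  // separately.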
  /// Build a VPReplicationRecipe for \p I and enclose it within a Region if it
  /// is predicated. \return \p VPBB augmented with this new recipe if \p I is
  /// not predicated, otherwise \return a new VPBasicBlock that succeeds the new
  /// Region. Update the packing decision of predicated instructions if they
  /// feed \p I. Range.End may be decreased to ensure same recipe behavior from
  /// \p Range.Start to \p Range.End.
  VPBasicBlock *handleReplication(
      Instruction *I, VFRange &Range, VPBasicBlock *VPBB,
      DenseMap<Instruction *, VPRecipeBase *> &PredInst2Recipe,
      VPlanPtr &Plan);

  /// Create a replicating region for instruction \p I that requires
  /// predication. \p PredRecipe is a VPReplicateRecipe holding \p I.
  VPRegionBlock *createReplicateRegion(Instruction *I, VPRecipeBase *PredRecipe,
                                       VPlanPtr &Plan);

  /// Build a VPlan according to the information gathered by Legal. \return a
  /// VPlan for vectorization factors \p Range.Start and up to \p Range.End
  /// exclusive, possibly decreasing \p Range.End.
  VPlanPtr buildVPlan(VFRange &Range,
                      const SmallPtrSetImpl<Value *> &NeedDef);
};

} // end namespace llvm

namespace {

/// \brief This holds vectorization requirements that must be verified late in
/// the process. The requirements are set by legalize and costmodel. Once
/// vectorization has been determined to be possible and profitable the
/// requirements can be verified by looking for metadata or compiler options.
/// For example, some loops require FP commutativity which is only allowed if
/// vectorization is explicitly specified or if the fast-math compiler option
/// has been provided.
/// Late evaluation of these requirements allows helpful diagnostics to be
/// composed that tell the user what needs to be done to vectorize the loop.
/// For example, by specifying #pragma clang loop vectorize or -ffast-math.
/// Late evaluation should be used only when diagnostics can be generated that
/// can be followed by a non-expert user.
class LoopVectorizationRequirements {
public:
  LoopVectorizationRequirements(OptimizationRemarkEmitter &ORE) : ORE(ORE) {}

  void addUnsafeAlgebraInst(Instruction *I) {
    // First unsafe algebra instruction.
    if (!UnsafeAlgebraInst)
      UnsafeAlgebraInst = I;
  }

  void addRuntimePointerChecks(unsigned Num) { NumRuntimePointerChecks = Num; }

  bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints) {
    const char *PassName = Hints.vectorizeAnalysisPassName();
    bool Failed = false;
    if (UnsafeAlgebraInst && !Hints.allowReordering()) {
      ORE.emit([&]() {
        return OptimizationRemarkAnalysisFPCommute(
                   PassName, "CantReorderFPOps",
                   UnsafeAlgebraInst->getDebugLoc(),
                   UnsafeAlgebraInst->getParent())
               << "loop not vectorized: cannot prove it is safe to reorder "
                  "floating-point operations";
      });
      Failed = true;
    }

    // Test if runtime memcheck thresholds are exceeded.
    bool PragmaThresholdReached =
        NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
    bool ThresholdReached =
        NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
    if ((ThresholdReached && !Hints.allowReordering()) ||
        PragmaThresholdReached) {
      ORE.emit([&]() {
        return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps",
                                                  L->getStartLoc(),
                                                  L->getHeader())
               << "loop not vectorized: cannot prove it is safe to reorder "
                  "memory operations";
      });
      DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
      Failed = true;
    }

    return Failed;
  }

private:
  unsigned NumRuntimePointerChecks = 0;
  Instruction *UnsafeAlgebraInst = nullptr;

  /// Interface to emit optimization remarks.
  OptimizationRemarkEmitter &ORE;
};

} // end anonymous namespace

static void addAcyclicInnerLoop(Loop &L, SmallVectorImpl<Loop *> &V) {
  if (L.empty()) {
    if (!hasCyclesInLoopBody(L))
      V.push_back(&L);
    return;
  }
  for (Loop *InnerL : L)
    addAcyclicInnerLoop(*InnerL, V);
}

namespace {
/// The LoopVectorize Pass.
struct LoopVectorize : public FunctionPass {
  /// Pass identification, replacement for typeid
  static char ID;

  LoopVectorizePass Impl;

  explicit LoopVectorize(bool NoUnrolling = false, bool AlwaysVectorize = true)
      : FunctionPass(ID) {
    Impl.DisableUnrolling = NoUnrolling;
    Impl.AlwaysVectorize = AlwaysVectorize;
    initializeLoopVectorizePass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;

    auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto *BFI = &getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
    auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
    auto *TLI = TLIP ? &TLIP->getTLI() : nullptr;
    auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
    auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
    auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
    auto *DB = &getAnalysis<DemandedBitsWrapperPass>().getDemandedBits();
    auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();

    std::function<const LoopAccessInfo &(Loop &)> GetLAA =
        [&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); };

    return Impl.runImpl(F, *SE, *LI, *TTI, *DT, *BFI, TLI, *DB, *AA, *AC,
                        GetLAA, *ORE);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<BlockFrequencyInfoWrapperPass>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
    AU.addRequired<ScalarEvolutionWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<LoopAccessLegacyAnalysis>();
    AU.addRequired<DemandedBitsWrapperPass>();
    AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
    AU.addPreserved<LoopInfoWrapperPass>();
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addPreserved<BasicAAWrapperPass>();
    AU.addPreserved<GlobalsAAWrapperPass>();
  }
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// Implementation of LoopVectorizationLegality, InnerLoopVectorizer and
// LoopVectorizationCostModel and LoopVectorizationPlanner.
//===----------------------------------------------------------------------===//

Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
  // We need to place the broadcast of invariant variables outside the loop.
  Instruction *Instr = dyn_cast<Instruction>(V);
  bool NewInstr = (Instr && Instr->getParent() == LoopVectorBody);
  bool Invariant = OrigLoop->isLoopInvariant(V) && !NewInstr;

  // Place the code for broadcasting invariant variables in the new preheader.
  IRBuilder<>::InsertPointGuard Guard(Builder);
  if (Invariant)
    Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());

  // Broadcast the scalar into all locations in the vector.
  Value *Shuf = Builder.CreateVectorSplat(VF, V, "broadcast");

  return Shuf;
}

void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
    const InductionDescriptor &II, Value *Step, Instruction *EntryVal) {
  Value *Start = II.getStartValue();

  // Construct the initial value of the vector IV in the vector loop preheader
  auto CurrIP = Builder.saveIP();
  Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
  if (isa<TruncInst>(EntryVal)) {
    assert(Start->getType()->isIntegerTy() &&
           "Truncation requires an integer type");
    auto *TruncType = cast<IntegerType>(EntryVal->getType());
    Step = Builder.CreateTrunc(Step, TruncType);
    Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
  }
  Value *SplatStart = Builder.CreateVectorSplat(VF, Start);
  Value *SteppedStart =
      getStepVector(SplatStart, 0, Step, II.getInductionOpcode());

  // We create vector phi nodes for both integer and floating-point induction
  // variables. Here, we determine the kind of arithmetic we will perform.
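  // For example (illustrative, not from this change): an integer IV "i += S"
  // uses Add/Mul below, while an FP induction such as "f -= S" uses the
  // descriptor's own opcode (here FSub) for the update and FMul for scaling
  // the step by the vectorization factor.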
  Instruction::BinaryOps AddOp;
  Instruction::BinaryOps MulOp;
  if (Step->getType()->isIntegerTy()) {
    AddOp = Instruction::Add;
    MulOp = Instruction::Mul;
  } else {
    AddOp = II.getInductionOpcode();
    MulOp = Instruction::FMul;
  }

  // Multiply the vectorization factor by the step using integer or
  // floating-point arithmetic as appropriate.
  Value *ConstVF = getSignedIntOrFpConstant(Step->getType(), VF);
  Value *Mul = addFastMathFlag(Builder.CreateBinOp(MulOp, Step, ConstVF));

  // Create a vector splat to use in the induction update.
  //
  // FIXME: If the step is non-constant, we create the vector splat with
  //        IRBuilder. IRBuilder can constant-fold the multiply, but it doesn't
  //        handle a constant vector splat.
  Value *SplatVF = isa<Constant>(Mul)
                       ? ConstantVector::getSplat(VF, cast<Constant>(Mul))
                       : Builder.CreateVectorSplat(VF, Mul);
  Builder.restoreIP(CurrIP);

  // We may need to add the step a number of times, depending on the unroll
  // factor. The last of those goes into the PHI.
  PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind",
                                    &*LoopVectorBody->getFirstInsertionPt());
  Instruction *LastInduction = VecInd;
  for (unsigned Part = 0; Part < UF; ++Part) {
    VectorLoopValueMap.setVectorValue(EntryVal, Part, LastInduction);

    if (isa<TruncInst>(EntryVal))
      addMetadata(LastInduction, EntryVal);
    else
      recordVectorLoopValueForInductionCast(II, LastInduction, Part);

    LastInduction = cast<Instruction>(addFastMathFlag(
        Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add")));
  }

  // Move the last step to the end of the latch block. This ensures consistent
  // placement of all induction updates.
  auto *LoopVectorLatch = LI->getLoopFor(LoopVectorBody)->getLoopLatch();
  auto *Br = cast<BranchInst>(LoopVectorLatch->getTerminator());
  auto *ICmp = cast<Instruction>(Br->getCondition());
  LastInduction->moveBefore(ICmp);
  LastInduction->setName("vec.ind.next");

  VecInd->addIncoming(SteppedStart, LoopVectorPreHeader);
  VecInd->addIncoming(LastInduction, LoopVectorLatch);
}

bool InnerLoopVectorizer::shouldScalarizeInstruction(Instruction *I) const {
  return Cost->isScalarAfterVectorization(I, VF) ||
         Cost->isProfitableToScalarize(I, VF);
}

bool InnerLoopVectorizer::needsScalarInduction(Instruction *IV) const {
  if (shouldScalarizeInstruction(IV))
    return true;
  auto isScalarInst = [&](User *U) -> bool {
    auto *I = cast<Instruction>(U);
    return (OrigLoop->contains(I) && shouldScalarizeInstruction(I));
  };
  return llvm::any_of(IV->users(), isScalarInst);
}

void InnerLoopVectorizer::recordVectorLoopValueForInductionCast(
    const InductionDescriptor &ID, Value *VectorLoopVal, unsigned Part,
    unsigned Lane) {
  const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
  if (Casts.empty())
    return;
  // Only the first Cast instruction in the Casts vector is of interest.
  // The rest of the Casts (if exist) have no uses outside the
  // induction update chain itself.
  Instruction *CastInst = *Casts.begin();
  if (Lane < UINT_MAX)
    VectorLoopValueMap.setScalarValue(CastInst, {Part, Lane}, VectorLoopVal);
  else
    VectorLoopValueMap.setVectorValue(CastInst, Part, VectorLoopVal);
}

void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc) {
  assert((IV->getType()->isIntegerTy() || IV != OldInduction) &&
         "Primary induction variable must have an integer type");

  auto II = Legal->getInductionVars()->find(IV);
  assert(II != Legal->getInductionVars()->end() && "IV is not an induction");

  auto ID = II->second;
  assert(IV->getType() == ID.getStartValue()->getType() && "Types must match");

  // The scalar value to broadcast. This will be derived from the canonical
  // induction variable.
  Value *ScalarIV = nullptr;

  // The value from the original loop to which we are mapping the new induction
  // variable.
  Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : IV;

  // True if we have vectorized the induction variable.
  auto VectorizedIV = false;

  // Determine if we want a scalar version of the induction variable. This is
  // true if the induction variable itself is not widened, or if it has at
  // least one user in the loop that is not widened.
  auto NeedsScalarIV = VF > 1 && needsScalarInduction(EntryVal);

  // Generate code for the induction step. Note that induction steps are
  // required to be loop-invariant.
  assert(PSE.getSE()->isLoopInvariant(ID.getStep(), OrigLoop) &&
         "Induction step should be loop invariant");
  auto &DL = OrigLoop->getHeader()->getModule()->getDataLayout();
  Value *Step = nullptr;
  if (PSE.getSE()->isSCEVable(IV->getType())) {
    SCEVExpander Exp(*PSE.getSE(), DL, "induction");
    Step = Exp.expandCodeFor(ID.getStep(), ID.getStep()->getType(),
                             LoopVectorPreHeader->getTerminator());
  } else {
    Step = cast<SCEVUnknown>(ID.getStep())->getValue();
  }

  // Try to create a new independent vector induction variable. If we can't
  // create the phi node, we will splat the scalar induction variable in each
  // loop iteration.
  if (VF > 1 && !shouldScalarizeInstruction(EntryVal)) {
    createVectorIntOrFpInductionPHI(ID, Step, EntryVal);
    VectorizedIV = true;
  }

  // If we haven't yet vectorized the induction variable, or if we will create
  // a scalar one, we need to define the scalar induction variable and step
  // values. If we were given a truncation type, truncate the canonical
  // induction variable and step. Otherwise, derive these values from the
  // induction descriptor.
  if (!VectorizedIV || NeedsScalarIV) {
    ScalarIV = Induction;
    if (IV != OldInduction) {
      ScalarIV = IV->getType()->isIntegerTy()
                     ? Builder.CreateSExtOrTrunc(Induction, IV->getType())
                     : Builder.CreateCast(Instruction::SIToFP, Induction,
                                          IV->getType());
      ScalarIV = ID.transform(Builder, ScalarIV, PSE.getSE(), DL);
      ScalarIV->setName("offset.idx");
    }
    if (Trunc) {
      auto *TruncType = cast<IntegerType>(Trunc->getType());
      assert(Step->getType()->isIntegerTy() &&
             "Truncation requires an integer step");
      ScalarIV = Builder.CreateTrunc(ScalarIV, TruncType);
      Step = Builder.CreateTrunc(Step, TruncType);
    }
  }

  // If we haven't yet vectorized the induction variable, splat the scalar
  // induction variable, and build the necessary step vectors.
  // TODO: Don't do it unless the vectorized IV is really required.
  if (!VectorizedIV) {
    Value *Broadcasted = getBroadcastInstrs(ScalarIV);
    for (unsigned Part = 0; Part < UF; ++Part) {
      Value *EntryPart =
          getStepVector(Broadcasted, VF * Part, Step, ID.getInductionOpcode());
      VectorLoopValueMap.setVectorValue(EntryVal, Part, EntryPart);
      if (Trunc)
        addMetadata(EntryPart, Trunc);
      else
        recordVectorLoopValueForInductionCast(ID, EntryPart, Part);
    }
  }

  // If an induction variable is only used for counting loop iterations or
  // calculating addresses, it doesn't need to be widened. Create scalar steps
  // that can be used by instructions we will later scalarize. Note that the
  // addition of the scalar steps will not increase the number of instructions
  // in the loop in the common case prior to InstCombine. We will be trading
  // one vector extract for each scalar step.
  if (NeedsScalarIV)
    buildScalarSteps(ScalarIV, Step, EntryVal, ID);
}

Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx, Value *Step,
                                          Instruction::BinaryOps BinOp) {
  // Create and check the types.
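  // Illustrative example (assumed, not from this change): for VF = 4, an i32
  // step %s, StartIdx = 0, and Val = a splat of %start, this produces:
  //   %offsets   = mul <4 x i32> <i32 0, i32 1, i32 2, i32 3>, %s.splat
  //   %induction = add <4 x i32> %start.splat, %offsets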
  assert(Val->getType()->isVectorTy() && "Must be a vector");
  int VLen = Val->getType()->getVectorNumElements();

  Type *STy = Val->getType()->getScalarType();
  assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&
         "Induction Step must be an integer or FP");
  assert(Step->getType() == STy && "Step has wrong type");

  SmallVector<Constant *, 8> Indices;

  if (STy->isIntegerTy()) {
    // Create a vector of consecutive numbers from zero to VF.
    for (int i = 0; i < VLen; ++i)
      Indices.push_back(ConstantInt::get(STy, StartIdx + i));

    // Add the consecutive indices to the vector value.
    Constant *Cv = ConstantVector::get(Indices);
    assert(Cv->getType() == Val->getType() && "Invalid consecutive vec");
    Step = Builder.CreateVectorSplat(VLen, Step);
    assert(Step->getType() == Val->getType() && "Invalid step vec");
    // FIXME: The newly created binary instructions should contain nsw/nuw
    // flags, which can be found from the original scalar operations.
    Step = Builder.CreateMul(Cv, Step);
    return Builder.CreateAdd(Val, Step, "induction");
  }

  // Floating point induction.
  assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
         "Binary Opcode should be specified for FP induction");
  // Create a vector of consecutive numbers from zero to VF.
  for (int i = 0; i < VLen; ++i)
    Indices.push_back(ConstantFP::get(STy, (double)(StartIdx + i)));

  // Add the consecutive indices to the vector value.
  Constant *Cv = ConstantVector::get(Indices);

  Step = Builder.CreateVectorSplat(VLen, Step);

  // Floating point operations had to be 'fast' to enable the induction.
  FastMathFlags Flags;
  Flags.setFast();

  Value *MulOp = Builder.CreateFMul(Cv, Step);
  if (isa<Instruction>(MulOp))
    // Have to check, MulOp may be a constant
    cast<Instruction>(MulOp)->setFastMathFlags(Flags);

  Value *BOp = Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
  if (isa<Instruction>(BOp))
    cast<Instruction>(BOp)->setFastMathFlags(Flags);
  return BOp;
}

void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
                                           Value *EntryVal,
                                           const InductionDescriptor &ID) {
  // We shouldn't have to build scalar steps if we aren't vectorizing.
  assert(VF > 1 && "VF should be greater than one");

  // Get the value type and ensure it and the step have the same integer type.
  Type *ScalarIVTy = ScalarIV->getType()->getScalarType();
  assert(ScalarIVTy == Step->getType() &&
         "Val and Step should have the same type");

  // We build scalar steps for both integer and floating-point induction
  // variables. Here, we determine the kind of arithmetic we will perform.
  Instruction::BinaryOps AddOp;
  Instruction::BinaryOps MulOp;
  if (ScalarIVTy->isIntegerTy()) {
    AddOp = Instruction::Add;
    MulOp = Instruction::Mul;
  } else {
    AddOp = ID.getInductionOpcode();
    MulOp = Instruction::FMul;
  }

  // Determine the number of scalars we need to generate for each unroll
  // iteration. If EntryVal is uniform, we only need to generate the first
  // lane. Otherwise, we generate all VF values.
  unsigned Lanes =
      Cost->isUniformAfterVectorization(cast<Instruction>(EntryVal), VF) ? 1
                                                                         : VF;
  // Compute the scalar steps and save the results in VectorLoopValueMap.
  for (unsigned Part = 0; Part < UF; ++Part) {
    for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
      auto *StartIdx = getSignedIntOrFpConstant(ScalarIVTy, VF * Part + Lane);
      auto *Mul = addFastMathFlag(Builder.CreateBinOp(MulOp, StartIdx, Step));
      auto *Add = addFastMathFlag(Builder.CreateBinOp(AddOp, ScalarIV, Mul));
      VectorLoopValueMap.setScalarValue(EntryVal, {Part, Lane}, Add);
      recordVectorLoopValueForInductionCast(ID, Add, Part, Lane);
    }
  }
}

int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
  const ValueToValueMap &Strides =
      getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();

  int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false);
  if (Stride == 1 || Stride == -1)
    return Stride;
  return 0;
}

bool LoopVectorizationLegality::isUniform(Value *V) {
  return LAI->isUniform(V);
}

Value *InnerLoopVectorizer::getOrCreateVectorValue(Value *V, unsigned Part) {
  assert(V != Induction && "The new induction variable should not be used.");
  assert(!V->getType()->isVectorTy() && "Can't widen a vector");
  assert(!V->getType()->isVoidTy() && "Type does not produce a value");

  // If we have a stride that is replaced by one, do it here.
  if (Legal->hasStride(V))
    V = ConstantInt::get(V->getType(), 1);

  // If we have a vector mapped to this value, return it.
  if (VectorLoopValueMap.hasVectorValue(V, Part))
    return VectorLoopValueMap.getVectorValue(V, Part);

  // If the value has not been vectorized, check if it has been scalarized
  // instead. If it has been scalarized, and we actually need the value in
  // vector form, we will construct the vector values on demand.
  if (VectorLoopValueMap.hasAnyScalarValue(V)) {
    Value *ScalarValue = VectorLoopValueMap.getScalarValue(V, {Part, 0});

    // If we've scalarized a value, that value should be an instruction.
    auto *I = cast<Instruction>(V);

    // If we aren't vectorizing, we can just copy the scalar map values over to
    // the vector map.
    if (VF == 1) {
      VectorLoopValueMap.setVectorValue(V, Part, ScalarValue);
      return ScalarValue;
    }

    // Get the last scalar instruction we generated for V and Part. If the
    // value is known to be uniform after vectorization, this corresponds to
    // lane zero of the Part unroll iteration. Otherwise, the last instruction
    // is the one we created for the last vector lane of the Part unroll
    // iteration.
    unsigned LastLane = Cost->isUniformAfterVectorization(I, VF) ? 0 : VF - 1;
    auto *LastInst = cast<Instruction>(
        VectorLoopValueMap.getScalarValue(V, {Part, LastLane}));

    // Set the insert point after the last scalarized instruction. This ensures
    // the insertelement sequence will directly follow the scalar definitions.
    auto OldIP = Builder.saveIP();
    auto NewIP = std::next(BasicBlock::iterator(LastInst));
    Builder.SetInsertPoint(&*NewIP);

    // However, if we are vectorizing, we need to construct the vector values.
    // If the value is known to be uniform after vectorization, we can just
    // broadcast the scalar value corresponding to lane zero for each unroll
    // iteration. Otherwise, we construct the vector values using insertelement
    // instructions. Since the resulting vectors are stored in
    // VectorLoopValueMap, we will only generate the insertelements once.
    Value *VectorValue = nullptr;
    if (Cost->isUniformAfterVectorization(I, VF)) {
      VectorValue = getBroadcastInstrs(ScalarValue);
      VectorLoopValueMap.setVectorValue(V, Part, VectorValue);
    } else {
      // Initialize packing with insertelements to start from undef.
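      // Illustrative example (assumed, not from this change): for VF = 4 the
      // packing below emits a chain like
      //   %p0 = insertelement <4 x i32> undef, i32 %s0, i32 0
      //   %p1 = insertelement <4 x i32> %p0,   i32 %s1, i32 1
      // and so on up to lane 3.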
      Value *Undef = UndefValue::get(VectorType::get(V->getType(), VF));
      VectorLoopValueMap.setVectorValue(V, Part, Undef);
      for (unsigned Lane = 0; Lane < VF; ++Lane)
        packScalarIntoVectorValue(V, {Part, Lane});
      VectorValue = VectorLoopValueMap.getVectorValue(V, Part);
    }
    Builder.restoreIP(OldIP);
    return VectorValue;
  }

  // If this scalar is unknown, assume that it is a constant or that it is
  // loop invariant. Broadcast V and save the value for future uses.
  Value *B = getBroadcastInstrs(V);
  VectorLoopValueMap.setVectorValue(V, Part, B);
  return B;
}

Value *
InnerLoopVectorizer::getOrCreateScalarValue(Value *V,
                                            const VPIteration &Instance) {
  // If the value is not an instruction contained in the loop, it should
  // already be scalar.
  if (OrigLoop->isLoopInvariant(V))
    return V;

  assert(Instance.Lane > 0
             ? !Cost->isUniformAfterVectorization(cast<Instruction>(V), VF)
             : true && "Uniform values only have lane zero");

  // If the value from the original loop has not been vectorized, it is
  // represented by UF x VF scalar values in the new loop. Return the requested
  // scalar value.
  if (VectorLoopValueMap.hasScalarValue(V, Instance))
    return VectorLoopValueMap.getScalarValue(V, Instance);

  // If the value has not been scalarized, get its entry in VectorLoopValueMap
  // for the given unroll part. If this entry is not a vector type (i.e., the
  // vectorization factor is one), there is no need to generate an
  // extractelement instruction.
  auto *U = getOrCreateVectorValue(V, Instance.Part);
  if (!U->getType()->isVectorTy()) {
    assert(VF == 1 && "Value not scalarized has non-vector type");
    return U;
  }

  // Otherwise, the value from the original loop has been vectorized and is
  // represented by UF vector values. Extract and return the requested scalar
  // value from the appropriate vector lane.
  return Builder.CreateExtractElement(U, Builder.getInt32(Instance.Lane));
}

void InnerLoopVectorizer::packScalarIntoVectorValue(
    Value *V, const VPIteration &Instance) {
  assert(V != Induction && "The new induction variable should not be used.");
  assert(!V->getType()->isVectorTy() && "Can't pack a vector");
  assert(!V->getType()->isVoidTy() && "Type does not produce a value");

  Value *ScalarInst = VectorLoopValueMap.getScalarValue(V, Instance);
  Value *VectorValue = VectorLoopValueMap.getVectorValue(V, Instance.Part);
  VectorValue = Builder.CreateInsertElement(VectorValue, ScalarInst,
                                            Builder.getInt32(Instance.Lane));
  VectorLoopValueMap.resetVectorValue(V, Instance.Part, VectorValue);
}

Value *InnerLoopVectorizer::reverseVector(Value *Vec) {
  assert(Vec->getType()->isVectorTy() && "Invalid type");
  SmallVector<Constant *, 8> ShuffleMask;
  for (unsigned i = 0; i < VF; ++i)
    ShuffleMask.push_back(Builder.getInt32(VF - i - 1));

  return Builder.CreateShuffleVector(Vec, UndefValue::get(Vec->getType()),
                                     ConstantVector::get(ShuffleMask),
                                     "reverse");
}

// Try to vectorize the interleave group that \p Instr belongs to.
//
// E.g. Translate following interleaved load group (factor = 3):
//   for (i = 0; i < N; i+=3) {
//     R = Pic[i];             // Member of index 0
//     G = Pic[i+1];           // Member of index 1
//     B = Pic[i+2];           // Member of index 2
//     ... // do something to R, G, B
//   }
// To:
//   %wide.vec = load <12 x i32>                       ; Read 4 tuples of R,G,B
//   %R.vec = shuffle %wide.vec, undef, <0, 3, 6, 9>   ; R elements
//   %G.vec = shuffle %wide.vec, undef, <1, 4, 7, 10>  ; G elements
//   %B.vec = shuffle %wide.vec, undef, <2, 5, 8, 11>  ; B elements
//
// Or translate following interleaved store group (factor = 3):
//   for (i = 0; i < N; i+=3) {
//     ...
//     do something to R, G, B
//     Pic[i]   = R;           // Member of index 0
//     Pic[i+1] = G;           // Member of index 1
//     Pic[i+2] = B;           // Member of index 2
//   }
// To:
//   %R_G.vec = shuffle %R.vec, %G.vec, <0, 1, 2, ..., 7>
//   %B_U.vec = shuffle %B.vec, undef, <0, 1, 2, 3, u, u, u, u>
//   %interleaved.vec = shuffle %R_G.vec, %B_U.vec,
//        <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>    ; Interleave R,G,B elements
//   store <12 x i32> %interleaved.vec              ; Write 4 tuples of R,G,B
void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
  const InterleaveGroup *Group = Legal->getInterleavedAccessGroup(Instr);
  assert(Group && "Fail to get an interleaved access group.");

  // Skip if current instruction is not the insert position.
  if (Instr != Group->getInsertPos())
    return;

  const DataLayout &DL = Instr->getModule()->getDataLayout();
  Value *Ptr = getPointerOperand(Instr);

  // Prepare for the vector type of the interleaved load/store.
  Type *ScalarTy = getMemInstValueType(Instr);
  unsigned InterleaveFactor = Group->getFactor();
  Type *VecTy = VectorType::get(ScalarTy, InterleaveFactor * VF);
  Type *PtrTy = VecTy->getPointerTo(getMemInstAddressSpace(Instr));

  // Prepare for the new pointers.
  setDebugLocFromInst(Builder, Ptr);
  SmallVector<Value *, 2> NewPtrs;
  unsigned Index = Group->getIndex(Instr);

  // If the group is reverse, adjust the index to refer to the last vector lane
  // instead of the first. We adjust the index from the first vector lane,
  // rather than directly getting the pointer for lane VF - 1, because the
  // pointer operand of the interleaved access is supposed to be uniform. For
  // uniform instructions, we're only required to generate a value for the
  // first vector lane in each unroll iteration.
  if (Group->isReverse())
    Index += (VF - 1) * Group->getFactor();

  for (unsigned Part = 0; Part < UF; Part++) {
    Value *NewPtr = getOrCreateScalarValue(Ptr, {Part, 0});

    // Notice current instruction could be any index. Need to adjust the
    // address to the member of index 0.
    //
    // E.g.  a = A[i+1];     // Member of index 1 (Current instruction)
    //       b = A[i];       // Member of index 0
    // Current pointer is pointed to A[i+1], adjust it to A[i].
    //
    // E.g.  A[i+1] = a;     // Member of index 1
    //       A[i]   = b;     // Member of index 0
    //       A[i+2] = c;     // Member of index 2 (Current instruction)
    // Current pointer is pointed to A[i+2], adjust it to A[i].
    NewPtr = Builder.CreateGEP(NewPtr, Builder.getInt32(-Index));

    // Cast to the vector pointer type.
    NewPtrs.push_back(Builder.CreateBitCast(NewPtr, PtrTy));
  }

  setDebugLocFromInst(Builder, Instr);
  Value *UndefVec = UndefValue::get(VecTy);

  // Vectorize the interleaved load group.
  if (isa<LoadInst>(Instr)) {
    // For each unroll part, create a wide load for the group.
    SmallVector<Value *, 2> NewLoads;
    for (unsigned Part = 0; Part < UF; Part++) {
      auto *NewLoad = Builder.CreateAlignedLoad(
          NewPtrs[Part], Group->getAlignment(), "wide.vec");
      Group->addMetadata(NewLoad);
      NewLoads.push_back(NewLoad);
    }

    // For each member in the group, shuffle out the appropriate data from the
    // wide loads.
    for (unsigned I = 0; I < InterleaveFactor; ++I) {
      Instruction *Member = Group->getMember(I);

      // Skip the gaps in the group.
      if (!Member)
        continue;

      Constant *StrideMask = createStrideMask(Builder, I, InterleaveFactor, VF);
      for (unsigned Part = 0; Part < UF; Part++) {
        Value *StridedVec = Builder.CreateShuffleVector(
            NewLoads[Part], UndefVec, StrideMask, "strided.vec");

        // If this member has different type, cast the result type.
        if (Member->getType() != ScalarTy) {
          VectorType *OtherVTy = VectorType::get(Member->getType(), VF);
          StridedVec = createBitOrPointerCast(StridedVec, OtherVTy, DL);
        }

        if (Group->isReverse())
          StridedVec = reverseVector(StridedVec);

        VectorLoopValueMap.setVectorValue(Member, Part, StridedVec);
      }
    }
    return;
  }

  // The sub vector type for current instruction.
  VectorType *SubVT = VectorType::get(ScalarTy, VF);

  // Vectorize the interleaved store group.
  for (unsigned Part = 0; Part < UF; Part++) {
    // Collect the stored vector from each member.
    SmallVector<Value *, 4> StoredVecs;
    for (unsigned i = 0; i < InterleaveFactor; i++) {
      // Interleaved store group doesn't allow a gap, so each index has a member
      Instruction *Member = Group->getMember(i);
      assert(Member && "Fail to get a member from an interleaved store group");

      Value *StoredVec = getOrCreateVectorValue(
          cast<StoreInst>(Member)->getValueOperand(), Part);
      if (Group->isReverse())
        StoredVec = reverseVector(StoredVec);

      // If this member has different type, cast it to a unified type.
      if (StoredVec->getType() != SubVT)
        StoredVec = createBitOrPointerCast(StoredVec, SubVT, DL);

      StoredVecs.push_back(StoredVec);
    }

    // Concatenate all vectors into a wide vector.
    Value *WideVec = concatenateVectors(Builder, StoredVecs);

    // Interleave the elements in the wide vector.
    Constant *IMask = createInterleaveMask(Builder, VF, InterleaveFactor);
    Value *IVec = Builder.CreateShuffleVector(WideVec, UndefVec, IMask,
                                              "interleaved.vec");

    Instruction *NewStoreInstr =
        Builder.CreateAlignedStore(IVec, NewPtrs[Part], Group->getAlignment());
    Group->addMetadata(NewStoreInstr);
  }
}

void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
                                                     VectorParts *BlockInMask) {
  // Attempt to issue a wide load.
  LoadInst *LI = dyn_cast<LoadInst>(Instr);
  StoreInst *SI = dyn_cast<StoreInst>(Instr);

  assert((LI || SI) && "Invalid Load/Store instruction");

  LoopVectorizationCostModel::InstWidening Decision =
      Cost->getWideningDecision(Instr, VF);
  assert(Decision != LoopVectorizationCostModel::CM_Unknown &&
         "CM decision should be taken at this point");
  if (Decision == LoopVectorizationCostModel::CM_Interleave)
    return vectorizeInterleaveGroup(Instr);

  Type *ScalarDataTy = getMemInstValueType(Instr);
  Type *DataTy = VectorType::get(ScalarDataTy, VF);
  Value *Ptr = getPointerOperand(Instr);
  unsigned Alignment = getMemInstAlignment(Instr);
  // An alignment of 0 means target abi alignment. We need to use the scalar's
  // target abi alignment in such a case.
  const DataLayout &DL = Instr->getModule()->getDataLayout();
  if (!Alignment)
    Alignment = DL.getABITypeAlignment(ScalarDataTy);
  unsigned AddressSpace = getMemInstAddressSpace(Instr);

  // Determine if the pointer operand of the access is either consecutive or
  // reverse consecutive.
  bool Reverse = (Decision == LoopVectorizationCostModel::CM_Widen_Reverse);
  bool ConsecutiveStride =
      Reverse || (Decision == LoopVectorizationCostModel::CM_Widen);
  bool CreateGatherScatter =
      (Decision == LoopVectorizationCostModel::CM_GatherScatter);

  // Either Ptr feeds a vector load/store, or a vector GEP should feed a vector
  // gather/scatter. Otherwise Decision should have been to Scalarize.
  assert((ConsecutiveStride || CreateGatherScatter) &&
         "The instruction should be scalarized");

  // Handle consecutive loads/stores.
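  // Illustrative note (assumed example, not from this change): for a
  // consecutive access with VF = 4 and UF = 2, part 0 covers elements [0, 3]
  // and part 1 elements [4, 7] relative to Ptr. For a reverse access the parts
  // step downwards, which is why the code below offsets by -Part * VF and then
  // by (1 - VF) to reach the first element of each wide access.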
  if (ConsecutiveStride)
    Ptr = getOrCreateScalarValue(Ptr, {0, 0});

  VectorParts Mask;
  bool isMaskRequired = BlockInMask;
  if (isMaskRequired)
    Mask = *BlockInMask;

  // Handle Stores:
  if (SI) {
    assert(!Legal->isUniform(SI->getPointerOperand()) &&
           "We do not allow storing to uniform addresses");
    setDebugLocFromInst(Builder, SI);

    for (unsigned Part = 0; Part < UF; ++Part) {
      Instruction *NewSI = nullptr;
      Value *StoredVal = getOrCreateVectorValue(SI->getValueOperand(), Part);
      if (CreateGatherScatter) {
        Value *MaskPart = isMaskRequired ? Mask[Part] : nullptr;
        Value *VectorGep = getOrCreateVectorValue(Ptr, Part);
        NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment,
                                            MaskPart);
      } else {
        // Calculate the pointer for the specific unroll-part.
        Value *PartPtr =
            Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(Part * VF));

        if (Reverse) {
          // If we store to reverse consecutive memory locations, then we need
          // to reverse the order of elements in the stored value.
          StoredVal = reverseVector(StoredVal);
          // We don't want to update the value in the map as it might be used in
          // another expression. So don't call resetVectorValue(StoredVal).

          // If the address is consecutive but reversed, then the
          // wide store needs to start at the last vector element.
          PartPtr =
              Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(-Part * VF));
          PartPtr =
              Builder.CreateGEP(nullptr, PartPtr, Builder.getInt32(1 - VF));
          if (isMaskRequired) // Reverse of a null all-one mask is a null mask.
            Mask[Part] = reverseVector(Mask[Part]);
        }

        Value *VecPtr =
            Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace));

        if (isMaskRequired)
          NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
                                            Mask[Part]);
        else
          NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment);
      }
      addMetadata(NewSI, SI);
    }
    return;
  }

  // Handle loads.
  assert(LI && "Must have a load instruction");
  setDebugLocFromInst(Builder, LI);
  for (unsigned Part = 0; Part < UF; ++Part) {
    Value *NewLI;
    if (CreateGatherScatter) {
      Value *MaskPart = isMaskRequired ? Mask[Part] : nullptr;
      Value *VectorGep = getOrCreateVectorValue(Ptr, Part);
      NewLI = Builder.CreateMaskedGather(VectorGep, Alignment, MaskPart,
                                         nullptr, "wide.masked.gather");
      addMetadata(NewLI, LI);
    } else {
      // Calculate the pointer for the specific unroll-part.
      Value *PartPtr =
          Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(Part * VF));

      if (Reverse) {
        // If the address is consecutive but reversed, then the
        // wide load needs to start at the last vector element.
        PartPtr = Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(-Part * VF));
        PartPtr =
            Builder.CreateGEP(nullptr, PartPtr, Builder.getInt32(1 - VF));
        if (isMaskRequired) // Reverse of a null all-one mask is a null mask.
          Mask[Part] = reverseVector(Mask[Part]);
      }

      Value *VecPtr =
          Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace));
      if (isMaskRequired)
        NewLI = Builder.CreateMaskedLoad(VecPtr, Alignment, Mask[Part],
                                         UndefValue::get(DataTy),
                                         "wide.masked.load");
      else
        NewLI = Builder.CreateAlignedLoad(VecPtr, Alignment, "wide.load");

      // Add metadata to the load, but setVectorValue to the reverse shuffle.
      addMetadata(NewLI, LI);
      if (Reverse)
        NewLI = reverseVector(NewLI);
    }
    VectorLoopValueMap.setVectorValue(Instr, Part, NewLI);
  }
}

void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
                                               const VPIteration &Instance,
                                               bool IfPredicateInstr) {
  assert(!Instr->getType()->isAggregateType() && "Can't handle vectors");

  setDebugLocFromInst(Builder, Instr);

  // Does this instruction return a value ?
bool IsVoidRetTy = Instr->getType()->isVoidTy(); Instruction *Cloned = Instr->clone(); if (!IsVoidRetTy) Cloned->setName(Instr->getName() + ".cloned"); // Replace the operands of the cloned instructions with their scalar // equivalents in the new loop. for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) { auto *NewOp = getOrCreateScalarValue(Instr->getOperand(op), Instance); Cloned->setOperand(op, NewOp); } addNewMetadata(Cloned, Instr); // Place the cloned scalar in the new loop. Builder.Insert(Cloned); // Add the cloned scalar to the scalar map entry. VectorLoopValueMap.setScalarValue(Instr, Instance, Cloned); // If we just cloned a new assumption, add it the assumption cache. if (auto *II = dyn_cast(Cloned)) if (II->getIntrinsicID() == Intrinsic::assume) AC->registerAssumption(II); // End if-block. if (IfPredicateInstr) PredicatedInstructions.push_back(Cloned); } PHINode *InnerLoopVectorizer::createInductionVariable(Loop *L, Value *Start, Value *End, Value *Step, Instruction *DL) { BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); // As we're just creating this loop, it's possible no latch exists // yet. If so, use the header as this will be a single block loop. if (!Latch) Latch = Header; IRBuilder<> Builder(&*Header->getFirstInsertionPt()); Instruction *OldInst = getDebugLocFromInstOrOperands(OldInduction); setDebugLocFromInst(Builder, OldInst); auto *Induction = Builder.CreatePHI(Start->getType(), 2, "index"); Builder.SetInsertPoint(Latch->getTerminator()); setDebugLocFromInst(Builder, OldInst); // Create i+1 and fill the PHINode. Value *Next = Builder.CreateAdd(Induction, Step, "index.next"); Induction->addIncoming(Start, L->getLoopPreheader()); Induction->addIncoming(Next, Latch); // Create the compare. Value *ICmp = Builder.CreateICmpEQ(Next, End); Builder.CreateCondBr(ICmp, L->getExitBlock(), Header); // Now we have two terminators. Remove the old one from the block. Latch->getTerminator()->eraseFromParent(); return Induction; } Value *InnerLoopVectorizer::getOrCreateTripCount(Loop *L) { if (TripCount) return TripCount; IRBuilder<> Builder(L->getLoopPreheader()->getTerminator()); // Find the loop boundaries. ScalarEvolution *SE = PSE.getSE(); const SCEV *BackedgeTakenCount = PSE.getBackedgeTakenCount(); assert(BackedgeTakenCount != SE->getCouldNotCompute() && "Invalid loop count"); Type *IdxTy = Legal->getWidestInductionType(); // The exit count might have the type of i64 while the phi is i32. This can // happen if we have an induction variable that is sign extended before the // compare. The only way that we get a backedge taken count is that the // induction variable was signed and as such will not overflow. In such a case // truncation is legal. if (BackedgeTakenCount->getType()->getPrimitiveSizeInBits() > IdxTy->getPrimitiveSizeInBits()) BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount, IdxTy); BackedgeTakenCount = SE->getNoopOrZeroExtend(BackedgeTakenCount, IdxTy); // Get the total trip count from the count by adding 1. const SCEV *ExitCount = SE->getAddExpr( BackedgeTakenCount, SE->getOne(BackedgeTakenCount->getType())); const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); // Expand the trip count and place the new instructions in the preheader. // Notice that the pre-header does not change, only the loop body. SCEVExpander Exp(*SE, DL, "induction"); // Count holds the overall loop count (N). 
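  // For example, for a loop "for (i = 0; i < n; ++i)" the backedge-taken
  // count is n - 1, so the trip count expanded below is (n - 1) + 1 == n.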
  TripCount = Exp.expandCodeFor(ExitCount, ExitCount->getType(),
                                L->getLoopPreheader()->getTerminator());

  if (TripCount->getType()->isPointerTy())
    TripCount =
        CastInst::CreatePointerCast(TripCount, IdxTy,
                                    "exitcount.ptrcnt.to.int",
                                    L->getLoopPreheader()->getTerminator());

  return TripCount;
}

Value *InnerLoopVectorizer::getOrCreateVectorTripCount(Loop *L) {
  if (VectorTripCount)
    return VectorTripCount;

  Value *TC = getOrCreateTripCount(L);
  IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());

  // Now we need to generate the expression for the part of the loop that the
  // vectorized body will execute. This is equal to N - (N % Step) if scalar
  // iterations are not required for correctness, or N - Step, otherwise. Step
  // is equal to the vectorization factor (number of SIMD elements) times the
  // unroll factor (number of SIMD instructions).
  Constant *Step = ConstantInt::get(TC->getType(), VF * UF);
  Value *R = Builder.CreateURem(TC, Step, "n.mod.vf");

  // If there is a non-reversed interleaved group that may speculatively access
  // memory out-of-bounds, we need to ensure that there will be at least one
  // iteration of the scalar epilogue loop. Thus, if the step evenly divides
  // the trip count, we set the remainder to be equal to the step. If the step
  // does not evenly divide the trip count, no adjustment is necessary since
  // there will already be scalar iterations. Note that the minimum iterations
  // check ensures that N >= Step.
  if (VF > 1 && Legal->requiresScalarEpilogue()) {
    auto *IsZero = Builder.CreateICmpEQ(R, ConstantInt::get(R->getType(), 0));
    R = Builder.CreateSelect(IsZero, Step, R);
  }

  VectorTripCount = Builder.CreateSub(TC, R, "n.vec");

  return VectorTripCount;
}

Value *InnerLoopVectorizer::createBitOrPointerCast(Value *V,
                                                   VectorType *DstVTy,
                                                   const DataLayout &DL) {
  // Verify that V is a vector type with same number of elements as DstVTy.
  unsigned VF = DstVTy->getNumElements();
  VectorType *SrcVecTy = cast<VectorType>(V->getType());
  assert((VF == SrcVecTy->getNumElements()) &&
         "Vector dimensions do not match");
  Type *SrcElemTy = SrcVecTy->getElementType();
  Type *DstElemTy = DstVTy->getElementType();
  assert((DL.getTypeSizeInBits(SrcElemTy) == DL.getTypeSizeInBits(DstElemTy)) &&
         "Vector elements must have same size");

  // Do a direct cast if element types are castable.
  if (CastInst::isBitOrNoopPointerCastable(SrcElemTy, DstElemTy, DL)) {
    return Builder.CreateBitOrPointerCast(V, DstVTy);
  }
  // V cannot be directly casted to desired vector type.
  // May happen when V is a floating point vector but DstVTy is a vector of
  // pointers or vice-versa. Handle this using a two-step bitcast using an
  // intermediate Integer type for the bitcast i.e. Ptr <-> Int <-> Float.
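  // For illustration, on a target with 64-bit pointers a <2 x double> value
  // would reach a <2 x i8*> destination type via the intermediate type
  // <2 x i64>:
  //   %as.int = bitcast <2 x double> %v to <2 x i64>
  //   %as.ptr = inttoptr <2 x i64> %as.int to <2 x i8*>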
assert((DstElemTy->isPointerTy() != SrcElemTy->isPointerTy()) && "Only one type should be a pointer type"); assert((DstElemTy->isFloatingPointTy() != SrcElemTy->isFloatingPointTy()) && "Only one type should be a floating point type"); Type *IntTy = IntegerType::getIntNTy(V->getContext(), DL.getTypeSizeInBits(SrcElemTy)); VectorType *VecIntTy = VectorType::get(IntTy, VF); Value *CastVal = Builder.CreateBitOrPointerCast(V, VecIntTy); return Builder.CreateBitOrPointerCast(CastVal, DstVTy); } void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L, BasicBlock *Bypass) { Value *Count = getOrCreateTripCount(L); BasicBlock *BB = L->getLoopPreheader(); IRBuilder<> Builder(BB->getTerminator()); // Generate code to check if the loop's trip count is less than VF * UF, or // equal to it in case a scalar epilogue is required; this implies that the // vector trip count is zero. This check also covers the case where adding one // to the backedge-taken count overflowed leading to an incorrect trip count // of zero. In this case we will also jump to the scalar loop. auto P = Legal->requiresScalarEpilogue() ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT; Value *CheckMinIters = Builder.CreateICmp( P, Count, ConstantInt::get(Count->getType(), VF * UF), "min.iters.check"); BasicBlock *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph"); // Update dominator tree immediately if the generated block is a // LoopBypassBlock because SCEV expansions to generate loop bypass // checks may query it before the current function is finished. DT->addNewBlock(NewBB, BB); if (L->getParentLoop()) L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI); ReplaceInstWithInst(BB->getTerminator(), BranchInst::Create(Bypass, NewBB, CheckMinIters)); LoopBypassBlocks.push_back(BB); } void InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) { BasicBlock *BB = L->getLoopPreheader(); // Generate the code to check that the SCEV assumptions that we made. // We want the new basic block to start at the first instruction in a // sequence of instructions that form a check. SCEVExpander Exp(*PSE.getSE(), Bypass->getModule()->getDataLayout(), "scev.check"); Value *SCEVCheck = Exp.expandCodeForPredicate(&PSE.getUnionPredicate(), BB->getTerminator()); if (auto *C = dyn_cast(SCEVCheck)) if (C->isZero()) return; // Create a new block containing the stride check. BB->setName("vector.scevcheck"); auto *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph"); // Update dominator tree immediately if the generated block is a // LoopBypassBlock because SCEV expansions to generate loop bypass // checks may query it before the current function is finished. DT->addNewBlock(NewBB, BB); if (L->getParentLoop()) L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI); ReplaceInstWithInst(BB->getTerminator(), BranchInst::Create(Bypass, NewBB, SCEVCheck)); LoopBypassBlocks.push_back(BB); AddedSafetyChecks = true; } void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass) { BasicBlock *BB = L->getLoopPreheader(); // Generate the code that checks in runtime if arrays overlap. We put the // checks into a separate block to make the more common case of few elements // faster. Instruction *FirstCheckInst; Instruction *MemRuntimeCheck; std::tie(FirstCheckInst, MemRuntimeCheck) = Legal->getLAI()->addRuntimeChecks(BB->getTerminator()); if (!MemRuntimeCheck) return; // Create a new block containing the memory check. 
BB->setName("vector.memcheck"); auto *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph"); // Update dominator tree immediately if the generated block is a // LoopBypassBlock because SCEV expansions to generate loop bypass // checks may query it before the current function is finished. DT->addNewBlock(NewBB, BB); if (L->getParentLoop()) L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI); ReplaceInstWithInst(BB->getTerminator(), BranchInst::Create(Bypass, NewBB, MemRuntimeCheck)); LoopBypassBlocks.push_back(BB); AddedSafetyChecks = true; // We currently don't use LoopVersioning for the actual loop cloning but we // still use it to add the noalias metadata. LVer = llvm::make_unique(*Legal->getLAI(), OrigLoop, LI, DT, PSE.getSE()); LVer->prepareNoAliasMetadata(); } BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() { /* In this function we generate a new loop. The new loop will contain the vectorized instructions while the old loop will continue to run the scalar remainder. [ ] <-- loop iteration number check. / | / v | [ ] <-- vector loop bypass (may consist of multiple blocks). | / | | / v || [ ] <-- vector pre header. |/ | | v | [ ] \ | [ ]_| <-- vector loop. | | | v | -[ ] <--- middle-block. | / | | / v -|- >[ ] <--- new preheader. | | | v | [ ] \ | [ ]_| <-- old scalar loop to handle remainder. \ | \ v >[ ] <-- exit block. ... */ BasicBlock *OldBasicBlock = OrigLoop->getHeader(); BasicBlock *VectorPH = OrigLoop->getLoopPreheader(); BasicBlock *ExitBlock = OrigLoop->getExitBlock(); assert(VectorPH && "Invalid loop structure"); assert(ExitBlock && "Must have an exit block"); // Some loops have a single integer induction variable, while other loops // don't. One example is c++ iterators that often have multiple pointer // induction variables. In the code below we also support a case where we // don't have a single induction variable. // // We try to obtain an induction variable from the original loop as hard // as possible. However if we don't find one that: // - is an integer // - counts from zero, stepping by one // - is the size of the widest induction variable type // then we create a new one. OldInduction = Legal->getPrimaryInduction(); Type *IdxTy = Legal->getWidestInductionType(); // Split the single block loop into the two loop structure described above. BasicBlock *VecBody = VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.body"); BasicBlock *MiddleBlock = VecBody->splitBasicBlock(VecBody->getTerminator(), "middle.block"); BasicBlock *ScalarPH = MiddleBlock->splitBasicBlock(MiddleBlock->getTerminator(), "scalar.ph"); // Create and register the new vector loop. Loop *Lp = LI->AllocateLoop(); Loop *ParentLoop = OrigLoop->getParentLoop(); // Insert the new loop into the loop nest and register the new basic blocks // before calling any utilities such as SCEV that require valid LoopInfo. if (ParentLoop) { ParentLoop->addChildLoop(Lp); ParentLoop->addBasicBlockToLoop(ScalarPH, *LI); ParentLoop->addBasicBlockToLoop(MiddleBlock, *LI); } else { LI->addTopLevelLoop(Lp); } Lp->addBasicBlockToLoop(VecBody, *LI); // Find the loop boundaries. Value *Count = getOrCreateTripCount(Lp); Value *StartIdx = ConstantInt::get(IdxTy, 0); // Now, compare the new count to zero. If it is zero skip the vector loop and // jump to the scalar loop. This check also covers the case where the // backedge-taken count is uint##_max: adding one to it will overflow leading // to an incorrect trip count of zero. In this (rare) case we will also jump // to the scalar loop. 
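  // For instance, with VF = 4 and UF = 2 the guard emitted below is
  //   %min.iters.check = icmp ult i64 %count, 8
  // (ule when a scalar epilogue is required), branching to the scalar loop
  // when the check is true.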
emitMinimumIterationCountCheck(Lp, ScalarPH); // Generate the code to check any assumptions that we've made for SCEV // expressions. emitSCEVChecks(Lp, ScalarPH); // Generate the code that checks in runtime if arrays overlap. We put the // checks into a separate block to make the more common case of few elements // faster. emitMemRuntimeChecks(Lp, ScalarPH); // Generate the induction variable. // The loop step is equal to the vectorization factor (num of SIMD elements) // times the unroll factor (num of SIMD instructions). Value *CountRoundDown = getOrCreateVectorTripCount(Lp); Constant *Step = ConstantInt::get(IdxTy, VF * UF); Induction = createInductionVariable(Lp, StartIdx, CountRoundDown, Step, getDebugLocFromInstOrOperands(OldInduction)); // We are going to resume the execution of the scalar loop. // Go over all of the induction variables that we found and fix the // PHIs that are left in the scalar version of the loop. // The starting values of PHI nodes depend on the counter of the last // iteration in the vectorized loop. // If we come from a bypass edge then we need to start from the original // start value. // This variable saves the new starting index for the scalar loop. It is used // to test if there are any tail iterations left once the vector loop has // completed. LoopVectorizationLegality::InductionList *List = Legal->getInductionVars(); for (auto &InductionEntry : *List) { PHINode *OrigPhi = InductionEntry.first; InductionDescriptor II = InductionEntry.second; // Create phi nodes to merge from the backedge-taken check block. PHINode *BCResumeVal = PHINode::Create( OrigPhi->getType(), 3, "bc.resume.val", ScalarPH->getTerminator()); Value *&EndValue = IVEndValues[OrigPhi]; if (OrigPhi == OldInduction) { // We know what the end value is. EndValue = CountRoundDown; } else { IRBuilder<> B(Lp->getLoopPreheader()->getTerminator()); Type *StepType = II.getStep()->getType(); Instruction::CastOps CastOp = CastInst::getCastOpcode(CountRoundDown, true, StepType, true); Value *CRD = B.CreateCast(CastOp, CountRoundDown, StepType, "cast.crd"); const DataLayout &DL = OrigLoop->getHeader()->getModule()->getDataLayout(); EndValue = II.transform(B, CRD, PSE.getSE(), DL); EndValue->setName("ind.end"); } // The new PHI merges the original incoming value, in case of a bypass, // or the value at the end of the vectorized loop. BCResumeVal->addIncoming(EndValue, MiddleBlock); // Fix the scalar body counter (PHI node). unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH); // The old induction's phi node in the scalar body needs the truncated // value. for (BasicBlock *BB : LoopBypassBlocks) BCResumeVal->addIncoming(II.getStartValue(), BB); OrigPhi->setIncomingValue(BlockIdx, BCResumeVal); } // Add a check in the middle block to see if we have completed // all of the iterations in the first vector loop. // If (N - N%VF) == N, then we *don't* need to run the remainder. Value *CmpN = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, Count, CountRoundDown, "cmp.n", MiddleBlock->getTerminator()); ReplaceInstWithInst(MiddleBlock->getTerminator(), BranchInst::Create(ExitBlock, ScalarPH, CmpN)); // Get ready to start creating new instructions into the vectorized body. Builder.SetInsertPoint(&*VecBody->getFirstInsertionPt()); // Save the state. 
LoopVectorPreHeader = Lp->getLoopPreheader(); LoopScalarPreHeader = ScalarPH; LoopMiddleBlock = MiddleBlock; LoopExitBlock = ExitBlock; LoopVectorBody = VecBody; LoopScalarBody = OldBasicBlock; // Keep all loop hints from the original loop on the vector loop (we'll // replace the vectorizer-specific hints below). if (MDNode *LID = OrigLoop->getLoopID()) Lp->setLoopID(LID); LoopVectorizeHints Hints(Lp, true, *ORE); Hints.setAlreadyVectorized(); return LoopVectorPreHeader; } // Fix up external users of the induction variable. At this point, we are // in LCSSA form, with all external PHIs that use the IV having one input value, // coming from the remainder loop. We need those PHIs to also have a correct // value for the IV when arriving directly from the middle block. void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II, Value *CountRoundDown, Value *EndValue, BasicBlock *MiddleBlock) { // There are two kinds of external IV usages - those that use the value // computed in the last iteration (the PHI) and those that use the penultimate // value (the value that feeds into the phi from the loop latch). // We allow both, but they, obviously, have different values. assert(OrigLoop->getExitBlock() && "Expected a single exit block"); DenseMap MissingVals; // An external user of the last iteration's value should see the value that // the remainder loop uses to initialize its own IV. Value *PostInc = OrigPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch()); for (User *U : PostInc->users()) { Instruction *UI = cast(U); if (!OrigLoop->contains(UI)) { assert(isa(UI) && "Expected LCSSA form"); MissingVals[UI] = EndValue; } } // An external user of the penultimate value need to see EndValue - Step. // The simplest way to get this is to recompute it from the constituent SCEVs, // that is Start + (Step * (CRD - 1)). for (User *U : OrigPhi->users()) { auto *UI = cast(U); if (!OrigLoop->contains(UI)) { const DataLayout &DL = OrigLoop->getHeader()->getModule()->getDataLayout(); assert(isa(UI) && "Expected LCSSA form"); IRBuilder<> B(MiddleBlock->getTerminator()); Value *CountMinusOne = B.CreateSub( CountRoundDown, ConstantInt::get(CountRoundDown->getType(), 1)); Value *CMO = !II.getStep()->getType()->isIntegerTy() ? B.CreateCast(Instruction::SIToFP, CountMinusOne, II.getStep()->getType()) : B.CreateSExtOrTrunc(CountMinusOne, II.getStep()->getType()); CMO->setName("cast.cmo"); Value *Escape = II.transform(B, CMO, PSE.getSE(), DL); Escape->setName("ind.escape"); MissingVals[UI] = Escape; } } for (auto &I : MissingVals) { PHINode *PHI = cast(I.first); // One corner case we have to handle is two IVs "chasing" each-other, // that is %IV2 = phi [...], [ %IV1, %latch ] // In this case, if IV1 has an external use, we need to avoid adding both // "last value of IV1" and "penultimate value of IV2". So, verify that we // don't already have an incoming value for the middle block. 
    if (PHI->getBasicBlockIndex(MiddleBlock) == -1)
      PHI->addIncoming(I.second, MiddleBlock);
  }
}

namespace {

struct CSEDenseMapInfo {
  static bool canHandle(const Instruction *I) {
    return isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
           isa<ShuffleVectorInst>(I) || isa<GetElementPtrInst>(I);
  }

  static inline Instruction *getEmptyKey() {
    return DenseMapInfo<Instruction *>::getEmptyKey();
  }

  static inline Instruction *getTombstoneKey() {
    return DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static unsigned getHashValue(const Instruction *I) {
    assert(canHandle(I) && "Unknown instruction!");
    return hash_combine(I->getOpcode(), hash_combine_range(I->value_op_begin(),
                                                           I->value_op_end()));
  }

  static bool isEqual(const Instruction *LHS, const Instruction *RHS) {
    if (LHS == getEmptyKey() || RHS == getEmptyKey() ||
        LHS == getTombstoneKey() || RHS == getTombstoneKey())
      return LHS == RHS;
    return LHS->isIdenticalTo(RHS);
  }
};

} // end anonymous namespace

///\brief Perform cse of induction variable instructions.
static void cse(BasicBlock *BB) {
  // Perform simple cse.
  SmallDenseMap<Instruction *, Instruction *, 4, CSEDenseMapInfo> CSEMap;
  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
    Instruction *In = &*I++;

    if (!CSEDenseMapInfo::canHandle(In))
      continue;

    // Check if we can replace this instruction with any of the
    // visited instructions.
    if (Instruction *V = CSEMap.lookup(In)) {
      In->replaceAllUsesWith(V);
      In->eraseFromParent();
      continue;
    }

    CSEMap[In] = In;
  }
}

/// \brief Estimate the overhead of scalarizing an instruction. This is a
/// convenience wrapper for the type-based getScalarizationOverhead API.
static unsigned getScalarizationOverhead(Instruction *I, unsigned VF,
                                         const TargetTransformInfo &TTI) {
  if (VF == 1)
    return 0;

  unsigned Cost = 0;
  Type *RetTy = ToVectorTy(I->getType(), VF);
  if (!RetTy->isVoidTy() &&
      (!isa<LoadInst>(I) || !TTI.supportsEfficientVectorElementLoadStore()))
    Cost += TTI.getScalarizationOverhead(RetTy, true, false);

  if (CallInst *CI = dyn_cast<CallInst>(I)) {
    SmallVector<const Value *, 4> Operands(CI->arg_operands());
    Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
  } else if (!isa<StoreInst>(I) ||
             !TTI.supportsEfficientVectorElementLoadStore()) {
    SmallVector<const Value *, 4> Operands(I->operand_values());
    Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
  }

  return Cost;
}

// Estimate cost of a call instruction CI if it were vectorized with factor VF.
// Return the cost of the instruction, including scalarization overhead if it's
// needed. The flag NeedToScalarize shows if the call needs to be scalarized -
// i.e. either vector version isn't available, or is too expensive.
static unsigned getVectorCallCost(CallInst *CI, unsigned VF,
                                  const TargetTransformInfo &TTI,
                                  const TargetLibraryInfo *TLI,
                                  bool &NeedToScalarize) {
  Function *F = CI->getCalledFunction();
  StringRef FnName = CI->getCalledFunction()->getName();
  Type *ScalarRetTy = CI->getType();
  SmallVector<Type *, 4> Tys, ScalarTys;
  for (auto &ArgOp : CI->arg_operands())
    ScalarTys.push_back(ArgOp->getType());

  // Estimate cost of scalarized vector call. The source operands are assumed
  // to be vectors, so we need to extract individual elements from there,
  // execute VF scalar calls, and then gather the result into the vector return
  // value.
  unsigned ScalarCallCost = TTI.getCallInstrCost(F, ScalarRetTy, ScalarTys);
  if (VF == 1)
    return ScalarCallCost;

  // Compute corresponding vector type for return value and arguments.
  Type *RetTy = ToVectorTy(ScalarRetTy, VF);
  for (Type *ScalarTy : ScalarTys)
    Tys.push_back(ToVectorTy(ScalarTy, VF));

  // Compute costs of unpacking argument values for the scalar calls and
  // packing the return values to a vector.
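  // For illustration (hypothetical numbers): with VF = 4 and a scalar call
  // cost of 10, the estimate below is 4 * 10 plus the cost of four extracts
  // per vector argument and four inserts for the result; this total is then
  // compared against the cost of a single genuine vector call.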
  unsigned ScalarizationCost = getScalarizationOverhead(CI, VF, TTI);

  unsigned Cost = ScalarCallCost * VF + ScalarizationCost;

  // If we can't emit a vector call for this function, then the currently found
  // cost is the cost we need to return.
  NeedToScalarize = true;
  if (!TLI || !TLI->isFunctionVectorizable(FnName, VF) || CI->isNoBuiltin())
    return Cost;

  // If the corresponding vector cost is cheaper, return its cost.
  unsigned VectorCallCost = TTI.getCallInstrCost(nullptr, RetTy, Tys);
  if (VectorCallCost < Cost) {
    NeedToScalarize = false;
    return VectorCallCost;
  }
  return Cost;
}

// Estimate cost of an intrinsic call instruction CI if it were vectorized with
// factor VF. Return the cost of the instruction, including scalarization
// overhead if it's needed.
static unsigned getVectorIntrinsicCost(CallInst *CI, unsigned VF,
                                       const TargetTransformInfo &TTI,
                                       const TargetLibraryInfo *TLI) {
  Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
  assert(ID && "Expected intrinsic call!");

  FastMathFlags FMF;
  if (auto *FPMO = dyn_cast<FPMathOperator>(CI))
    FMF = FPMO->getFastMathFlags();

  SmallVector<Value *, 4> Operands(CI->arg_operands());
  return TTI.getIntrinsicInstrCost(ID, CI->getType(), Operands, FMF, VF);
}

static Type *smallestIntegerVectorType(Type *T1, Type *T2) {
  auto *I1 = cast<IntegerType>(T1->getVectorElementType());
  auto *I2 = cast<IntegerType>(T2->getVectorElementType());
  return I1->getBitWidth() < I2->getBitWidth() ? T1 : T2;
}

static Type *largestIntegerVectorType(Type *T1, Type *T2) {
  auto *I1 = cast<IntegerType>(T1->getVectorElementType());
  auto *I2 = cast<IntegerType>(T2->getVectorElementType());
  return I1->getBitWidth() > I2->getBitWidth() ? T1 : T2;
}

void InnerLoopVectorizer::truncateToMinimalBitwidths() {
  // For every instruction `I` in MinBWs, truncate the operands, create a
  // truncated version of `I` and reextend its result. InstCombine runs
  // later and will remove any ext/trunc pairs.
  SmallPtrSet<Value *, 4> Erased;
  for (const auto &KV : Cost->getMinimalBitwidths()) {
    // If the value wasn't vectorized, we must maintain the original scalar
    // type. The absence of the value from VectorLoopValueMap indicates that it
    // wasn't vectorized.
    if (!VectorLoopValueMap.hasAnyVectorValue(KV.first))
      continue;
    for (unsigned Part = 0; Part < UF; ++Part) {
      Value *I = getOrCreateVectorValue(KV.first, Part);
      if (Erased.count(I) || I->use_empty() || !isa<Instruction>(I))
        continue;
      Type *OriginalTy = I->getType();
      Type *ScalarTruncatedTy =
          IntegerType::get(OriginalTy->getContext(), KV.second);
      Type *TruncatedTy = VectorType::get(ScalarTruncatedTy,
                                          OriginalTy->getVectorNumElements());
      if (TruncatedTy == OriginalTy)
        continue;

      IRBuilder<> B(cast<Instruction>(I));
      auto ShrinkOperand = [&](Value *V) -> Value * {
        if (auto *ZI = dyn_cast<ZExtInst>(V))
          if (ZI->getSrcTy() == TruncatedTy)
            return ZI->getOperand(0);
        return B.CreateZExtOrTrunc(V, TruncatedTy);
      };

      // The actual instruction modification depends on the instruction type,
      // unfortunately.
      Value *NewI = nullptr;
      if (auto *BO = dyn_cast<BinaryOperator>(I)) {
        NewI = B.CreateBinOp(BO->getOpcode(), ShrinkOperand(BO->getOperand(0)),
                             ShrinkOperand(BO->getOperand(1)));

        // Any wrapping introduced by shrinking this operation shouldn't be
        // considered undefined behavior. So, we can't unconditionally copy
        // arithmetic wrapping flags to NewI.
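        // For example, an i32 "add nsw" narrowed to i8 may legitimately wrap
        // at 8 bits, so copying nsw/nuw here could introduce poison values
        // the original code did not have.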
cast(NewI)->copyIRFlags(I, /*IncludeWrapFlags=*/false); } else if (auto *CI = dyn_cast(I)) { NewI = B.CreateICmp(CI->getPredicate(), ShrinkOperand(CI->getOperand(0)), ShrinkOperand(CI->getOperand(1))); } else if (auto *SI = dyn_cast(I)) { NewI = B.CreateSelect(SI->getCondition(), ShrinkOperand(SI->getTrueValue()), ShrinkOperand(SI->getFalseValue())); } else if (auto *CI = dyn_cast(I)) { switch (CI->getOpcode()) { default: llvm_unreachable("Unhandled cast!"); case Instruction::Trunc: NewI = ShrinkOperand(CI->getOperand(0)); break; case Instruction::SExt: NewI = B.CreateSExtOrTrunc( CI->getOperand(0), smallestIntegerVectorType(OriginalTy, TruncatedTy)); break; case Instruction::ZExt: NewI = B.CreateZExtOrTrunc( CI->getOperand(0), smallestIntegerVectorType(OriginalTy, TruncatedTy)); break; } } else if (auto *SI = dyn_cast(I)) { auto Elements0 = SI->getOperand(0)->getType()->getVectorNumElements(); auto *O0 = B.CreateZExtOrTrunc( SI->getOperand(0), VectorType::get(ScalarTruncatedTy, Elements0)); auto Elements1 = SI->getOperand(1)->getType()->getVectorNumElements(); auto *O1 = B.CreateZExtOrTrunc( SI->getOperand(1), VectorType::get(ScalarTruncatedTy, Elements1)); NewI = B.CreateShuffleVector(O0, O1, SI->getMask()); } else if (isa(I)) { // Don't do anything with the operands, just extend the result. continue; } else if (auto *IE = dyn_cast(I)) { auto Elements = IE->getOperand(0)->getType()->getVectorNumElements(); auto *O0 = B.CreateZExtOrTrunc( IE->getOperand(0), VectorType::get(ScalarTruncatedTy, Elements)); auto *O1 = B.CreateZExtOrTrunc(IE->getOperand(1), ScalarTruncatedTy); NewI = B.CreateInsertElement(O0, O1, IE->getOperand(2)); } else if (auto *EE = dyn_cast(I)) { auto Elements = EE->getOperand(0)->getType()->getVectorNumElements(); auto *O0 = B.CreateZExtOrTrunc( EE->getOperand(0), VectorType::get(ScalarTruncatedTy, Elements)); NewI = B.CreateExtractElement(O0, EE->getOperand(2)); } else { llvm_unreachable("Unhandled instruction type!"); } // Lastly, extend the result. NewI->takeName(cast(I)); Value *Res = B.CreateZExtOrTrunc(NewI, OriginalTy); I->replaceAllUsesWith(Res); cast(I)->eraseFromParent(); Erased.insert(I); VectorLoopValueMap.resetVectorValue(KV.first, Part, Res); } } // We'll have created a bunch of ZExts that are now parentless. Clean up. for (const auto &KV : Cost->getMinimalBitwidths()) { // If the value wasn't vectorized, we must maintain the original scalar // type. The absence of the value from VectorLoopValueMap indicates that it // wasn't vectorized. if (!VectorLoopValueMap.hasAnyVectorValue(KV.first)) continue; for (unsigned Part = 0; Part < UF; ++Part) { Value *I = getOrCreateVectorValue(KV.first, Part); ZExtInst *Inst = dyn_cast(I); if (Inst && Inst->use_empty()) { Value *NewI = Inst->getOperand(0); Inst->eraseFromParent(); VectorLoopValueMap.resetVectorValue(KV.first, Part, NewI); } } } } void InnerLoopVectorizer::fixVectorizedLoop() { // Insert truncates and extends for any truncated instructions as hints to // InstCombine. if (VF > 1) truncateToMinimalBitwidths(); // At this point every instruction in the original loop is widened to a // vector form. Now we need to fix the recurrences in the loop. These PHI // nodes are currently empty because we did not want to introduce cycles. // This is the second stage of vectorizing recurrences. fixCrossIterationPHIs(); // Update the dominator tree. // // FIXME: After creating the structure of the new loop, the dominator tree is // no longer up-to-date, and it remains that way until we update it // here. 
An out-of-date dominator tree is problematic for SCEV, // because SCEVExpander uses it to guide code generation. The // vectorizer use SCEVExpanders in several places. Instead, we should // keep the dominator tree up-to-date as we go. updateAnalysis(); // Fix-up external users of the induction variables. for (auto &Entry : *Legal->getInductionVars()) fixupIVUsers(Entry.first, Entry.second, getOrCreateVectorTripCount(LI->getLoopFor(LoopVectorBody)), IVEndValues[Entry.first], LoopMiddleBlock); fixLCSSAPHIs(); for (Instruction *PI : PredicatedInstructions) sinkScalarOperands(&*PI); // Remove redundant induction instructions. cse(LoopVectorBody); } void InnerLoopVectorizer::fixCrossIterationPHIs() { // In order to support recurrences we need to be able to vectorize Phi nodes. // Phi nodes have cycles, so we need to vectorize them in two stages. This is // stage #2: We now need to fix the recurrences by adding incoming edges to // the currently empty PHI nodes. At this point every instruction in the // original loop is widened to a vector form so we can use them to construct // the incoming edges. for (PHINode &Phi : OrigLoop->getHeader()->phis()) { // Handle first-order recurrences and reductions that need to be fixed. if (Legal->isFirstOrderRecurrence(&Phi)) fixFirstOrderRecurrence(&Phi); else if (Legal->isReductionVariable(&Phi)) fixReduction(&Phi); } } void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) { // This is the second phase of vectorizing first-order recurrences. An // overview of the transformation is described below. Suppose we have the // following loop. // // for (int i = 0; i < n; ++i) // b[i] = a[i] - a[i - 1]; // // There is a first-order recurrence on "a". For this loop, the shorthand // scalar IR looks like: // // scalar.ph: // s_init = a[-1] // br scalar.body // // scalar.body: // i = phi [0, scalar.ph], [i+1, scalar.body] // s1 = phi [s_init, scalar.ph], [s2, scalar.body] // s2 = a[i] // b[i] = s2 - s1 // br cond, scalar.body, ... // // In this example, s1 is a recurrence because it's value depends on the // previous iteration. In the first phase of vectorization, we created a // temporary value for s1. We now complete the vectorization and produce the // shorthand vector IR shown below (for VF = 4, UF = 1). // // vector.ph: // v_init = vector(..., ..., ..., a[-1]) // br vector.body // // vector.body // i = phi [0, vector.ph], [i+4, vector.body] // v1 = phi [v_init, vector.ph], [v2, vector.body] // v2 = a[i, i+1, i+2, i+3]; // v3 = vector(v1(3), v2(0, 1, 2)) // b[i, i+1, i+2, i+3] = v2 - v3 // br cond, vector.body, middle.block // // middle.block: // x = v2(3) // br scalar.ph // // scalar.ph: // s_init = phi [x, middle.block], [a[-1], otherwise] // br scalar.body // // After execution completes the vector loop, we extract the next value of // the recurrence (x) to use as the initial value in the scalar loop. // Get the original loop preheader and single loop latch. auto *Preheader = OrigLoop->getLoopPreheader(); auto *Latch = OrigLoop->getLoopLatch(); // Get the initial and previous values of the scalar recurrence. auto *ScalarInit = Phi->getIncomingValueForBlock(Preheader); auto *Previous = Phi->getIncomingValueForBlock(Latch); // Create a vector from the initial value. 
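  // For instance, for an i32 recurrence with VF = 4 this emits, in the
  // vector preheader,
  //   %vector.recur.init = insertelement <4 x i32> undef, i32 %s_init, i32 3
  // placing the scalar initial value in the last lane, matching the example
  // above.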
auto *VectorInit = ScalarInit; if (VF > 1) { Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator()); VectorInit = Builder.CreateInsertElement( UndefValue::get(VectorType::get(VectorInit->getType(), VF)), VectorInit, Builder.getInt32(VF - 1), "vector.recur.init"); } // We constructed a temporary phi node in the first phase of vectorization. // This phi node will eventually be deleted. Builder.SetInsertPoint( cast(VectorLoopValueMap.getVectorValue(Phi, 0))); // Create a phi node for the new recurrence. The current value will either be // the initial value inserted into a vector or loop-varying vector value. auto *VecPhi = Builder.CreatePHI(VectorInit->getType(), 2, "vector.recur"); VecPhi->addIncoming(VectorInit, LoopVectorPreHeader); // Get the vectorized previous value of the last part UF - 1. It appears last // among all unrolled iterations, due to the order of their construction. Value *PreviousLastPart = getOrCreateVectorValue(Previous, UF - 1); // Set the insertion point after the previous value if it is an instruction. // Note that the previous value may have been constant-folded so it is not // guaranteed to be an instruction in the vector loop. Also, if the previous // value is a phi node, we should insert after all the phi nodes to avoid // breaking basic block verification. if (LI->getLoopFor(LoopVectorBody)->isLoopInvariant(PreviousLastPart) || isa(PreviousLastPart)) Builder.SetInsertPoint(&*LoopVectorBody->getFirstInsertionPt()); else Builder.SetInsertPoint( &*++BasicBlock::iterator(cast(PreviousLastPart))); // We will construct a vector for the recurrence by combining the values for // the current and previous iterations. This is the required shuffle mask. SmallVector ShuffleMask(VF); ShuffleMask[0] = Builder.getInt32(VF - 1); for (unsigned I = 1; I < VF; ++I) ShuffleMask[I] = Builder.getInt32(I + VF - 1); // The vector from which to take the initial value for the current iteration // (actual or unrolled). Initially, this is the vector phi node. Value *Incoming = VecPhi; // Shuffle the current and previous vector and update the vector parts. for (unsigned Part = 0; Part < UF; ++Part) { Value *PreviousPart = getOrCreateVectorValue(Previous, Part); Value *PhiPart = VectorLoopValueMap.getVectorValue(Phi, Part); auto *Shuffle = VF > 1 ? Builder.CreateShuffleVector(Incoming, PreviousPart, ConstantVector::get(ShuffleMask)) : Incoming; PhiPart->replaceAllUsesWith(Shuffle); cast(PhiPart)->eraseFromParent(); VectorLoopValueMap.resetVectorValue(Phi, Part, Shuffle); Incoming = PreviousPart; } // Fix the latch value of the new recurrence in the vector loop. VecPhi->addIncoming(Incoming, LI->getLoopFor(LoopVectorBody)->getLoopLatch()); // Extract the last vector element in the middle block. This will be the // initial value for the recurrence when jumping to the scalar loop. auto *ExtractForScalar = Incoming; if (VF > 1) { Builder.SetInsertPoint(LoopMiddleBlock->getTerminator()); ExtractForScalar = Builder.CreateExtractElement( ExtractForScalar, Builder.getInt32(VF - 1), "vector.recur.extract"); } // Extract the second last element in the middle block if the // Phi is used outside the loop. We need to extract the phi itself // and not the last element (the phi update in the current iteration). This // will be the value when jumping to the exit block from the LoopMiddleBlock, // when the scalar loop is not run at all. 
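  // For illustration, with VF = 4 the extract below takes lane 2
  // ("vector.recur.extract.for.phi"): the value the phi held on entry to the
  // final vector iteration, rather than lane 3, which holds that iteration's
  // update.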
  Value *ExtractForPhiUsedOutsideLoop = nullptr;
  if (VF > 1)
    ExtractForPhiUsedOutsideLoop = Builder.CreateExtractElement(
        Incoming, Builder.getInt32(VF - 2), "vector.recur.extract.for.phi");
  // When loop is unrolled without vectorizing, initialize
  // ExtractForPhiUsedOutsideLoop with the value just prior to unrolled value
  // of `Incoming`. This is analogous to the vectorized case above: extracting
  // the second last element when VF > 1.
  else if (UF > 1)
    ExtractForPhiUsedOutsideLoop = getOrCreateVectorValue(Previous, UF - 2);

  // Fix the initial value of the original recurrence in the scalar loop.
  Builder.SetInsertPoint(&*LoopScalarPreHeader->begin());
  auto *Start = Builder.CreatePHI(Phi->getType(), 2, "scalar.recur.init");
  for (auto *BB : predecessors(LoopScalarPreHeader)) {
    auto *Incoming = BB == LoopMiddleBlock ? ExtractForScalar : ScalarInit;
    Start->addIncoming(Incoming, BB);
  }

  Phi->setIncomingValue(Phi->getBasicBlockIndex(LoopScalarPreHeader), Start);
  Phi->setName("scalar.recur");

  // Finally, fix users of the recurrence outside the loop. The users will need
  // either the last value of the scalar recurrence or the last value of the
  // vector recurrence we extracted in the middle block. Since the loop is in
  // LCSSA form, we just need to find the phi node for the original scalar
  // recurrence in the exit block, and then add an edge for the middle block.
  for (PHINode &LCSSAPhi : LoopExitBlock->phis()) {
    if (LCSSAPhi.getIncomingValue(0) == Phi) {
      LCSSAPhi.addIncoming(ExtractForPhiUsedOutsideLoop, LoopMiddleBlock);
      break;
    }
  }
}

void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
  Constant *Zero = Builder.getInt32(0);

  // Get its reduction variable descriptor.
  assert(Legal->isReductionVariable(Phi) &&
         "Unable to find the reduction variable");
  RecurrenceDescriptor RdxDesc = (*Legal->getReductionVars())[Phi];

  RecurrenceDescriptor::RecurrenceKind RK = RdxDesc.getRecurrenceKind();
  TrackingVH<Value> ReductionStartValue = RdxDesc.getRecurrenceStartValue();
  Instruction *LoopExitInst = RdxDesc.getLoopExitInstr();
  RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind =
      RdxDesc.getMinMaxRecurrenceKind();
  setDebugLocFromInst(Builder, ReductionStartValue);

  // We need to generate a reduction vector from the incoming scalar.
  // To do so, we need to generate the 'identity' vector and override
  // one of the elements with the incoming scalar reduction. We need
  // to do it in the vector-loop preheader.
  Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());

  // This is the vector-clone of the value that leaves the loop.
  Type *VecTy = getOrCreateVectorValue(LoopExitInst, 0)->getType();

  // Find the reduction identity variable. Zero for addition, or, xor;
  // one for multiplication; -1 for And.
  Value *Identity;
  Value *VectorStart;
  if (RK == RecurrenceDescriptor::RK_IntegerMinMax ||
      RK == RecurrenceDescriptor::RK_FloatMinMax) {
    // MinMax reductions have the start value as their identity.
    if (VF == 1) {
      VectorStart = Identity = ReductionStartValue;
    } else {
      VectorStart = Identity =
          Builder.CreateVectorSplat(VF, ReductionStartValue, "minmax.ident");
    }
  } else {
    // Handle other reduction kinds:
    Constant *Iden = RecurrenceDescriptor::getRecurrenceIdentity(
        RK, VecTy->getScalarType());
    if (VF == 1) {
      Identity = Iden;
      // This vector is the Identity vector where the first element is the
      // incoming scalar reduction.
      VectorStart = ReductionStartValue;
    } else {
      Identity = ConstantVector::getSplat(VF, Iden);

      // This vector is the Identity vector where the first element is the
      // incoming scalar reduction.
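      // For example, for an integer add reduction with VF = 4 and start
      // value %s, Identity is <0, 0, 0, 0> and the insertelement below
      // yields VectorStart = <%s, 0, 0, 0>.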
VectorStart = Builder.CreateInsertElement(Identity, ReductionStartValue, Zero); } } // Fix the vector-loop phi. // Reductions do not have to start at zero. They can start with // any loop invariant values. BasicBlock *Latch = OrigLoop->getLoopLatch(); Value *LoopVal = Phi->getIncomingValueForBlock(Latch); for (unsigned Part = 0; Part < UF; ++Part) { Value *VecRdxPhi = getOrCreateVectorValue(Phi, Part); Value *Val = getOrCreateVectorValue(LoopVal, Part); // Make sure to add the reduction stat value only to the // first unroll part. Value *StartVal = (Part == 0) ? VectorStart : Identity; cast(VecRdxPhi)->addIncoming(StartVal, LoopVectorPreHeader); cast(VecRdxPhi) ->addIncoming(Val, LI->getLoopFor(LoopVectorBody)->getLoopLatch()); } // Before each round, move the insertion point right between // the PHIs and the values we are going to write. // This allows us to write both PHINodes and the extractelement // instructions. Builder.SetInsertPoint(&*LoopMiddleBlock->getFirstInsertionPt()); setDebugLocFromInst(Builder, LoopExitInst); // If the vector reduction can be performed in a smaller type, we truncate // then extend the loop exit value to enable InstCombine to evaluate the // entire expression in the smaller type. if (VF > 1 && Phi->getType() != RdxDesc.getRecurrenceType()) { Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), VF); Builder.SetInsertPoint( LI->getLoopFor(LoopVectorBody)->getLoopLatch()->getTerminator()); VectorParts RdxParts(UF); for (unsigned Part = 0; Part < UF; ++Part) { RdxParts[Part] = VectorLoopValueMap.getVectorValue(LoopExitInst, Part); Value *Trunc = Builder.CreateTrunc(RdxParts[Part], RdxVecTy); Value *Extnd = RdxDesc.isSigned() ? Builder.CreateSExt(Trunc, VecTy) : Builder.CreateZExt(Trunc, VecTy); for (Value::user_iterator UI = RdxParts[Part]->user_begin(); UI != RdxParts[Part]->user_end();) if (*UI != Trunc) { (*UI++)->replaceUsesOfWith(RdxParts[Part], Extnd); RdxParts[Part] = Extnd; } else { ++UI; } } Builder.SetInsertPoint(&*LoopMiddleBlock->getFirstInsertionPt()); for (unsigned Part = 0; Part < UF; ++Part) { RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy); VectorLoopValueMap.resetVectorValue(LoopExitInst, Part, RdxParts[Part]); } } // Reduce all of the unrolled parts into a single vector. Value *ReducedPartRdx = VectorLoopValueMap.getVectorValue(LoopExitInst, 0); unsigned Op = RecurrenceDescriptor::getRecurrenceBinOp(RK); setDebugLocFromInst(Builder, ReducedPartRdx); for (unsigned Part = 1; Part < UF; ++Part) { Value *RdxPart = VectorLoopValueMap.getVectorValue(LoopExitInst, Part); if (Op != Instruction::ICmp && Op != Instruction::FCmp) // Floating point operations had to be 'fast' to enable the reduction. ReducedPartRdx = addFastMathFlag( Builder.CreateBinOp((Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx")); else ReducedPartRdx = RecurrenceDescriptor::createMinMaxOp( Builder, MinMaxKind, ReducedPartRdx, RdxPart); } if (VF > 1) { bool NoNaN = Legal->hasFunNoNaNAttr(); ReducedPartRdx = createTargetReduction(Builder, TTI, RdxDesc, ReducedPartRdx, NoNaN); // If the reduction can be performed in a smaller type, we need to extend // the reduction to the wider type before we branch to the original loop. if (Phi->getType() != RdxDesc.getRecurrenceType()) ReducedPartRdx = RdxDesc.isSigned() ? Builder.CreateSExt(ReducedPartRdx, Phi->getType()) : Builder.CreateZExt(ReducedPartRdx, Phi->getType()); } // Create a phi node that merges control-flow from the backedge-taken check // block and the middle block. 
PHINode *BCBlockPhi = PHINode::Create(Phi->getType(), 2, "bc.merge.rdx", LoopScalarPreHeader->getTerminator()); for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I) BCBlockPhi->addIncoming(ReductionStartValue, LoopBypassBlocks[I]); BCBlockPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock); // Now, we need to fix the users of the reduction variable // inside and outside of the scalar remainder loop. // We know that the loop is in LCSSA form. We need to update the // PHI nodes in the exit blocks. for (PHINode &LCSSAPhi : LoopExitBlock->phis()) { // All PHINodes need to have a single entry edge, or two if // we already fixed them. assert(LCSSAPhi.getNumIncomingValues() < 3 && "Invalid LCSSA PHI"); // We found a reduction value exit-PHI. Update it with the // incoming bypass edge. if (LCSSAPhi.getIncomingValue(0) == LoopExitInst) LCSSAPhi.addIncoming(ReducedPartRdx, LoopMiddleBlock); } // end of the LCSSA phi scan. // Fix the scalar loop reduction variable with the incoming reduction sum // from the vector body and from the backedge value. int IncomingEdgeBlockIdx = Phi->getBasicBlockIndex(OrigLoop->getLoopLatch()); assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index"); // Pick the other block. int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1); Phi->setIncomingValue(SelfEdgeBlockIdx, BCBlockPhi); Phi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst); } void InnerLoopVectorizer::fixLCSSAPHIs() { for (PHINode &LCSSAPhi : LoopExitBlock->phis()) { if (LCSSAPhi.getNumIncomingValues() == 1) { assert(OrigLoop->isLoopInvariant(LCSSAPhi.getIncomingValue(0)) && "Incoming value isn't loop invariant"); LCSSAPhi.addIncoming(LCSSAPhi.getIncomingValue(0), LoopMiddleBlock); } } } void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) { // The basic block and loop containing the predicated instruction. auto *PredBB = PredInst->getParent(); auto *VectorLoop = LI->getLoopFor(PredBB); // Initialize a worklist with the operands of the predicated instruction. SetVector Worklist(PredInst->op_begin(), PredInst->op_end()); // Holds instructions that we need to analyze again. An instruction may be // reanalyzed if we don't yet know if we can sink it or not. SmallVector InstsToReanalyze; // Returns true if a given use occurs in the predicated block. Phi nodes use // their operands in their corresponding predecessor blocks. auto isBlockOfUsePredicated = [&](Use &U) -> bool { auto *I = cast(U.getUser()); BasicBlock *BB = I->getParent(); if (auto *Phi = dyn_cast(I)) BB = Phi->getIncomingBlock( PHINode::getIncomingValueNumForOperand(U.getOperandNo())); return BB == PredBB; }; // Iteratively sink the scalarized operands of the predicated instruction // into the block we created for it. When an instruction is sunk, it's // operands are then added to the worklist. The algorithm ends after one pass // through the worklist doesn't sink a single instruction. bool Changed; do { // Add the instructions that need to be reanalyzed to the worklist, and // reset the changed indicator. Worklist.insert(InstsToReanalyze.begin(), InstsToReanalyze.end()); InstsToReanalyze.clear(); Changed = false; while (!Worklist.empty()) { auto *I = dyn_cast(Worklist.pop_back_val()); // We can't sink an instruction if it is a phi node, is already in the // predicated block, is not in the loop, or may have side effects. if (!I || isa(I) || I->getParent() == PredBB || !VectorLoop->contains(I) || I->mayHaveSideEffects()) continue; // It's legal to sink the instruction if all its uses occur in the // predicated block. 
Otherwise, there's nothing to do yet, and we may // need to reanalyze the instruction. if (!llvm::all_of(I->uses(), isBlockOfUsePredicated)) { InstsToReanalyze.push_back(I); continue; } // Move the instruction to the beginning of the predicated block, and add // it's operands to the worklist. I->moveBefore(&*PredBB->getFirstInsertionPt()); Worklist.insert(I->op_begin(), I->op_end()); // The sinking may have enabled other instructions to be sunk, so we will // need to iterate. Changed = true; } } while (Changed); } void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF, unsigned VF) { assert(PN->getParent() == OrigLoop->getHeader() && "Non-header phis should have been handled elsewhere"); PHINode *P = cast(PN); // In order to support recurrences we need to be able to vectorize Phi nodes. // Phi nodes have cycles, so we need to vectorize them in two stages. This is // stage #1: We create a new vector PHI node with no incoming edges. We'll use // this value when we vectorize all of the instructions that use the PHI. if (Legal->isReductionVariable(P) || Legal->isFirstOrderRecurrence(P)) { for (unsigned Part = 0; Part < UF; ++Part) { // This is phase one of vectorizing PHIs. Type *VecTy = (VF == 1) ? PN->getType() : VectorType::get(PN->getType(), VF); Value *EntryPart = PHINode::Create( VecTy, 2, "vec.phi", &*LoopVectorBody->getFirstInsertionPt()); VectorLoopValueMap.setVectorValue(P, Part, EntryPart); } return; } setDebugLocFromInst(Builder, P); // This PHINode must be an induction variable. // Make sure that we know about it. assert(Legal->getInductionVars()->count(P) && "Not an induction variable"); InductionDescriptor II = Legal->getInductionVars()->lookup(P); const DataLayout &DL = OrigLoop->getHeader()->getModule()->getDataLayout(); // FIXME: The newly created binary instructions should contain nsw/nuw flags, // which can be found from the original scalar operations. switch (II.getKind()) { case InductionDescriptor::IK_NoInduction: llvm_unreachable("Unknown induction"); case InductionDescriptor::IK_IntInduction: case InductionDescriptor::IK_FpInduction: llvm_unreachable("Integer/fp induction is handled elsewhere."); case InductionDescriptor::IK_PtrInduction: { // Handle the pointer induction variable case. assert(P->getType()->isPointerTy() && "Unexpected type."); // This is the normalized GEP that starts counting at zero. Value *PtrInd = Induction; PtrInd = Builder.CreateSExtOrTrunc(PtrInd, II.getStep()->getType()); // Determine the number of scalars we need to generate for each unroll // iteration. If the instruction is uniform, we only need to generate the // first lane. Otherwise, we generate all VF values. unsigned Lanes = Cost->isUniformAfterVectorization(P, VF) ? 1 : VF; // These are the scalar results. Notice that we don't generate vector GEPs // because scalar GEPs result in better code. for (unsigned Part = 0; Part < UF; ++Part) { for (unsigned Lane = 0; Lane < Lanes; ++Lane) { Constant *Idx = ConstantInt::get(PtrInd->getType(), Lane + Part * VF); Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx); Value *SclrGep = II.transform(Builder, GlobalIdx, PSE.getSE(), DL); SclrGep->setName("next.gep"); VectorLoopValueMap.setScalarValue(P, {Part, Lane}, SclrGep); } } return; } } } /// A helper function for checking whether an integer division-related /// instruction may divide by zero (in which case it must be predicated if /// executed conditionally in the scalar code). /// TODO: It may be worthwhile to generalize and check isKnownNonZero(). 
/// Non-zero divisors that are non compile-time constants will not be /// converted into multiplication, so we will still end up scalarizing /// the division, but can do so w/o predication. static bool mayDivideByZero(Instruction &I) { assert((I.getOpcode() == Instruction::UDiv || I.getOpcode() == Instruction::SDiv || I.getOpcode() == Instruction::URem || I.getOpcode() == Instruction::SRem) && "Unexpected instruction"); Value *Divisor = I.getOperand(1); auto *CInt = dyn_cast(Divisor); return !CInt || CInt->isZero(); } void InnerLoopVectorizer::widenInstruction(Instruction &I) { switch (I.getOpcode()) { case Instruction::Br: case Instruction::PHI: llvm_unreachable("This instruction is handled by a different recipe."); case Instruction::GetElementPtr: { // Construct a vector GEP by widening the operands of the scalar GEP as // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP // results in a vector of pointers when at least one operand of the GEP // is vector-typed. Thus, to keep the representation compact, we only use // vector-typed operands for loop-varying values. auto *GEP = cast(&I); if (VF > 1 && OrigLoop->hasLoopInvariantOperands(GEP)) { // If we are vectorizing, but the GEP has only loop-invariant operands, // the GEP we build (by only using vector-typed operands for // loop-varying values) would be a scalar pointer. Thus, to ensure we // produce a vector of pointers, we need to either arbitrarily pick an // operand to broadcast, or broadcast a clone of the original GEP. // Here, we broadcast a clone of the original. // // TODO: If at some point we decide to scalarize instructions having // loop-invariant operands, this special case will no longer be // required. We would add the scalarization decision to // collectLoopScalars() and teach getVectorValue() to broadcast // the lane-zero scalar value. auto *Clone = Builder.Insert(GEP->clone()); for (unsigned Part = 0; Part < UF; ++Part) { Value *EntryPart = Builder.CreateVectorSplat(VF, Clone); VectorLoopValueMap.setVectorValue(&I, Part, EntryPart); addMetadata(EntryPart, GEP); } } else { // If the GEP has at least one loop-varying operand, we are sure to // produce a vector of pointers. But if we are only unrolling, we want // to produce a scalar GEP for each unroll part. Thus, the GEP we // produce with the code below will be scalar (if VF == 1) or vector // (otherwise). Note that for the unroll-only case, we still maintain // values in the vector mapping with initVector, as we do for other // instructions. for (unsigned Part = 0; Part < UF; ++Part) { // The pointer operand of the new GEP. If it's loop-invariant, we // won't broadcast it. auto *Ptr = OrigLoop->isLoopInvariant(GEP->getPointerOperand()) ? GEP->getPointerOperand() : getOrCreateVectorValue(GEP->getPointerOperand(), Part); // Collect all the indices for the new GEP. If any index is // loop-invariant, we won't broadcast it. SmallVector Indices; for (auto &U : make_range(GEP->idx_begin(), GEP->idx_end())) { if (OrigLoop->isLoopInvariant(U.get())) Indices.push_back(U.get()); else Indices.push_back(getOrCreateVectorValue(U.get(), Part)); } // Create the new GEP. Note that this GEP may be a scalar if VF == 1, // but it should be a vector, otherwise. auto *NewGEP = GEP->isInBounds() ? 
Builder.CreateInBoundsGEP(Ptr, Indices) : Builder.CreateGEP(Ptr, Indices); assert((VF == 1 || NewGEP->getType()->isVectorTy()) && "NewGEP is not a pointer vector"); VectorLoopValueMap.setVectorValue(&I, Part, NewGEP); addMetadata(NewGEP, GEP); } } break; } case Instruction::UDiv: case Instruction::SDiv: case Instruction::SRem: case Instruction::URem: case Instruction::Add: case Instruction::FAdd: case Instruction::Sub: case Instruction::FSub: case Instruction::Mul: case Instruction::FMul: case Instruction::FDiv: case Instruction::FRem: case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: case Instruction::And: case Instruction::Or: case Instruction::Xor: { // Just widen binops. auto *BinOp = cast(&I); setDebugLocFromInst(Builder, BinOp); for (unsigned Part = 0; Part < UF; ++Part) { Value *A = getOrCreateVectorValue(BinOp->getOperand(0), Part); Value *B = getOrCreateVectorValue(BinOp->getOperand(1), Part); Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A, B); if (BinaryOperator *VecOp = dyn_cast(V)) VecOp->copyIRFlags(BinOp); // Use this vector value for all users of the original instruction. VectorLoopValueMap.setVectorValue(&I, Part, V); addMetadata(V, BinOp); } break; } case Instruction::Select: { // Widen selects. // If the selector is loop invariant we can create a select // instruction with a scalar condition. Otherwise, use vector-select. auto *SE = PSE.getSE(); bool InvariantCond = SE->isLoopInvariant(PSE.getSCEV(I.getOperand(0)), OrigLoop); setDebugLocFromInst(Builder, &I); // The condition can be loop invariant but still defined inside the // loop. This means that we can't just use the original 'cond' value. // We have to take the 'vectorized' value and pick the first lane. // Instcombine will make this a no-op. auto *ScalarCond = getOrCreateScalarValue(I.getOperand(0), {0, 0}); for (unsigned Part = 0; Part < UF; ++Part) { Value *Cond = getOrCreateVectorValue(I.getOperand(0), Part); Value *Op0 = getOrCreateVectorValue(I.getOperand(1), Part); Value *Op1 = getOrCreateVectorValue(I.getOperand(2), Part); Value *Sel = Builder.CreateSelect(InvariantCond ? ScalarCond : Cond, Op0, Op1); VectorLoopValueMap.setVectorValue(&I, Part, Sel); addMetadata(Sel, &I); } break; } case Instruction::ICmp: case Instruction::FCmp: { // Widen compares. Generate vector compares. bool FCmp = (I.getOpcode() == Instruction::FCmp); auto *Cmp = dyn_cast(&I); setDebugLocFromInst(Builder, Cmp); for (unsigned Part = 0; Part < UF; ++Part) { Value *A = getOrCreateVectorValue(Cmp->getOperand(0), Part); Value *B = getOrCreateVectorValue(Cmp->getOperand(1), Part); Value *C = nullptr; if (FCmp) { // Propagate fast math flags. IRBuilder<>::FastMathFlagGuard FMFG(Builder); Builder.setFastMathFlags(Cmp->getFastMathFlags()); C = Builder.CreateFCmp(Cmp->getPredicate(), A, B); } else { C = Builder.CreateICmp(Cmp->getPredicate(), A, B); } VectorLoopValueMap.setVectorValue(&I, Part, C); addMetadata(C, &I); } break; } case Instruction::ZExt: case Instruction::SExt: case Instruction::FPToUI: case Instruction::FPToSI: case Instruction::FPExt: case Instruction::PtrToInt: case Instruction::IntToPtr: case Instruction::SIToFP: case Instruction::UIToFP: case Instruction::Trunc: case Instruction::FPTrunc: case Instruction::BitCast: { auto *CI = dyn_cast(&I); setDebugLocFromInst(Builder, CI); /// Vectorize casts. Type *DestTy = (VF == 1) ? 
CI->getType() : VectorType::get(CI->getType(), VF); for (unsigned Part = 0; Part < UF; ++Part) { Value *A = getOrCreateVectorValue(CI->getOperand(0), Part); Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy); VectorLoopValueMap.setVectorValue(&I, Part, Cast); addMetadata(Cast, &I); } break; } case Instruction::Call: { // Ignore dbg intrinsics. if (isa(I)) break; setDebugLocFromInst(Builder, &I); Module *M = I.getParent()->getParent()->getParent(); auto *CI = cast(&I); StringRef FnName = CI->getCalledFunction()->getName(); Function *F = CI->getCalledFunction(); Type *RetTy = ToVectorTy(CI->getType(), VF); SmallVector Tys; for (Value *ArgOperand : CI->arg_operands()) Tys.push_back(ToVectorTy(ArgOperand->getType(), VF)); Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); // The flag shows whether we use Intrinsic or a usual Call for vectorized // version of the instruction. // Is it beneficial to perform intrinsic call compared to lib call? bool NeedToScalarize; unsigned CallCost = getVectorCallCost(CI, VF, *TTI, TLI, NeedToScalarize); bool UseVectorIntrinsic = ID && getVectorIntrinsicCost(CI, VF, *TTI, TLI) <= CallCost; assert((UseVectorIntrinsic || !NeedToScalarize) && "Instruction should be scalarized elsewhere."); for (unsigned Part = 0; Part < UF; ++Part) { SmallVector Args; for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) { Value *Arg = CI->getArgOperand(i); // Some intrinsics have a scalar argument - don't replace it with a // vector. if (!UseVectorIntrinsic || !hasVectorInstrinsicScalarOpd(ID, i)) Arg = getOrCreateVectorValue(CI->getArgOperand(i), Part); Args.push_back(Arg); } Function *VectorF; if (UseVectorIntrinsic) { // Use vector version of the intrinsic. Type *TysForDecl[] = {CI->getType()}; if (VF > 1) TysForDecl[0] = VectorType::get(CI->getType()->getScalarType(), VF); VectorF = Intrinsic::getDeclaration(M, ID, TysForDecl); } else { // Use vector version of the library call. StringRef VFnName = TLI->getVectorizedFunction(FnName, VF); assert(!VFnName.empty() && "Vector function name is empty."); VectorF = M->getFunction(VFnName); if (!VectorF) { // Generate a declaration FunctionType *FTy = FunctionType::get(RetTy, Tys, false); VectorF = Function::Create(FTy, Function::ExternalLinkage, VFnName, M); VectorF->copyAttributesFrom(F); } } assert(VectorF && "Can't create vector function."); SmallVector OpBundles; CI->getOperandBundlesAsDefs(OpBundles); CallInst *V = Builder.CreateCall(VectorF, Args, OpBundles); if (isa(V)) V->copyFastMathFlags(CI); VectorLoopValueMap.setVectorValue(&I, Part, V); addMetadata(V, &I); } break; } default: // This instruction is not vectorized by simple widening. DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I); llvm_unreachable("Unhandled instruction!"); } // end of switch. } void InnerLoopVectorizer::updateAnalysis() { // Forget the original basic block. PSE.getSE()->forgetLoop(OrigLoop); // Update the dominator tree information. assert(DT->properlyDominates(LoopBypassBlocks.front(), LoopExitBlock) && "Entry does not dominate exit."); DT->addNewBlock(LoopMiddleBlock, LI->getLoopFor(LoopVectorBody)->getLoopLatch()); DT->addNewBlock(LoopScalarPreHeader, LoopBypassBlocks[0]); DT->changeImmediateDominator(LoopScalarBody, LoopScalarPreHeader); DT->changeImmediateDominator(LoopExitBlock, LoopBypassBlocks[0]); DEBUG(DT->verifyDomTree()); } /// \brief Check whether it is safe to if-convert this phi node. /// /// Phi nodes with constant expressions that can trap are not safe to if /// convert. 
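// Illustrative sketch (editorial, not part of the LLVM sources): the Call
// widening above keeps the vector intrinsic only when its estimated cost is
// no higher than the best call-based alternative. The struct and cost numbers
// below are invented to show the shape of that decision.
#if 0 // standalone sketch; not compiled as part of this file
#include <cstdio>

// Invented cost summary for one call site at a given VF.
struct WideningCosts {
  bool HasVectorIntrinsic;      // a vector intrinsic mapping exists
  unsigned IntrinsicCost;       // cost of the vector intrinsic
  unsigned LibOrScalarCallCost; // cost of a vector libcall / scalarized call
};

// Use the intrinsic only if it exists and is not more expensive.
static bool useVectorIntrinsic(const WideningCosts &C) {
  return C.HasVectorIntrinsic && C.IntrinsicCost <= C.LibOrScalarCallCost;
}

int main() {
  WideningCosts Cheap{true, 4, 12};  // intrinsic clearly wins
  WideningCosts NoIntr{false, 0, 9}; // no intrinsic: fall back to the call
  std::printf("%d %d\n", useVectorIntrinsic(Cheap), useVectorIntrinsic(NoIntr));
  return 0;
}
#endif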
static bool canIfConvertPHINodes(BasicBlock *BB) { for (PHINode &Phi : BB->phis()) { for (Value *V : Phi.incoming_values()) if (auto *C = dyn_cast(V)) if (C->canTrap()) return false; } return true; } bool LoopVectorizationLegality::canVectorizeWithIfConvert() { if (!EnableIfConversion) { ORE->emit(createMissedAnalysis("IfConversionDisabled") << "if-conversion is disabled"); return false; } assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable"); // A list of pointers that we can safely read and write to. SmallPtrSet SafePointes; // Collect safe addresses. for (BasicBlock *BB : TheLoop->blocks()) { if (blockNeedsPredication(BB)) continue; for (Instruction &I : *BB) if (auto *Ptr = getPointerOperand(&I)) SafePointes.insert(Ptr); } // Collect the blocks that need predication. BasicBlock *Header = TheLoop->getHeader(); for (BasicBlock *BB : TheLoop->blocks()) { // We don't support switch statements inside loops. if (!isa(BB->getTerminator())) { ORE->emit(createMissedAnalysis("LoopContainsSwitch", BB->getTerminator()) << "loop contains a switch statement"); return false; } // We must be able to predicate all blocks that need to be predicated. if (blockNeedsPredication(BB)) { if (!blockCanBePredicated(BB, SafePointes)) { ORE->emit(createMissedAnalysis("NoCFGForSelect", BB->getTerminator()) << "control flow cannot be substituted for a select"); return false; } } else if (BB != Header && !canIfConvertPHINodes(BB)) { ORE->emit(createMissedAnalysis("NoCFGForSelect", BB->getTerminator()) << "control flow cannot be substituted for a select"); return false; } } // We can if-convert this loop. return true; } bool LoopVectorizationLegality::canVectorize() { // Store the result and return it at the end instead of exiting early, in case // allowExtraAnalysis is used to report multiple reasons for not vectorizing. bool Result = true; bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); // We must have a loop in canonical form. Loops with indirectbr in them cannot // be canonicalized. if (!TheLoop->getLoopPreheader()) { DEBUG(dbgs() << "LV: Loop doesn't have a legal pre-header.\n"); ORE->emit(createMissedAnalysis("CFGNotUnderstood") << "loop control flow is not understood by vectorizer"); if (DoExtraAnalysis) Result = false; else return false; } // FIXME: The code is currently dead, since the loop gets sent to // LoopVectorizationLegality is already an innermost loop. // // We can only vectorize innermost loops. if (!TheLoop->empty()) { ORE->emit(createMissedAnalysis("NotInnermostLoop") << "loop is not the innermost loop"); if (DoExtraAnalysis) Result = false; else return false; } // We must have a single backedge. if (TheLoop->getNumBackEdges() != 1) { ORE->emit(createMissedAnalysis("CFGNotUnderstood") << "loop control flow is not understood by vectorizer"); if (DoExtraAnalysis) Result = false; else return false; } // We must have a single exiting block. if (!TheLoop->getExitingBlock()) { ORE->emit(createMissedAnalysis("CFGNotUnderstood") << "loop control flow is not understood by vectorizer"); if (DoExtraAnalysis) Result = false; else return false; } // We only handle bottom-tested loops, i.e. loop in which the condition is // checked at the end of each iteration. With that we can assume that all // instructions in the loop are executed the same number of times. 
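// Illustrative sketch (editorial, not part of the LLVM sources): canVectorize
// above either bails at the first failed legality test or, when extra analysis
// is allowed, keeps checking so every missed-optimization reason is reported.
// The three checks below are invented stand-ins for the real CFG/instruction/
// memory tests.
#if 0 // standalone sketch; not compiled as part of this file
#include <cstdio>
#include <string>
#include <vector>

static bool checkA() { return true; }
static bool checkB() { return false; }
static bool checkC() { return false; }

// With ReportAllReasons == false we return at the first failure; with it set
// we accumulate every reason and return the combined result at the end.
static bool canDoTransform(bool ReportAllReasons,
                           std::vector<std::string> &Why) {
  bool Result = true;
  auto fail = [&](const char *Reason) {
    Why.push_back(Reason);
    Result = false;
  };
  if (!checkA()) { fail("A failed"); if (!ReportAllReasons) return false; }
  if (!checkB()) { fail("B failed"); if (!ReportAllReasons) return false; }
  if (!checkC()) { fail("C failed"); if (!ReportAllReasons) return false; }
  return Result;
}

int main() {
  std::vector<std::string> Why;
  bool OK = canDoTransform(/*ReportAllReasons=*/true, Why);
  std::printf("ok=%d reasons=%zu\n", OK, Why.size()); // ok=0 reasons=2
  return 0;
}
#endif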
if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) { ORE->emit(createMissedAnalysis("CFGNotUnderstood") << "loop control flow is not understood by vectorizer"); if (DoExtraAnalysis) Result = false; else return false; } // We need to have a loop header. DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName() << '\n'); // Check if we can if-convert non-single-bb loops. unsigned NumBlocks = TheLoop->getNumBlocks(); if (NumBlocks != 1 && !canVectorizeWithIfConvert()) { DEBUG(dbgs() << "LV: Can't if-convert the loop.\n"); if (DoExtraAnalysis) Result = false; else return false; } // Check if we can vectorize the instructions and CFG in this loop. if (!canVectorizeInstrs()) { DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n"); if (DoExtraAnalysis) Result = false; else return false; } // Go over each instruction and look at memory deps. if (!canVectorizeMemory()) { DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n"); if (DoExtraAnalysis) Result = false; else return false; } DEBUG(dbgs() << "LV: We can vectorize this loop" << (LAI->getRuntimePointerChecking()->Need ? " (with a runtime bound check)" : "") << "!\n"); bool UseInterleaved = TTI->enableInterleavedAccessVectorization(); // If an override option has been passed in for interleaved accesses, use it. if (EnableInterleavedMemAccesses.getNumOccurrences() > 0) UseInterleaved = EnableInterleavedMemAccesses; // Analyze interleaved memory accesses. if (UseInterleaved) InterleaveInfo.analyzeInterleaving(*getSymbolicStrides()); unsigned SCEVThreshold = VectorizeSCEVCheckThreshold; if (Hints->getForce() == LoopVectorizeHints::FK_Enabled) SCEVThreshold = PragmaVectorizeSCEVCheckThreshold; if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) { ORE->emit(createMissedAnalysis("TooManySCEVRunTimeChecks") << "Too many SCEV assumptions need to be made and checked " << "at runtime"); DEBUG(dbgs() << "LV: Too many SCEV checks needed.\n"); if (DoExtraAnalysis) Result = false; else return false; } // Okay! We've done all the tests. If any have failed, return false. Otherwise // we can vectorize, and at this point we don't have any other mem analysis // which may limit our maximum vectorization factor, so just return true with // no restrictions. return Result; } static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) { if (Ty->isPointerTy()) return DL.getIntPtrType(Ty); // It is possible that char's or short's overflow when we ask for the loop's // trip count, work around this by changing the type size. if (Ty->getScalarSizeInBits() < 32) return Type::getInt32Ty(Ty->getContext()); return Ty; } static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) { Ty0 = convertPointerToIntegerType(DL, Ty0); Ty1 = convertPointerToIntegerType(DL, Ty1); if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits()) return Ty0; return Ty1; } /// \brief Check that the instruction has outside loop users and is not an /// identified reduction variable. static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, SmallPtrSetImpl &AllowedExit) { // Reduction and Induction instructions are allowed to have exit users. All // other instructions must not have external users. if (!AllowedExit.count(Inst)) // Check that all of the users of the loop are inside the BB. for (User *U : Inst->users()) { Instruction *UI = cast(U); // This user may be a reduction exit value. 
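// Illustrative sketch (editorial, not part of the LLVM sources): getWiderType
// above reduces pointer types to an integer of pointer width and keeps the
// wider scalar; sub-32-bit integers are widened to i32 first. The bit widths
// below are assumed values, not DataLayout queries.
#if 0 // standalone sketch; not compiled as part of this file
#include <cstdio>
#include <initializer_list>

// Stand-in for a type: just its scalar size in bits, after any
// pointer-to-integer conversion.
struct IntTy { unsigned Bits; };

static IntTy widerOf(IntTy A, IntTy B) { return A.Bits > B.Bits ? A : B; }

int main() {
  // e.g. an i16 induction (widened to at least i32) and a pointer induction
  // on an assumed 64-bit target.
  IntTy Widest{0};
  for (IntTy T : {IntTy{32}, IntTy{64}})
    Widest = widerOf(Widest, T);
  std::printf("widest induction type: i%u\n", Widest.Bits); // i64
  return 0;
}
#endif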
if (!TheLoop->contains(UI)) { DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n'); return true; } } return false; } void LoopVectorizationLegality::addInductionPhi( PHINode *Phi, const InductionDescriptor &ID, SmallPtrSetImpl &AllowedExit) { Inductions[Phi] = ID; // In case this induction also comes with casts that we know we can ignore // in the vectorized loop body, record them here. All casts could be recorded // here for ignoring, but suffices to record only the first (as it is the // only one that may bw used outside the cast sequence). const SmallVectorImpl &Casts = ID.getCastInsts(); if (!Casts.empty()) InductionCastsToIgnore.insert(*Casts.begin()); Type *PhiTy = Phi->getType(); const DataLayout &DL = Phi->getModule()->getDataLayout(); // Get the widest type. if (!PhiTy->isFloatingPointTy()) { if (!WidestIndTy) WidestIndTy = convertPointerToIntegerType(DL, PhiTy); else WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy); } // Int inductions are special because we only allow one IV. if (ID.getKind() == InductionDescriptor::IK_IntInduction && ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() && isa(ID.getStartValue()) && cast(ID.getStartValue())->isNullValue()) { // Use the phi node with the widest type as induction. Use the last // one if there are multiple (no good reason for doing this other // than it is expedient). We've checked that it begins at zero and // steps by one, so this is a canonical induction variable. if (!PrimaryInduction || PhiTy == WidestIndTy) PrimaryInduction = Phi; } // Both the PHI node itself, and the "post-increment" value feeding // back into the PHI node may have external users. // We can allow those uses, except if the SCEVs we have for them rely // on predicates that only hold within the loop, since allowing the exit // currently means re-using this SCEV outside the loop. if (PSE.getUnionPredicate().isAlwaysTrue()) { AllowedExit.insert(Phi); AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch())); } DEBUG(dbgs() << "LV: Found an induction variable.\n"); } bool LoopVectorizationLegality::canVectorizeInstrs() { BasicBlock *Header = TheLoop->getHeader(); // Look for the attribute signaling the absence of NaNs. Function &F = *Header->getParent(); HasFunNoNaNAttr = F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true"; // For each block in the loop. for (BasicBlock *BB : TheLoop->blocks()) { // Scan the instructions in the block and look for hazards. for (Instruction &I : *BB) { if (auto *Phi = dyn_cast(&I)) { Type *PhiTy = Phi->getType(); // Check that this PHI type is allowed. if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() && !PhiTy->isPointerTy()) { ORE->emit(createMissedAnalysis("CFGNotUnderstood", Phi) << "loop control flow is not understood by vectorizer"); DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n"); return false; } // If this PHINode is not in the header block, then we know that we // can convert it to select during if-conversion. No need to check if // the PHIs in this block are induction or reduction variables. if (BB != Header) { // Check that this instruction has no outside users or is an // identified reduction value with an outside user. if (!hasOutsideLoopUser(TheLoop, Phi, AllowedExit)) continue; ORE->emit(createMissedAnalysis("NeitherInductionNorReduction", Phi) << "value could not be identified as " "an induction or reduction variable"); return false; } // We only allow if-converted PHIs with exactly two incoming values. 
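// Illustrative sketch (editorial, not part of the LLVM sources): the primary
// induction selected in addInductionPhi above must start at zero and step by
// one. The descriptor below is a minimal invented stand-in for that screen.
#if 0 // standalone sketch; not compiled as part of this file
#include <cstdio>

// Minimal induction summary: start value and constant step.
struct IndDesc {
  long Start;
  long Step;
};

// A candidate for the canonical (primary) induction variable begins at zero
// and advances by one each iteration.
static bool isCanonicalCandidate(const IndDesc &ID) {
  return ID.Start == 0 && ID.Step == 1;
}

int main() {
  IndDesc I{0, 1}, J{7, 1}, K{0, 4};
  std::printf("%d %d %d\n", isCanonicalCandidate(I), isCanonicalCandidate(J),
              isCanonicalCandidate(K)); // 1 0 0
  return 0;
}
#endif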
if (Phi->getNumIncomingValues() != 2) { ORE->emit(createMissedAnalysis("CFGNotUnderstood", Phi) << "control flow not understood by vectorizer"); DEBUG(dbgs() << "LV: Found an invalid PHI.\n"); return false; } RecurrenceDescriptor RedDes; - if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes)) { + if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC, + DT)) { if (RedDes.hasUnsafeAlgebra()) Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst()); AllowedExit.insert(RedDes.getLoopExitInstr()); Reductions[Phi] = RedDes; continue; } InductionDescriptor ID; if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) { addInductionPhi(Phi, ID, AllowedExit); if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr) Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst()); continue; } if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop, SinkAfter, DT)) { FirstOrderRecurrences.insert(Phi); continue; } // As a last resort, coerce the PHI to a AddRec expression // and re-try classifying it a an induction PHI. if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) { addInductionPhi(Phi, ID, AllowedExit); continue; } ORE->emit(createMissedAnalysis("NonReductionValueUsedOutsideLoop", Phi) << "value that could not be identified as " "reduction is used outside the loop"); DEBUG(dbgs() << "LV: Found an unidentified PHI." << *Phi << "\n"); return false; } // end of PHI handling // We handle calls that: // * Are debug info intrinsics. // * Have a mapping to an IR intrinsic. // * Have a vector version available. auto *CI = dyn_cast(&I); if (CI && !getVectorIntrinsicIDForCall(CI, TLI) && !isa(CI) && !(CI->getCalledFunction() && TLI && TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) { ORE->emit(createMissedAnalysis("CantVectorizeCall", CI) << "call instruction cannot be vectorized"); DEBUG(dbgs() << "LV: Found a non-intrinsic, non-libfunc callsite.\n"); return false; } // Intrinsics such as powi,cttz and ctlz are legal to vectorize if the // second argument is the same (i.e. loop invariant) if (CI && hasVectorInstrinsicScalarOpd( getVectorIntrinsicIDForCall(CI, TLI), 1)) { auto *SE = PSE.getSE(); if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(1)), TheLoop)) { ORE->emit(createMissedAnalysis("CantVectorizeIntrinsic", CI) << "intrinsic instruction cannot be vectorized"); DEBUG(dbgs() << "LV: Found unvectorizable intrinsic " << *CI << "\n"); return false; } } // Check that the instruction return type is vectorizable. // Also, we can't vectorize extractelement instructions. if ((!VectorType::isValidElementType(I.getType()) && !I.getType()->isVoidTy()) || isa(I)) { ORE->emit(createMissedAnalysis("CantVectorizeInstructionReturnType", &I) << "instruction return type cannot be vectorized"); DEBUG(dbgs() << "LV: Found unvectorizable type.\n"); return false; } // Check that the stored type is vectorizable. if (auto *ST = dyn_cast(&I)) { Type *T = ST->getValueOperand()->getType(); if (!VectorType::isValidElementType(T)) { ORE->emit(createMissedAnalysis("CantVectorizeStore", ST) << "store instruction cannot be vectorized"); return false; } // FP instructions can allow unsafe algebra, thus vectorizable by // non-IEEE-754 compliant SIMD units. // This applies to floating-point math operations and calls, not memory // operations, shuffles, or casts, as they don't change precision or // semantics. 
} else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) && !I.isFast()) { DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n"); Hints->setPotentiallyUnsafe(); } // Reduction instructions are allowed to have exit users. // All other instructions must not have external users. if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) { ORE->emit(createMissedAnalysis("ValueUsedOutsideLoop", &I) << "value cannot be used outside the loop"); return false; } } // next instr. } if (!PrimaryInduction) { DEBUG(dbgs() << "LV: Did not find one integer induction var.\n"); if (Inductions.empty()) { ORE->emit(createMissedAnalysis("NoInductionVariable") << "loop induction variable could not be identified"); return false; } } // Now we know the widest induction type, check if our found induction // is the same size. If it's not, unset it here and InnerLoopVectorizer // will create another. if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType()) PrimaryInduction = nullptr; return true; } void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) { // We should not collect Scalars more than once per VF. Right now, this // function is called from collectUniformsAndScalars(), which already does // this check. Collecting Scalars for VF=1 does not make any sense. assert(VF >= 2 && !Scalars.count(VF) && "This function should not be visited twice for the same VF"); SmallSetVector Worklist; // These sets are used to seed the analysis with pointers used by memory // accesses that will remain scalar. SmallSetVector ScalarPtrs; SmallPtrSet PossibleNonScalarPtrs; // A helper that returns true if the use of Ptr by MemAccess will be scalar. // The pointer operands of loads and stores will be scalar as long as the // memory access is not a gather or scatter operation. The value operand of a // store will remain scalar if the store is scalarized. auto isScalarUse = [&](Instruction *MemAccess, Value *Ptr) { InstWidening WideningDecision = getWideningDecision(MemAccess, VF); assert(WideningDecision != CM_Unknown && "Widening decision should be ready at this moment"); if (auto *Store = dyn_cast(MemAccess)) if (Ptr == Store->getValueOperand()) return WideningDecision == CM_Scalarize; assert(Ptr == getPointerOperand(MemAccess) && "Ptr is neither a value or pointer operand"); return WideningDecision != CM_GatherScatter; }; // A helper that returns true if the given value is a bitcast or // getelementptr instruction contained in the loop. auto isLoopVaryingBitCastOrGEP = [&](Value *V) { return ((isa(V) && V->getType()->isPointerTy()) || isa(V)) && !TheLoop->isLoopInvariant(V); }; // A helper that evaluates a memory access's use of a pointer. If the use // will be a scalar use, and the pointer is only used by memory accesses, we // place the pointer in ScalarPtrs. Otherwise, the pointer is placed in // PossibleNonScalarPtrs. auto evaluatePtrUse = [&](Instruction *MemAccess, Value *Ptr) { // We only care about bitcast and getelementptr instructions contained in // the loop. if (!isLoopVaryingBitCastOrGEP(Ptr)) return; // If the pointer has already been identified as scalar (e.g., if it was // also identified as uniform), there's nothing to do. auto *I = cast(Ptr); if (Worklist.count(I)) return; // If the use of the pointer will be a scalar use, and all users of the // pointer are memory accesses, place the pointer in ScalarPtrs. Otherwise, // place the pointer in PossibleNonScalarPtrs. 
if (isScalarUse(MemAccess, Ptr) && llvm::all_of(I->users(), [&](User *U) { return isa(U) || isa(U); })) ScalarPtrs.insert(I); else PossibleNonScalarPtrs.insert(I); }; // We seed the scalars analysis with three classes of instructions: (1) // instructions marked uniform-after-vectorization, (2) bitcast and // getelementptr instructions used by memory accesses requiring a scalar use, // and (3) pointer induction variables and their update instructions (we // currently only scalarize these). // // (1) Add to the worklist all instructions that have been identified as // uniform-after-vectorization. Worklist.insert(Uniforms[VF].begin(), Uniforms[VF].end()); // (2) Add to the worklist all bitcast and getelementptr instructions used by // memory accesses requiring a scalar use. The pointer operands of loads and // stores will be scalar as long as the memory accesses is not a gather or // scatter operation. The value operand of a store will remain scalar if the // store is scalarized. for (auto *BB : TheLoop->blocks()) for (auto &I : *BB) { if (auto *Load = dyn_cast(&I)) { evaluatePtrUse(Load, Load->getPointerOperand()); } else if (auto *Store = dyn_cast(&I)) { evaluatePtrUse(Store, Store->getPointerOperand()); evaluatePtrUse(Store, Store->getValueOperand()); } } for (auto *I : ScalarPtrs) if (!PossibleNonScalarPtrs.count(I)) { DEBUG(dbgs() << "LV: Found scalar instruction: " << *I << "\n"); Worklist.insert(I); } // (3) Add to the worklist all pointer induction variables and their update // instructions. // // TODO: Once we are able to vectorize pointer induction variables we should // no longer insert them into the worklist here. auto *Latch = TheLoop->getLoopLatch(); for (auto &Induction : *Legal->getInductionVars()) { auto *Ind = Induction.first; auto *IndUpdate = cast(Ind->getIncomingValueForBlock(Latch)); if (Induction.second.getKind() != InductionDescriptor::IK_PtrInduction) continue; Worklist.insert(Ind); Worklist.insert(IndUpdate); DEBUG(dbgs() << "LV: Found scalar instruction: " << *Ind << "\n"); DEBUG(dbgs() << "LV: Found scalar instruction: " << *IndUpdate << "\n"); } // Insert the forced scalars. // FIXME: Currently widenPHIInstruction() often creates a dead vector // induction variable when the PHI user is scalarized. if (ForcedScalars.count(VF)) for (auto *I : ForcedScalars.find(VF)->second) Worklist.insert(I); // Expand the worklist by looking through any bitcasts and getelementptr // instructions we've already identified as scalar. This is similar to the // expansion step in collectLoopUniforms(); however, here we're only // expanding to include additional bitcasts and getelementptr instructions. unsigned Idx = 0; while (Idx != Worklist.size()) { Instruction *Dst = Worklist[Idx++]; if (!isLoopVaryingBitCastOrGEP(Dst->getOperand(0))) continue; auto *Src = cast(Dst->getOperand(0)); if (llvm::all_of(Src->users(), [&](User *U) -> bool { auto *J = cast(U); return !TheLoop->contains(J) || Worklist.count(J) || ((isa(J) || isa(J)) && isScalarUse(J, Src)); })) { Worklist.insert(Src); DEBUG(dbgs() << "LV: Found scalar instruction: " << *Src << "\n"); } } // An induction variable will remain scalar if all users of the induction // variable and induction variable update remain scalar. for (auto &Induction : *Legal->getInductionVars()) { auto *Ind = Induction.first; auto *IndUpdate = cast(Ind->getIncomingValueForBlock(Latch)); // We already considered pointer induction variables, so there's no reason // to look at their users again. 
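// Illustrative sketch (editorial, not part of the LLVM sources): the worklist
// expansion above pulls an address-computation value into the scalar set only
// once every in-loop user of it is already known to remain scalar. The toy
// def->users graph below is invented to show that fixed point.
#if 0 // standalone sketch; not compiled as part of this file
#include <cstdio>
#include <map>
#include <set>
#include <string>
#include <vector>

using Name = std::string;

int main() {
  // Toy address chain: ptr feeds two GEPs, gep0 feeds gep1, gep1 feeds a
  // scalarized load.
  std::map<Name, std::vector<Name>> Users = {
      {"gep1", {"load"}}, {"gep0", {"gep1"}}, {"ptr", {"gep0", "gep1"}}};

  std::set<Name> Scalar = {"load", "gep1"}; // seeded from scalar memory uses

  // Keep adding values whose users are all already scalar.
  bool Changed = true;
  while (Changed) {
    Changed = false;
    for (auto &Def : Users) {
      if (Scalar.count(Def.first))
        continue;
      bool AllUsersScalar = true;
      for (const Name &U : Def.second)
        AllUsersScalar = AllUsersScalar && Scalar.count(U) != 0;
      if (AllUsersScalar) {
        Scalar.insert(Def.first);
        Changed = true;
      }
    }
  }
  std::printf("scalar values: %zu\n", Scalar.size()); // 4: load, gep1, gep0, ptr
  return 0;
}
#endif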
// // TODO: Once we are able to vectorize pointer induction variables we // should no longer skip over them here. if (Induction.second.getKind() == InductionDescriptor::IK_PtrInduction) continue; // Determine if all users of the induction variable are scalar after // vectorization. auto ScalarInd = llvm::all_of(Ind->users(), [&](User *U) -> bool { auto *I = cast(U); return I == IndUpdate || !TheLoop->contains(I) || Worklist.count(I); }); if (!ScalarInd) continue; // Determine if all users of the induction variable update instruction are // scalar after vectorization. auto ScalarIndUpdate = llvm::all_of(IndUpdate->users(), [&](User *U) -> bool { auto *I = cast(U); return I == Ind || !TheLoop->contains(I) || Worklist.count(I); }); if (!ScalarIndUpdate) continue; // The induction variable and its update instruction will remain scalar. Worklist.insert(Ind); Worklist.insert(IndUpdate); DEBUG(dbgs() << "LV: Found scalar instruction: " << *Ind << "\n"); DEBUG(dbgs() << "LV: Found scalar instruction: " << *IndUpdate << "\n"); } Scalars[VF].insert(Worklist.begin(), Worklist.end()); } bool LoopVectorizationLegality::isScalarWithPredication(Instruction *I) { if (!blockNeedsPredication(I->getParent())) return false; switch(I->getOpcode()) { default: break; case Instruction::Store: return !isMaskRequired(I); case Instruction::UDiv: case Instruction::SDiv: case Instruction::SRem: case Instruction::URem: return mayDivideByZero(*I); } return false; } bool LoopVectorizationLegality::memoryInstructionCanBeWidened(Instruction *I, unsigned VF) { // Get and ensure we have a valid memory instruction. LoadInst *LI = dyn_cast(I); StoreInst *SI = dyn_cast(I); assert((LI || SI) && "Invalid memory instruction"); auto *Ptr = getPointerOperand(I); // In order to be widened, the pointer should be consecutive, first of all. if (!isConsecutivePtr(Ptr)) return false; // If the instruction is a store located in a predicated block, it will be // scalarized. if (isScalarWithPredication(I)) return false; // If the instruction's allocated size doesn't equal it's type size, it // requires padding and will be scalarized. auto &DL = I->getModule()->getDataLayout(); auto *ScalarTy = LI ? LI->getType() : SI->getValueOperand()->getType(); if (hasIrregularType(ScalarTy, DL, VF)) return false; return true; } void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) { // We should not collect Uniforms more than once per VF. Right now, // this function is called from collectUniformsAndScalars(), which // already does this check. Collecting Uniforms for VF=1 does not make any // sense. assert(VF >= 2 && !Uniforms.count(VF) && "This function should not be visited twice for the same VF"); // Visit the list of Uniforms. If we'll not find any uniform value, we'll // not analyze again. Uniforms.count(VF) will return 1. Uniforms[VF].clear(); // We now know that the loop is vectorizable! // Collect instructions inside the loop that will remain uniform after // vectorization. // Global values, params and instructions outside of current loop are out of // scope. auto isOutOfScope = [&](Value *V) -> bool { Instruction *I = dyn_cast(V); return (!I || !TheLoop->contains(I)); }; SetVector Worklist; BasicBlock *Latch = TheLoop->getLoopLatch(); // Start with the conditional branch. If the branch condition is an // instruction contained in the loop that is only used by the branch, it is // uniform. 
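// Illustrative sketch (editorial, not part of the LLVM sources):
// memoryInstructionCanBeWidened above requires a consecutive pointer, no
// predication, and a type whose size needs no padding. The access summary
// below is an invented simplification of those three gates.
#if 0 // standalone sketch; not compiled as part of this file
#include <cstdio>

// Invented summary of a single load/store.
struct AccessInfo {
  bool ConsecutivePtr;    // address advances by one element per iteration
  bool NeedsPredication;  // lives in a block that must be predicated
  unsigned TypeSizeBits;  // size of the value as stored
  unsigned AllocSizeBits; // size the type occupies in memory (with padding)
};

// Widen into one wide access only if every gate passes; otherwise the access
// is scalarized or handled as a gather/scatter.
static bool canWiden(const AccessInfo &A) {
  return A.ConsecutivePtr && !A.NeedsPredication &&
         A.TypeSizeBits == A.AllocSizeBits;
}

int main() {
  AccessInfo Plain{true, false, 32, 32};  // consecutive i32 access
  AccessInfo Padded{true, false, 24, 32}; // irregular type needing padding
  std::printf("%d %d\n", canWiden(Plain), canWiden(Padded)); // 1 0
  return 0;
}
#endif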
auto *Cmp = dyn_cast(Latch->getTerminator()->getOperand(0)); if (Cmp && TheLoop->contains(Cmp) && Cmp->hasOneUse()) { Worklist.insert(Cmp); DEBUG(dbgs() << "LV: Found uniform instruction: " << *Cmp << "\n"); } // Holds consecutive and consecutive-like pointers. Consecutive-like pointers // are pointers that are treated like consecutive pointers during // vectorization. The pointer operands of interleaved accesses are an // example. SmallSetVector ConsecutiveLikePtrs; // Holds pointer operands of instructions that are possibly non-uniform. SmallPtrSet PossibleNonUniformPtrs; auto isUniformDecision = [&](Instruction *I, unsigned VF) { InstWidening WideningDecision = getWideningDecision(I, VF); assert(WideningDecision != CM_Unknown && "Widening decision should be ready at this moment"); return (WideningDecision == CM_Widen || WideningDecision == CM_Widen_Reverse || WideningDecision == CM_Interleave); }; // Iterate over the instructions in the loop, and collect all // consecutive-like pointer operands in ConsecutiveLikePtrs. If it's possible // that a consecutive-like pointer operand will be scalarized, we collect it // in PossibleNonUniformPtrs instead. We use two sets here because a single // getelementptr instruction can be used by both vectorized and scalarized // memory instructions. For example, if a loop loads and stores from the same // location, but the store is conditional, the store will be scalarized, and // the getelementptr won't remain uniform. for (auto *BB : TheLoop->blocks()) for (auto &I : *BB) { // If there's no pointer operand, there's nothing to do. auto *Ptr = dyn_cast_or_null(getPointerOperand(&I)); if (!Ptr) continue; // True if all users of Ptr are memory accesses that have Ptr as their // pointer operand. auto UsersAreMemAccesses = llvm::all_of(Ptr->users(), [&](User *U) -> bool { return getPointerOperand(U) == Ptr; }); // Ensure the memory instruction will not be scalarized or used by // gather/scatter, making its pointer operand non-uniform. If the pointer // operand is used by any instruction other than a memory access, we // conservatively assume the pointer operand may be non-uniform. if (!UsersAreMemAccesses || !isUniformDecision(&I, VF)) PossibleNonUniformPtrs.insert(Ptr); // If the memory instruction will be vectorized and its pointer operand // is consecutive-like, or interleaving - the pointer operand should // remain uniform. else ConsecutiveLikePtrs.insert(Ptr); } // Add to the Worklist all consecutive and consecutive-like pointers that // aren't also identified as possibly non-uniform. for (auto *V : ConsecutiveLikePtrs) if (!PossibleNonUniformPtrs.count(V)) { DEBUG(dbgs() << "LV: Found uniform instruction: " << *V << "\n"); Worklist.insert(V); } // Expand Worklist in topological order: whenever a new instruction // is added , its users should be either already inside Worklist, or // out of scope. It ensures a uniform instruction will only be used // by uniform instructions or out of scope instructions. 
unsigned idx = 0; while (idx != Worklist.size()) { Instruction *I = Worklist[idx++]; for (auto OV : I->operand_values()) { if (isOutOfScope(OV)) continue; auto *OI = cast(OV); if (llvm::all_of(OI->users(), [&](User *U) -> bool { auto *J = cast(U); return !TheLoop->contains(J) || Worklist.count(J) || (OI == getPointerOperand(J) && isUniformDecision(J, VF)); })) { Worklist.insert(OI); DEBUG(dbgs() << "LV: Found uniform instruction: " << *OI << "\n"); } } } // Returns true if Ptr is the pointer operand of a memory access instruction // I, and I is known to not require scalarization. auto isVectorizedMemAccessUse = [&](Instruction *I, Value *Ptr) -> bool { return getPointerOperand(I) == Ptr && isUniformDecision(I, VF); }; // For an instruction to be added into Worklist above, all its users inside // the loop should also be in Worklist. However, this condition cannot be // true for phi nodes that form a cyclic dependence. We must process phi // nodes separately. An induction variable will remain uniform if all users // of the induction variable and induction variable update remain uniform. // The code below handles both pointer and non-pointer induction variables. for (auto &Induction : *Legal->getInductionVars()) { auto *Ind = Induction.first; auto *IndUpdate = cast(Ind->getIncomingValueForBlock(Latch)); // Determine if all users of the induction variable are uniform after // vectorization. auto UniformInd = llvm::all_of(Ind->users(), [&](User *U) -> bool { auto *I = cast(U); return I == IndUpdate || !TheLoop->contains(I) || Worklist.count(I) || isVectorizedMemAccessUse(I, Ind); }); if (!UniformInd) continue; // Determine if all users of the induction variable update instruction are // uniform after vectorization. auto UniformIndUpdate = llvm::all_of(IndUpdate->users(), [&](User *U) -> bool { auto *I = cast(U); return I == Ind || !TheLoop->contains(I) || Worklist.count(I) || isVectorizedMemAccessUse(I, IndUpdate); }); if (!UniformIndUpdate) continue; // The induction variable and its update instruction will remain uniform. 
Worklist.insert(Ind); Worklist.insert(IndUpdate); DEBUG(dbgs() << "LV: Found uniform instruction: " << *Ind << "\n"); DEBUG(dbgs() << "LV: Found uniform instruction: " << *IndUpdate << "\n"); } Uniforms[VF].insert(Worklist.begin(), Worklist.end()); } bool LoopVectorizationLegality::canVectorizeMemory() { LAI = &(*GetLAA)(*TheLoop); InterleaveInfo.setLAI(LAI); const OptimizationRemarkAnalysis *LAR = LAI->getReport(); if (LAR) { ORE->emit([&]() { return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(), "loop not vectorized: ", *LAR); }); } if (!LAI->canVectorizeMemory()) return false; if (LAI->hasStoreToLoopInvariantAddress()) { ORE->emit(createMissedAnalysis("CantVectorizeStoreToLoopInvariantAddress") << "write to a loop invariant address could not be vectorized"); DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n"); return false; } Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks()); PSE.addPredicate(LAI->getPSE().getUnionPredicate()); return true; } bool LoopVectorizationLegality::isInductionPhi(const Value *V) { Value *In0 = const_cast(V); PHINode *PN = dyn_cast_or_null(In0); if (!PN) return false; return Inductions.count(PN); } bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) { auto *Inst = dyn_cast(V); return (Inst && InductionCastsToIgnore.count(Inst)); } bool LoopVectorizationLegality::isInductionVariable(const Value *V) { return isInductionPhi(V) || isCastedInductionVariable(V); } bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) { return FirstOrderRecurrences.count(Phi); } bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) { return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT); } bool LoopVectorizationLegality::blockCanBePredicated( BasicBlock *BB, SmallPtrSetImpl &SafePtrs) { const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel(); for (Instruction &I : *BB) { // Check that we don't have a constant expression that can trap as operand. for (Value *Operand : I.operands()) { if (auto *C = dyn_cast(Operand)) if (C->canTrap()) return false; } // We might be able to hoist the load. if (I.mayReadFromMemory()) { auto *LI = dyn_cast(&I); if (!LI) return false; if (!SafePtrs.count(LI->getPointerOperand())) { if (isLegalMaskedLoad(LI->getType(), LI->getPointerOperand()) || isLegalMaskedGather(LI->getType())) { MaskedOp.insert(LI); continue; } // !llvm.mem.parallel_loop_access implies if-conversion safety. if (IsAnnotatedParallel) continue; return false; } } if (I.mayWriteToMemory()) { auto *SI = dyn_cast(&I); // We only support predication of stores in basic blocks with one // predecessor. if (!SI) return false; // Build a masked store if it is legal for the target. 
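// Illustrative sketch (editorial, not part of the LLVM sources): the store
// handling below prefers a masked store when the target supports one and
// otherwise falls back to predicated scalar stores, subject to a safety and
// count limit. The capability flags and limit are invented for illustration.
#if 0 // standalone sketch; not compiled as part of this file
#include <cstdio>

enum class StorePlan { Masked, PredicatedScalar, GiveUp };

struct StoreFacts {
  bool TargetHasMaskedStore; // legality query answered by the target
  bool PointerKnownSafe;     // address already known to be dereferenceable
  bool SinglePredecessor;    // block shape we are willing to predicate
  unsigned PredStoresSoFar;  // running count of predicated stores
  unsigned PredStoreLimit;   // threshold before predication is rejected
};

static StorePlan planStore(const StoreFacts &F) {
  if (F.TargetHasMaskedStore)
    return StorePlan::Masked;
  if (F.PointerKnownSafe && F.SinglePredecessor &&
      F.PredStoresSoFar + 1 <= F.PredStoreLimit)
    return StorePlan::PredicatedScalar;
  return StorePlan::GiveUp;
}

int main() {
  StoreFacts WithMask{true, false, true, 0, 8};
  StoreFacts NoMask{false, true, true, 0, 8};
  std::printf("%d %d\n", (int)planStore(WithMask), (int)planStore(NoMask)); // 0 1
  return 0;
}
#endif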
if (isLegalMaskedStore(SI->getValueOperand()->getType(), SI->getPointerOperand()) || isLegalMaskedScatter(SI->getValueOperand()->getType())) { MaskedOp.insert(SI); continue; } bool isSafePtr = (SafePtrs.count(SI->getPointerOperand()) != 0); bool isSinglePredecessor = SI->getParent()->getSinglePredecessor(); if (++NumPredStores > NumberOfStoresToPredicate || !isSafePtr || !isSinglePredecessor) return false; } if (I.mayThrow()) return false; } return true; } void InterleavedAccessInfo::collectConstStrideAccesses( MapVector &AccessStrideInfo, const ValueToValueMap &Strides) { auto &DL = TheLoop->getHeader()->getModule()->getDataLayout(); // Since it's desired that the load/store instructions be maintained in // "program order" for the interleaved access analysis, we have to visit the // blocks in the loop in reverse postorder (i.e., in a topological order). // Such an ordering will ensure that any load/store that may be executed // before a second load/store will precede the second load/store in // AccessStrideInfo. LoopBlocksDFS DFS(TheLoop); DFS.perform(LI); for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) for (auto &I : *BB) { auto *LI = dyn_cast(&I); auto *SI = dyn_cast(&I); if (!LI && !SI) continue; Value *Ptr = getPointerOperand(&I); // We don't check wrapping here because we don't know yet if Ptr will be // part of a full group or a group with gaps. Checking wrapping for all // pointers (even those that end up in groups with no gaps) will be overly // conservative. For full groups, wrapping should be ok since if we would // wrap around the address space we would do a memory access at nullptr // even without the transformation. The wrapping checks are therefore // deferred until after we've formed the interleaved groups. int64_t Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, /*Assume=*/true, /*ShouldCheckWrap=*/false); const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr); PointerType *PtrTy = dyn_cast(Ptr->getType()); uint64_t Size = DL.getTypeAllocSize(PtrTy->getElementType()); // An alignment of 0 means target ABI alignment. unsigned Align = getMemInstAlignment(&I); if (!Align) Align = DL.getABITypeAlignment(PtrTy->getElementType()); AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, Align); } } // Analyze interleaved accesses and collect them into interleaved load and // store groups. // // When generating code for an interleaved load group, we effectively hoist all // loads in the group to the location of the first load in program order. When // generating code for an interleaved store group, we sink all stores to the // location of the last store. This code motion can change the order of load // and store instructions and may break dependences. // // The code generation strategy mentioned above ensures that we won't violate // any write-after-read (WAR) dependences. // // E.g., for the WAR dependence: a = A[i]; // (1) // A[i] = b; // (2) // // The store group of (2) is always inserted at or below (2), and the load // group of (1) is always inserted at or above (1). Thus, the instructions will // never be reordered. All other dependences are checked to ensure the // correctness of the instruction reordering. // // The algorithm visits all memory accesses in the loop in bottom-up program // order. Program order is established by traversing the blocks in the loop in // reverse postorder when collecting the accesses. 
// // We visit the memory accesses in bottom-up order because it can simplify the // construction of store groups in the presence of write-after-write (WAW) // dependences. // // E.g., for the WAW dependence: A[i] = a; // (1) // A[i] = b; // (2) // A[i + 1] = c; // (3) // // We will first create a store group with (3) and (2). (1) can't be added to // this group because it and (2) are dependent. However, (1) can be grouped // with other accesses that may precede it in program order. Note that a // bottom-up order does not imply that WAW dependences should not be checked. void InterleavedAccessInfo::analyzeInterleaving( const ValueToValueMap &Strides) { DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n"); // Holds all accesses with a constant stride. MapVector AccessStrideInfo; collectConstStrideAccesses(AccessStrideInfo, Strides); if (AccessStrideInfo.empty()) return; // Collect the dependences in the loop. collectDependences(); // Holds all interleaved store groups temporarily. SmallSetVector StoreGroups; // Holds all interleaved load groups temporarily. SmallSetVector LoadGroups; // Search in bottom-up program order for pairs of accesses (A and B) that can // form interleaved load or store groups. In the algorithm below, access A // precedes access B in program order. We initialize a group for B in the // outer loop of the algorithm, and then in the inner loop, we attempt to // insert each A into B's group if: // // 1. A and B have the same stride, // 2. A and B have the same memory object size, and // 3. A belongs in B's group according to its distance from B. // // Special care is taken to ensure group formation will not break any // dependences. for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend(); BI != E; ++BI) { Instruction *B = BI->first; StrideDescriptor DesB = BI->second; // Initialize a group for B if it has an allowable stride. Even if we don't // create a group for B, we continue with the bottom-up algorithm to ensure // we don't break any of B's dependences. InterleaveGroup *Group = nullptr; if (isStrided(DesB.Stride)) { Group = getInterleaveGroup(B); if (!Group) { DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B << '\n'); Group = createInterleaveGroup(B, DesB.Stride, DesB.Align); } if (B->mayWriteToMemory()) StoreGroups.insert(Group); else LoadGroups.insert(Group); } for (auto AI = std::next(BI); AI != E; ++AI) { Instruction *A = AI->first; StrideDescriptor DesA = AI->second; // Our code motion strategy implies that we can't have dependences // between accesses in an interleaved group and other accesses located // between the first and last member of the group. Note that this also // means that a group can't have more than one member at a given offset. // The accesses in a group can have dependences with other accesses, but // we must ensure we don't extend the boundaries of the group such that // we encompass those dependent accesses. // // For example, assume we have the sequence of accesses shown below in a // stride-2 loop: // // (1, 2) is a group | A[i] = a; // (1) // | A[i-1] = b; // (2) | // A[i-3] = c; // (3) // A[i] = d; // (4) | (2, 4) is not a group // // Because accesses (2) and (3) are dependent, we can group (2) with (1) // but not with (4). If we did, the dependent access (3) would be within // the boundaries of the (2, 4) group. 
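// Illustrative sketch (editorial, not part of the LLVM sources): rule 3 below
// admits access A into B's group only when A's byte distance from B is a whole
// multiple of the member size, and the member index follows directly from that
// quotient. The byte values in main() are invented.
#if 0 // standalone sketch; not compiled as part of this file
#include <cstdint>
#include <cstdio>

// Returns true and sets IndexA when A can join B's group.
static bool memberIndexFromDistance(int64_t DistanceAToB, int64_t MemberSize,
                                    int IndexB, int &IndexA) {
  if (DistanceAToB % MemberSize != 0)
    return false;
  IndexA = IndexB + static_cast<int>(DistanceAToB / MemberSize);
  return true;
}

int main() {
  int Idx = 0;
  // e.g. 4-byte elements: A accesses 8 bytes before B, and B sits at index 2.
  if (memberIndexFromDistance(/*Dist=*/-8, /*Size=*/4, /*IndexB=*/2, Idx))
    std::printf("A joins at index %d\n", Idx); // index 0
  return 0;
}
#endif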
if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) { // If a dependence exists and A is already in a group, we know that A // must be a store since A precedes B and WAR dependences are allowed. // Thus, A would be sunk below B. We release A's group to prevent this // illegal code motion. A will then be free to form another group with // instructions that precede it. if (isInterleaved(A)) { InterleaveGroup *StoreGroup = getInterleaveGroup(A); StoreGroups.remove(StoreGroup); releaseGroup(StoreGroup); } // If a dependence exists and A is not already in a group (or it was // and we just released it), B might be hoisted above A (if B is a // load) or another store might be sunk below A (if B is a store). In // either case, we can't add additional instructions to B's group. B // will only form a group with instructions that it precedes. break; } // At this point, we've checked for illegal code motion. If either A or B // isn't strided, there's nothing left to do. if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride)) continue; // Ignore A if it's already in a group or isn't the same kind of memory // operation as B. if (isInterleaved(A) || A->mayReadFromMemory() != B->mayReadFromMemory()) continue; // Check rules 1 and 2. Ignore A if its stride or size is different from // that of B. if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size) continue; // Ignore A if the memory object of A and B don't belong to the same // address space if (getMemInstAddressSpace(A) != getMemInstAddressSpace(B)) continue; // Calculate the distance from A to B. const SCEVConstant *DistToB = dyn_cast( PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev)); if (!DistToB) continue; int64_t DistanceToB = DistToB->getAPInt().getSExtValue(); // Check rule 3. Ignore A if its distance to B is not a multiple of the // size. if (DistanceToB % static_cast(DesB.Size)) continue; // Ignore A if either A or B is in a predicated block. Although we // currently prevent group formation for predicated accesses, we may be // able to relax this limitation in the future once we handle more // complicated blocks. if (isPredicated(A->getParent()) || isPredicated(B->getParent())) continue; // The index of A is the index of B plus A's distance to B in multiples // of the size. int IndexA = Group->getIndex(B) + DistanceToB / static_cast(DesB.Size); // Try to insert A into B's group. if (Group->insertMember(A, IndexA, DesA.Align)) { DEBUG(dbgs() << "LV: Inserted:" << *A << '\n' << " into the interleave group with" << *B << '\n'); InterleaveGroupMap[A] = Group; // Set the first load in program order as the insert position. if (A->mayReadFromMemory()) Group->setInsertPos(A); } } // Iteration over A accesses. } // Iteration over B accesses. // Remove interleaved store groups with gaps. for (InterleaveGroup *Group : StoreGroups) if (Group->getNumMembers() != Group->getFactor()) { DEBUG(dbgs() << "LV: Invalidate candidate interleaved store group due " "to gaps.\n"); releaseGroup(Group); } // Remove interleaved groups with gaps (currently only loads) whose memory // accesses may wrap around. We have to revisit the getPtrStride analysis, // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does // not check wrapping (see documentation there). // FORNOW we use Assume=false; // TODO: Change to Assume=true but making sure we don't exceed the threshold // of runtime SCEV assumptions checks (thereby potentially failing to // vectorize altogether). 
// Additional optional optimizations: // TODO: If we are peeling the loop and we know that the first pointer doesn't // wrap then we can deduce that all pointers in the group don't wrap. // This means that we can forcefully peel the loop in order to only have to // check the first pointer for no-wrap. When we'll change to use Assume=true // we'll only need at most one runtime check per interleaved group. for (InterleaveGroup *Group : LoadGroups) { // Case 1: A full group. Can Skip the checks; For full groups, if the wide // load would wrap around the address space we would do a memory access at // nullptr even without the transformation. if (Group->getNumMembers() == Group->getFactor()) continue; // Case 2: If first and last members of the group don't wrap this implies // that all the pointers in the group don't wrap. // So we check only group member 0 (which is always guaranteed to exist), // and group member Factor - 1; If the latter doesn't exist we rely on // peeling (if it is a non-reveresed accsess -- see Case 3). Value *FirstMemberPtr = getPointerOperand(Group->getMember(0)); if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false, /*ShouldCheckWrap=*/true)) { DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to " "first group member potentially pointer-wrapping.\n"); releaseGroup(Group); continue; } Instruction *LastMember = Group->getMember(Group->getFactor() - 1); if (LastMember) { Value *LastMemberPtr = getPointerOperand(LastMember); if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false, /*ShouldCheckWrap=*/true)) { DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to " "last group member potentially pointer-wrapping.\n"); releaseGroup(Group); } } else { // Case 3: A non-reversed interleaved load group with gaps: We need // to execute at least one scalar epilogue iteration. This will ensure // we don't speculatively access memory out-of-bounds. We only need // to look for a member at index factor - 1, since every group must have // a member at index zero. if (Group->isReverse()) { DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to " "a reverse access with gaps.\n"); releaseGroup(Group); continue; } DEBUG(dbgs() << "LV: Interleaved group requires epilogue iteration.\n"); RequiresScalarEpilogue = true; } } } Optional LoopVectorizationCostModel::computeMaxVF(bool OptForSize) { if (!EnableCondStoresVectorization && Legal->getNumPredStores()) { ORE->emit(createMissedAnalysis("ConditionalStore") << "store that is conditionally executed prevents vectorization"); DEBUG(dbgs() << "LV: No vectorization. There are conditional stores.\n"); return None; } if (Legal->getRuntimePointerChecking()->Need && TTI.hasBranchDivergence()) { // TODO: It may by useful to do since it's still likely to be dynamically // uniform if the target can skip. DEBUG(dbgs() << "LV: Not inserting runtime ptr check for divergent target"); ORE->emit( createMissedAnalysis("CantVersionLoopWithDivergentTarget") << "runtime pointer checks needed. Not enabled for divergent target"); return None; } unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop); if (!OptForSize) // Remaining checks deal with scalar loop when OptForSize. return computeFeasibleMaxVF(OptForSize, TC); if (Legal->getRuntimePointerChecking()->Need) { ORE->emit(createMissedAnalysis("CantVersionLoopWithOptForSize") << "runtime pointer checks needed. 
Enable vectorization of this " "loop with '#pragma clang loop vectorize(enable)' when " "compiling with -Os/-Oz"); DEBUG(dbgs() << "LV: Aborting. Runtime ptr check is required with -Os/-Oz.\n"); return None; } // If we optimize the program for size, avoid creating the tail loop. DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n'); // If we don't know the precise trip count, don't try to vectorize. if (TC < 2) { ORE->emit( createMissedAnalysis("UnknownLoopCountComplexCFG") << "unable to calculate the loop count due to complex control flow"); DEBUG(dbgs() << "LV: Aborting. A tail loop is required with -Os/-Oz.\n"); return None; } unsigned MaxVF = computeFeasibleMaxVF(OptForSize, TC); if (TC % MaxVF != 0) { // If the trip count that we found modulo the vectorization factor is not // zero then we require a tail. // FIXME: look for a smaller MaxVF that does divide TC rather than give up. // FIXME: return None if loop requiresScalarEpilog(), or look for a // smaller MaxVF that does not require a scalar epilog. ORE->emit(createMissedAnalysis("NoTailLoopWithOptForSize") << "cannot optimize for size and vectorize at the " "same time. Enable vectorization of this loop " "with '#pragma clang loop vectorize(enable)' " "when compiling with -Os/-Oz"); DEBUG(dbgs() << "LV: Aborting. A tail loop is required with -Os/-Oz.\n"); return None; } return MaxVF; } unsigned LoopVectorizationCostModel::computeFeasibleMaxVF(bool OptForSize, unsigned ConstTripCount) { MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI); unsigned SmallestType, WidestType; std::tie(SmallestType, WidestType) = getSmallestAndWidestTypes(); unsigned WidestRegister = TTI.getRegisterBitWidth(true); // Get the maximum safe dependence distance in bits computed by LAA. // It is computed by MaxVF * sizeOf(type) * 8, where type is taken from // the memory accesses that is most restrictive (involved in the smallest // dependence distance). unsigned MaxSafeRegisterWidth = Legal->getMaxSafeRegisterWidth(); WidestRegister = std::min(WidestRegister, MaxSafeRegisterWidth); unsigned MaxVectorSize = WidestRegister / WidestType; DEBUG(dbgs() << "LV: The Smallest and Widest types: " << SmallestType << " / " << WidestType << " bits.\n"); DEBUG(dbgs() << "LV: The Widest register safe to use is: " << WidestRegister << " bits.\n"); assert(MaxVectorSize <= 64 && "Did not expect to pack so many elements" " into one vector!"); if (MaxVectorSize == 0) { DEBUG(dbgs() << "LV: The target has no vector registers.\n"); MaxVectorSize = 1; return MaxVectorSize; } else if (ConstTripCount && ConstTripCount < MaxVectorSize && isPowerOf2_32(ConstTripCount)) { // We need to clamp the VF to be the ConstTripCount. There is no point in // choosing a higher viable VF as done in the loop below. DEBUG(dbgs() << "LV: Clamping the MaxVF to the constant trip count: " << ConstTripCount << "\n"); MaxVectorSize = ConstTripCount; return MaxVectorSize; } unsigned MaxVF = MaxVectorSize; if (MaximizeBandwidth && !OptForSize) { // Collect all viable vectorization factors larger than the default MaxVF // (i.e. MaxVectorSize). SmallVector VFs; unsigned NewMaxVectorSize = WidestRegister / SmallestType; for (unsigned VS = MaxVectorSize * 2; VS <= NewMaxVectorSize; VS *= 2) VFs.push_back(VS); // For each VF calculate its register usage. auto RUs = calculateRegisterUsage(VFs); // Select the largest VF which doesn't require more registers than existing // ones. 
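// Illustrative sketch (editorial, not part of the LLVM sources): the default
// factor computed above is the widest safe register width divided by the
// widest element type, optionally clamped to a small power-of-two constant
// trip count. The register width, type width, and trip count below are assumed
// example values.
#if 0 // standalone sketch; not compiled as part of this file
#include <cstdio>

static bool isPowerOf2(unsigned X) { return X && !(X & (X - 1)); }

static unsigned feasibleMaxVF(unsigned WidestRegisterBits,
                              unsigned WidestTypeBits,
                              unsigned ConstTripCount) {
  unsigned MaxVectorSize = WidestRegisterBits / WidestTypeBits;
  if (MaxVectorSize == 0)
    return 1; // no vector registers at all
  if (ConstTripCount && ConstTripCount < MaxVectorSize &&
      isPowerOf2(ConstTripCount))
    return ConstTripCount; // no point in choosing more lanes than iterations
  return MaxVectorSize;
}

int main() {
  std::printf("%u\n", feasibleMaxVF(256, 32, 0)); // 256-bit regs, i32 -> 8
  std::printf("%u\n", feasibleMaxVF(256, 32, 4)); // trip count 4 clamps to 4
  return 0;
}
#endif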
unsigned TargetNumRegisters = TTI.getNumberOfRegisters(true); for (int i = RUs.size() - 1; i >= 0; --i) { if (RUs[i].MaxLocalUsers <= TargetNumRegisters) { MaxVF = VFs[i]; break; } } } return MaxVF; } LoopVectorizationCostModel::VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(unsigned MaxVF) { float Cost = expectedCost(1).first; #ifndef NDEBUG const float ScalarCost = Cost; #endif /* NDEBUG */ unsigned Width = 1; DEBUG(dbgs() << "LV: Scalar loop costs: " << (int)ScalarCost << ".\n"); bool ForceVectorization = Hints->getForce() == LoopVectorizeHints::FK_Enabled; // Ignore scalar width, because the user explicitly wants vectorization. if (ForceVectorization && MaxVF > 1) { Width = 2; Cost = expectedCost(Width).first / (float)Width; } for (unsigned i = 2; i <= MaxVF; i *= 2) { // Notice that the vector loop needs to be executed less times, so // we need to divide the cost of the vector loops by the width of // the vector elements. VectorizationCostTy C = expectedCost(i); float VectorCost = C.first / (float)i; DEBUG(dbgs() << "LV: Vector loop of width " << i << " costs: " << (int)VectorCost << ".\n"); if (!C.second && !ForceVectorization) { DEBUG( dbgs() << "LV: Not considering vector loop of width " << i << " because it will not generate any vector instructions.\n"); continue; } if (VectorCost < Cost) { Cost = VectorCost; Width = i; } } DEBUG(if (ForceVectorization && Width > 1 && Cost >= ScalarCost) dbgs() << "LV: Vectorization seems to be not beneficial, " << "but was forced by a user.\n"); DEBUG(dbgs() << "LV: Selecting VF: " << Width << ".\n"); VectorizationFactor Factor = {Width, (unsigned)(Width * Cost)}; return Factor; } std::pair LoopVectorizationCostModel::getSmallestAndWidestTypes() { unsigned MinWidth = -1U; unsigned MaxWidth = 8; const DataLayout &DL = TheFunction->getParent()->getDataLayout(); // For each block. for (BasicBlock *BB : TheLoop->blocks()) { // For each instruction in the loop. for (Instruction &I : *BB) { Type *T = I.getType(); // Skip ignored values. if (ValuesToIgnore.count(&I)) continue; // Only examine Loads, Stores and PHINodes. if (!isa(I) && !isa(I) && !isa(I)) continue; // Examine PHI nodes that are reduction variables. Update the type to // account for the recurrence type. if (auto *PN = dyn_cast(&I)) { if (!Legal->isReductionVariable(PN)) continue; RecurrenceDescriptor RdxDesc = (*Legal->getReductionVars())[PN]; T = RdxDesc.getRecurrenceType(); } // Examine the stored values. if (auto *ST = dyn_cast(&I)) T = ST->getValueOperand()->getType(); // Ignore loaded pointer types and stored pointer types that are not // vectorizable. // // FIXME: The check here attempts to predict whether a load or store will // be vectorized. We only know this for certain after a VF has // been selected. Here, we assume that if an access can be // vectorized, it will be. We should also look at extending this // optimization to non-pointer types. // if (T->isPointerTy() && !isConsecutiveLoadOrStore(&I) && !Legal->isAccessInterleaved(&I) && !Legal->isLegalGatherOrScatter(&I)) continue; MinWidth = std::min(MinWidth, (unsigned)DL.getTypeSizeInBits(T->getScalarType())); MaxWidth = std::max(MaxWidth, (unsigned)DL.getTypeSizeInBits(T->getScalarType())); } } return {MinWidth, MaxWidth}; } unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, unsigned VF, unsigned LoopCost) { // -- The interleave heuristics -- // We interleave the loop in order to expose ILP and reduce the loop overhead. 
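// Illustrative sketch (editorial, not part of the LLVM sources): the width
// selection above compares per-lane costs, dividing each candidate's expected
// cost by its width and keeping the cheapest. The cost table below is
// invented for illustration.
#if 0 // standalone sketch; not compiled as part of this file
#include <cstdio>
#include <map>

// Invented per-iteration costs for each candidate width.
static float expectedCostAt(unsigned Width) {
  static const std::map<unsigned, float> Table = {
      {1, 10.0f}, {2, 12.0f}, {4, 20.0f}, {8, 44.0f}};
  return Table.at(Width);
}

int main() {
  unsigned BestWidth = 1;
  float BestPerLane = expectedCostAt(1); // scalar baseline
  for (unsigned W = 2; W <= 8; W *= 2) {
    float PerLane = expectedCostAt(W) / static_cast<float>(W);
    if (PerLane < BestPerLane) {
      BestPerLane = PerLane;
      BestWidth = W;
    }
  }
  // 12/2 = 6, 20/4 = 5, 44/8 = 5.5 -> width 4 beats the scalar cost of 10.
  std::printf("selected VF: %u\n", BestWidth); // 4
  return 0;
}
#endif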
// There are many micro-architectural considerations that we can't predict // at this level. For example, frontend pressure (on decode or fetch) due to // code size, or the number and capabilities of the execution ports. // // We use the following heuristics to select the interleave count: // 1. If the code has reductions, then we interleave to break the cross // iteration dependency. // 2. If the loop is really small, then we interleave to reduce the loop // overhead. // 3. We don't interleave if we think that we will spill registers to memory // due to the increased register pressure. // When we optimize for size, we don't interleave. if (OptForSize) return 1; // We used the distance for the interleave count. if (Legal->getMaxSafeDepDistBytes() != -1U) return 1; // Do not interleave loops with a relatively small trip count. unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop); if (TC > 1 && TC < TinyTripCountInterleaveThreshold) return 1; unsigned TargetNumRegisters = TTI.getNumberOfRegisters(VF > 1); DEBUG(dbgs() << "LV: The target has " << TargetNumRegisters << " registers\n"); if (VF == 1) { if (ForceTargetNumScalarRegs.getNumOccurrences() > 0) TargetNumRegisters = ForceTargetNumScalarRegs; } else { if (ForceTargetNumVectorRegs.getNumOccurrences() > 0) TargetNumRegisters = ForceTargetNumVectorRegs; } RegisterUsage R = calculateRegisterUsage({VF})[0]; // We divide by these constants so assume that we have at least one // instruction that uses at least one register. R.MaxLocalUsers = std::max(R.MaxLocalUsers, 1U); R.NumInstructions = std::max(R.NumInstructions, 1U); // We calculate the interleave count using the following formula. // Subtract the number of loop invariants from the number of available // registers. These registers are used by all of the interleaved instances. // Next, divide the remaining registers by the number of registers that is // required by the loop, in order to estimate how many parallel instances // fit without causing spills. All of this is rounded down if necessary to be // a power of two. We want power of two interleave count to simplify any // addressing operations or alignment considerations. unsigned IC = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs) / R.MaxLocalUsers); // Don't count the induction variable as interleaved. if (EnableIndVarRegisterHeur) IC = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs - 1) / std::max(1U, (R.MaxLocalUsers - 1))); // Clamp the interleave ranges to reasonable counts. unsigned MaxInterleaveCount = TTI.getMaxInterleaveFactor(VF); // Check if the user has overridden the max. if (VF == 1) { if (ForceTargetMaxScalarInterleaveFactor.getNumOccurrences() > 0) MaxInterleaveCount = ForceTargetMaxScalarInterleaveFactor; } else { if (ForceTargetMaxVectorInterleaveFactor.getNumOccurrences() > 0) MaxInterleaveCount = ForceTargetMaxVectorInterleaveFactor; } // If we did not calculate the cost for VF (because the user selected the VF) // then we calculate the cost of VF here. if (LoopCost == 0) LoopCost = expectedCost(VF).first; // Clamp the calculated IC to be between the 1 and the max interleave count // that the target allows. if (IC > MaxInterleaveCount) IC = MaxInterleaveCount; else if (IC < 1) IC = 1; // Interleave if we vectorized this loop and there is a reduction that could // benefit from interleaving. 
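// Illustrative sketch (editorial, not part of the LLVM sources): the
// interleave count computed above divides the registers left over after loop
// invariants by the loop's peak register demand and rounds down to a power of
// two. The register counts below are assumed example numbers.
#if 0 // standalone sketch; not compiled as part of this file
#include <cstdio>

static unsigned powerOf2Floor(unsigned X) {
  unsigned P = 1;
  while (P * 2 <= X)
    P *= 2;
  return X ? P : 0;
}

int main() {
  // Assumed: 16 registers, 3 held by loop invariants, peak of 5 live values.
  unsigned TargetNumRegisters = 16;
  unsigned LoopInvariantRegs = 3;
  unsigned MaxLocalUsers = 5;
  unsigned IC =
      powerOf2Floor((TargetNumRegisters - LoopInvariantRegs) / MaxLocalUsers);
  std::printf("interleave count: %u\n", IC); // (16 - 3) / 5 = 2 -> 2
  return 0;
}
#endif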
  // Interleave if we vectorized this loop and there is a reduction that could
  // benefit from interleaving.
  if (VF > 1 && !Legal->getReductionVars()->empty()) {
    DEBUG(dbgs() << "LV: Interleaving because of reductions.\n");
    return IC;
  }

  // Note that if we've already vectorized the loop we will have done the
  // runtime check and so interleaving won't require further checks.
  bool InterleavingRequiresRuntimePointerCheck =
      (VF == 1 && Legal->getRuntimePointerChecking()->Need);

  // We want to interleave small loops in order to reduce the loop overhead and
  // potentially expose ILP opportunities.
  DEBUG(dbgs() << "LV: Loop cost is " << LoopCost << '\n');
  if (!InterleavingRequiresRuntimePointerCheck && LoopCost < SmallLoopCost) {
    // We assume that the cost overhead is 1 and we use the cost model
    // to estimate the cost of the loop and interleave until the cost of the
    // loop overhead is about 5% of the cost of the loop.
    unsigned SmallIC =
        std::min(IC, (unsigned)PowerOf2Floor(SmallLoopCost / LoopCost));

    // Interleave until store/load ports (estimated by max interleave count)
    // are saturated.
    unsigned NumStores = Legal->getNumStores();
    unsigned NumLoads = Legal->getNumLoads();
    unsigned StoresIC = IC / (NumStores ? NumStores : 1);
    unsigned LoadsIC = IC / (NumLoads ? NumLoads : 1);

    // If we have a scalar reduction (vector reductions are already dealt with
    // by this point), we can increase the critical path length if the loop
    // we're interleaving is inside another loop. Limit, by default, to 2, so
    // the critical path only gets increased by one reduction operation.
    if (!Legal->getReductionVars()->empty() && TheLoop->getLoopDepth() > 1) {
      unsigned F = static_cast<unsigned>(MaxNestedScalarReductionIC);
      SmallIC = std::min(SmallIC, F);
      StoresIC = std::min(StoresIC, F);
      LoadsIC = std::min(LoadsIC, F);
    }

    if (EnableLoadStoreRuntimeInterleave &&
        std::max(StoresIC, LoadsIC) > SmallIC) {
      DEBUG(dbgs() << "LV: Interleaving to saturate store or load ports.\n");
      return std::max(StoresIC, LoadsIC);
    }

    DEBUG(dbgs() << "LV: Interleaving to reduce branch cost.\n");
    return SmallIC;
  }

  // Interleave if this is a large loop (small loops are already dealt with by
  // this point) that could benefit from interleaving.
  bool HasReductions = !Legal->getReductionVars()->empty();
  if (TTI.enableAggressiveInterleaving(HasReductions)) {
    DEBUG(dbgs() << "LV: Interleaving to expose ILP.\n");
    return IC;
  }

  DEBUG(dbgs() << "LV: Not Interleaving.\n");
  return 1;
}
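// A hypothetical illustration of the small-loop path above: with
// SmallLoopCost = 20 and LoopCost = 4 (both values assumed for the example),
// the 5% overhead target yields SmallIC = min(IC, PowerOf2Floor(20 / 4)) =
// min(IC, 4), which is then further limited by the store/load port and
// nested-reduction clamps before being returned.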
SmallVector<LoopVectorizationCostModel::RegisterUsage, 8>
LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
  // This function calculates the register usage by measuring the highest
  // number of values that are alive at a single location. Obviously, this is
  // a very rough estimation. We scan the loop in topological order and assign
  // a number to each instruction. We use RPO to ensure that defs are met
  // before their users. We assume that each instruction that has in-loop
  // users starts an interval. We record every time that an in-loop value is
  // used, so we have a list of the first and last occurrences of each
  // instruction. Next, we transpose this data structure into a multi-map that
  // holds the list of intervals that *end* at a specific location. This
  // multi-map allows us to perform a linear search. We scan the instructions
  // linearly and record each time that a new interval starts, by placing it
  // in a set. If we find this value in the multi-map then we remove it from
  // the set. The max register usage is the maximum size of the set.
  // We also search for instructions that are defined outside the loop, but
  // are used inside the loop. We need this number separately from the
  // max-interval usage number because, when we unroll, loop-invariant values
  // do not take more registers.
  LoopBlocksDFS DFS(TheLoop);
  DFS.perform(LI);

  RegisterUsage RU;
  RU.NumInstructions = 0;

  // Each 'key' in the map opens a new interval. The values
  // of the map are the index of the 'last seen' usage of the
  // instruction that is the key.
  using IntervalMap = DenseMap<Instruction *, unsigned>;

  // Maps instruction to its index.
  DenseMap<unsigned, Instruction *> IdxToInstr;
  // Marks the end of each interval.
  IntervalMap EndPoint;
  // Saves the list of instruction indices that are used in the loop.
  SmallSet<Instruction *, 8> Ends;
  // Saves the list of values that are used in the loop but are
  // defined outside the loop, such as arguments and constants.
  SmallPtrSet<Value *, 8> LoopInvariants;

  unsigned Index = 0;
  for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) {
    RU.NumInstructions += BB->size();
    for (Instruction &I : *BB) {
      IdxToInstr[Index++] = &I;

      // Save the end location of each USE.
      for (Value *U : I.operands()) {
        auto *Instr = dyn_cast<Instruction>(U);

        // Ignore non-instruction values such as arguments, constants, etc.
        if (!Instr)
          continue;

        // If this instruction is outside the loop then record it and continue.
        if (!TheLoop->contains(Instr)) {
          LoopInvariants.insert(Instr);
          continue;
        }

        // Overwrite previous end points.
        EndPoint[Instr] = Index;
        Ends.insert(Instr);
      }
    }
  }

  // Saves the list of intervals that end with the index in 'key'.
  using InstrList = SmallVector<Instruction *, 2>;
  DenseMap<unsigned, InstrList> TransposeEnds;

  // Transpose the EndPoints to a list of values that end at each index.
  for (auto &Interval : EndPoint)
    TransposeEnds[Interval.second].push_back(Interval.first);

  SmallSet<Instruction *, 8> OpenIntervals;

  // Get the size of the widest register.
  unsigned MaxSafeDepDist = -1U;
  if (Legal->getMaxSafeDepDistBytes() != -1U)
    MaxSafeDepDist = Legal->getMaxSafeDepDistBytes() * 8;
  unsigned WidestRegister =
      std::min(TTI.getRegisterBitWidth(true), MaxSafeDepDist);
  const DataLayout &DL = TheFunction->getParent()->getDataLayout();

  SmallVector<RegisterUsage, 8> RUs(VFs.size());
  SmallVector<unsigned, 8> MaxUsages(VFs.size(), 0);

  DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n");

  // A lambda that gets the register usage for the given type and VF.
  auto GetRegUsage = [&DL, WidestRegister](Type *Ty, unsigned VF) {
    if (Ty->isTokenTy())
      return 0U;
    unsigned TypeSize = DL.getTypeSizeInBits(Ty->getScalarType());
    return std::max<unsigned>(1, VF * TypeSize / WidestRegister);
  };

  for (unsigned int i = 0; i < Index; ++i) {
    Instruction *I = IdxToInstr[i];

    // Remove all of the instructions that end at this location.
    InstrList &List = TransposeEnds[i];
    for (Instruction *ToRemove : List)
      OpenIntervals.erase(ToRemove);

    // Ignore instructions that are never used within the loop.
    if (!Ends.count(I))
      continue;

    // Skip ignored values.
    if (ValuesToIgnore.count(I))
      continue;

    // For each VF find the maximum usage of registers.
    for (unsigned j = 0, e = VFs.size(); j < e; ++j) {
      if (VFs[j] == 1) {
        MaxUsages[j] = std::max(MaxUsages[j], OpenIntervals.size());
        continue;
      }
      collectUniformsAndScalars(VFs[j]);
      // Count the number of live intervals.
      unsigned RegUsage = 0;
      for (auto Inst : OpenIntervals) {
        // Skip ignored values for VF > 1.
        if (VecValuesToIgnore.count(Inst) ||
            isScalarAfterVectorization(Inst, VFs[j]))
          continue;
        RegUsage += GetRegUsage(Inst->getType(), VFs[j]);
      }
      MaxUsages[j] = std::max(MaxUsages[j], RegUsage);
    }

    DEBUG(dbgs() << "LV(REG): At #" << i << " Interval # "
                 << OpenIntervals.size() << '\n');
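    // As a hedged illustration of this scan: for in-loop instructions
    // a, b, c, d where c uses both a and b, and d uses c, the intervals for
    // a and b are still open when the scan reaches c, so a usage of 2 is
    // recorded there; at d only c's interval remains open.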
    // Add the current instruction to the list of open intervals.
    OpenIntervals.insert(I);
  }

  for (unsigned i = 0, e = VFs.size(); i < e; ++i) {
    unsigned Invariant = 0;
    if (VFs[i] == 1)
      Invariant = LoopInvariants.size();
    else {
      for (auto Inst : LoopInvariants)
        Invariant += GetRegUsage(Inst->getType(), VFs[i]);
    }

    DEBUG(dbgs() << "LV(REG): VF = " << VFs[i] << '\n');
    DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsages[i] << '\n');
    DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant << '\n');
    DEBUG(dbgs() << "LV(REG): LoopSize: " << RU.NumInstructions << '\n');

    RU.LoopInvariantRegs = Invariant;
    RU.MaxLocalUsers = MaxUsages[i];
    RUs[i] = RU;
  }

  return RUs;
}

void LoopVectorizationCostModel::collectInstsToScalarize(unsigned VF) {
  // If we aren't vectorizing the loop, or if we've already collected the
  // instructions to scalarize, there's nothing to do. Collection may already
  // have occurred if we have a user-selected VF and are now computing the
  // expected cost for interleaving.
  if (VF < 2 || InstsToScalarize.count(VF))
    return;

  // Initialize a mapping for VF in InstsToScalarize. If we find that it's
  // not profitable to scalarize any instructions, the presence of VF in the
  // map will indicate that we've analyzed it already.
  ScalarCostsTy &ScalarCostsVF = InstsToScalarize[VF];

  // Find all the instructions that are scalar with predication in the loop and
  // determine if it would be better not to if-convert the blocks they are in.
  // If so, we also record the instructions to scalarize.
  for (BasicBlock *BB : TheLoop->blocks()) {
    if (!Legal->blockNeedsPredication(BB))
      continue;
    for (Instruction &I : *BB)
      if (Legal->isScalarWithPredication(&I)) {
        ScalarCostsTy ScalarCosts;
        if (computePredInstDiscount(&I, ScalarCosts, VF) >= 0)
          ScalarCostsVF.insert(ScalarCosts.begin(), ScalarCosts.end());

        // Remember that BB will remain after vectorization.
        PredicatedBBsAfterVectorization.insert(BB);
      }
  }
}

int LoopVectorizationCostModel::computePredInstDiscount(
    Instruction *PredInst, DenseMap<Instruction *, unsigned> &ScalarCosts,
    unsigned VF) {
  assert(!isUniformAfterVectorization(PredInst, VF) &&
         "Instruction marked uniform-after-vectorization will be predicated");

  // Initialize the discount to zero, meaning that the scalar version and the
  // vector version cost the same.
  int Discount = 0;

  // Holds instructions to analyze. The instructions we visit are mapped in
  // ScalarCosts. Those instructions are the ones that would be scalarized if
  // we find that the scalar version costs less.
  SmallVector<Instruction *, 8> Worklist;

  // Returns true if the given instruction can be scalarized.
  auto canBeScalarized = [&](Instruction *I) -> bool {
    // We only attempt to scalarize instructions forming a single-use chain
    // from the original predicated block that would otherwise be vectorized.
    // Although not strictly necessary, we give up on instructions we know will
    // already be scalar to avoid traversing chains that are unlikely to be
    // beneficial.
    if (!I->hasOneUse() || PredInst->getParent() != I->getParent() ||
        isScalarAfterVectorization(I, VF))
      return false;

    // If the instruction is scalar with predication, it will be analyzed
    // separately. We ignore it within the context of PredInst.
    if (Legal->isScalarWithPredication(I))
      return false;

    // If any of the instruction's operands are uniform after vectorization,
    // the instruction cannot be scalarized. This prevents, for example, a
    // masked load from being scalarized.
    //
    // We assume we will only emit a value for lane zero of an instruction
    // marked uniform after vectorization, rather than VF identical values.
// Thus, if we scalarize an instruction that uses a uniform, we would // create uses of values corresponding to the lanes we aren't emitting code // for. This behavior can be changed by allowing getScalarValue to clone // the lane zero values for uniforms rather than asserting. for (Use &U : I->operands()) if (auto *J = dyn_cast(U.get())) if (isUniformAfterVectorization(J, VF)) return false; // Otherwise, we can scalarize the instruction. return true; }; // Returns true if an operand that cannot be scalarized must be extracted // from a vector. We will account for this scalarization overhead below. Note // that the non-void predicated instructions are placed in their own blocks, // and their return values are inserted into vectors. Thus, an extract would // still be required. auto needsExtract = [&](Instruction *I) -> bool { return TheLoop->contains(I) && !isScalarAfterVectorization(I, VF); }; // Compute the expected cost discount from scalarizing the entire expression // feeding the predicated instruction. We currently only consider expressions // that are single-use instruction chains. Worklist.push_back(PredInst); while (!Worklist.empty()) { Instruction *I = Worklist.pop_back_val(); // If we've already analyzed the instruction, there's nothing to do. if (ScalarCosts.count(I)) continue; // Compute the cost of the vector instruction. Note that this cost already // includes the scalarization overhead of the predicated instruction. unsigned VectorCost = getInstructionCost(I, VF).first; // Compute the cost of the scalarized instruction. This cost is the cost of // the instruction as if it wasn't if-converted and instead remained in the // predicated block. We will scale this cost by block probability after // computing the scalarization overhead. unsigned ScalarCost = VF * getInstructionCost(I, 1).first; // Compute the scalarization overhead of needed insertelement instructions // and phi nodes. if (Legal->isScalarWithPredication(I) && !I->getType()->isVoidTy()) { ScalarCost += TTI.getScalarizationOverhead(ToVectorTy(I->getType(), VF), true, false); ScalarCost += VF * TTI.getCFInstrCost(Instruction::PHI); } // Compute the scalarization overhead of needed extractelement // instructions. For each of the instruction's operands, if the operand can // be scalarized, add it to the worklist; otherwise, account for the // overhead. for (Use &U : I->operands()) if (auto *J = dyn_cast(U.get())) { assert(VectorType::isValidElementType(J->getType()) && "Instruction has non-scalar type"); if (canBeScalarized(J)) Worklist.push_back(J); else if (needsExtract(J)) ScalarCost += TTI.getScalarizationOverhead( ToVectorTy(J->getType(),VF), false, true); } // Scale the total scalar cost by block probability. ScalarCost /= getReciprocalPredBlockProb(); // Compute the discount. A non-negative discount means the vector version // of the instruction costs more, and scalarizing would be beneficial. Discount += VectorCost - ScalarCost; ScalarCosts[I] = ScalarCost; } return Discount; } LoopVectorizationCostModel::VectorizationCostTy LoopVectorizationCostModel::expectedCost(unsigned VF) { VectorizationCostTy Cost; // For each block. for (BasicBlock *BB : TheLoop->blocks()) { VectorizationCostTy BlockCost; // For each instruction in the old loop. for (Instruction &I : *BB) { // Skip dbg intrinsics. if (isa(I)) continue; // Skip ignored values. if (ValuesToIgnore.count(&I) || (VF > 1 && VecValuesToIgnore.count(&I))) continue; VectorizationCostTy C = getInstructionCost(&I, VF); // Check if we should override the cost. 
if (ForceTargetInstructionCost.getNumOccurrences() > 0) C.first = ForceTargetInstructionCost; BlockCost.first += C.first; BlockCost.second |= C.second; DEBUG(dbgs() << "LV: Found an estimated cost of " << C.first << " for VF " << VF << " For instruction: " << I << '\n'); } // If we are vectorizing a predicated block, it will have been // if-converted. This means that the block's instructions (aside from // stores and instructions that may divide by zero) will now be // unconditionally executed. For the scalar case, we may not always execute // the predicated block. Thus, scale the block's cost by the probability of // executing it. if (VF == 1 && Legal->blockNeedsPredication(BB)) BlockCost.first /= getReciprocalPredBlockProb(); Cost.first += BlockCost.first; Cost.second |= BlockCost.second; } return Cost; } /// \brief Gets Address Access SCEV after verifying that the access pattern /// is loop invariant except the induction variable dependence. /// /// This SCEV can be sent to the Target in order to estimate the address /// calculation cost. static const SCEV *getAddressAccessSCEV( Value *Ptr, LoopVectorizationLegality *Legal, PredicatedScalarEvolution &PSE, const Loop *TheLoop) { auto *Gep = dyn_cast(Ptr); if (!Gep) return nullptr; // We are looking for a gep with all loop invariant indices except for one // which should be an induction variable. auto SE = PSE.getSE(); unsigned NumOperands = Gep->getNumOperands(); for (unsigned i = 1; i < NumOperands; ++i) { Value *Opd = Gep->getOperand(i); if (!SE->isLoopInvariant(SE->getSCEV(Opd), TheLoop) && !Legal->isInductionVariable(Opd)) return nullptr; } // Now we know we have a GEP ptr, %inv, %ind, %inv. return the Ptr SCEV. return PSE.getSCEV(Ptr); } static bool isStrideMul(Instruction *I, LoopVectorizationLegality *Legal) { return Legal->hasStride(I->getOperand(0)) || Legal->hasStride(I->getOperand(1)); } unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I, unsigned VF) { Type *ValTy = getMemInstValueType(I); auto SE = PSE.getSE(); unsigned Alignment = getMemInstAlignment(I); unsigned AS = getMemInstAddressSpace(I); Value *Ptr = getPointerOperand(I); Type *PtrTy = ToVectorTy(Ptr->getType(), VF); // Figure out whether the access is strided and get the stride value // if it's known in compile time const SCEV *PtrSCEV = getAddressAccessSCEV(Ptr, Legal, PSE, TheLoop); // Get the cost of the scalar memory instruction and address computation. unsigned Cost = VF * TTI.getAddressComputationCost(PtrTy, SE, PtrSCEV); Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(), Alignment, AS, I); // Get the overhead of the extractelement and insertelement instructions // we might create due to scalarization. Cost += getScalarizationOverhead(I, VF, TTI); // If we have a predicated store, it may not be executed for each vector // lane. Scale the cost by the probability of executing the predicated // block. 
if (Legal->isScalarWithPredication(I)) Cost /= getReciprocalPredBlockProb(); return Cost; } unsigned LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I, unsigned VF) { Type *ValTy = getMemInstValueType(I); Type *VectorTy = ToVectorTy(ValTy, VF); unsigned Alignment = getMemInstAlignment(I); Value *Ptr = getPointerOperand(I); unsigned AS = getMemInstAddressSpace(I); int ConsecutiveStride = Legal->isConsecutivePtr(Ptr); assert((ConsecutiveStride == 1 || ConsecutiveStride == -1) && "Stride should be 1 or -1 for consecutive memory access"); unsigned Cost = 0; if (Legal->isMaskRequired(I)) Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS); else Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS, I); bool Reverse = ConsecutiveStride < 0; if (Reverse) Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, 0); return Cost; } unsigned LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I, unsigned VF) { LoadInst *LI = cast(I); Type *ValTy = LI->getType(); Type *VectorTy = ToVectorTy(ValTy, VF); unsigned Alignment = LI->getAlignment(); unsigned AS = LI->getPointerAddressSpace(); return TTI.getAddressComputationCost(ValTy) + TTI.getMemoryOpCost(Instruction::Load, ValTy, Alignment, AS) + TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VectorTy); } unsigned LoopVectorizationCostModel::getGatherScatterCost(Instruction *I, unsigned VF) { Type *ValTy = getMemInstValueType(I); Type *VectorTy = ToVectorTy(ValTy, VF); unsigned Alignment = getMemInstAlignment(I); Value *Ptr = getPointerOperand(I); return TTI.getAddressComputationCost(VectorTy) + TTI.getGatherScatterOpCost(I->getOpcode(), VectorTy, Ptr, Legal->isMaskRequired(I), Alignment); } unsigned LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I, unsigned VF) { Type *ValTy = getMemInstValueType(I); Type *VectorTy = ToVectorTy(ValTy, VF); unsigned AS = getMemInstAddressSpace(I); auto Group = Legal->getInterleavedAccessGroup(I); assert(Group && "Fail to get an interleaved access group."); unsigned InterleaveFactor = Group->getFactor(); Type *WideVecTy = VectorType::get(ValTy, VF * InterleaveFactor); // Holds the indices of existing members in an interleaved load group. // An interleaved store group doesn't need this as it doesn't allow gaps. SmallVector Indices; if (isa(I)) { for (unsigned i = 0; i < InterleaveFactor; i++) if (Group->getMember(i)) Indices.push_back(i); } // Calculate the cost of the whole interleaved group. unsigned Cost = TTI.getInterleavedMemoryOpCost(I->getOpcode(), WideVecTy, Group->getFactor(), Indices, Group->getAlignment(), AS); if (Group->isReverse()) Cost += Group->getNumMembers() * TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, 0); return Cost; } unsigned LoopVectorizationCostModel::getMemoryInstructionCost(Instruction *I, unsigned VF) { // Calculate scalar cost only. Vectorization cost should be ready at this // moment. if (VF == 1) { Type *ValTy = getMemInstValueType(I); unsigned Alignment = getMemInstAlignment(I); unsigned AS = getMemInstAddressSpace(I); return TTI.getAddressComputationCost(ValTy) + TTI.getMemoryOpCost(I->getOpcode(), ValTy, Alignment, AS, I); } return getWideningCost(I, VF); } LoopVectorizationCostModel::VectorizationCostTy LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { // If we know that this instruction will remain uniform, check the cost of // the scalar version. 
  if (isUniformAfterVectorization(I, VF))
    VF = 1;

  if (VF > 1 && isProfitableToScalarize(I, VF))
    return VectorizationCostTy(InstsToScalarize[VF][I], false);

  // Forced scalars do not have any scalarization overhead.
  if (VF > 1 && ForcedScalars.count(VF) &&
      ForcedScalars.find(VF)->second.count(I))
    return VectorizationCostTy((getInstructionCost(I, 1).first * VF), false);

  Type *VectorTy;
  unsigned C = getInstructionCost(I, VF, VectorTy);

  bool TypeNotScalarized =
      VF > 1 && VectorTy->isVectorTy() && TTI.getNumberOfParts(VectorTy) < VF;
  return VectorizationCostTy(C, TypeNotScalarized);
}

void LoopVectorizationCostModel::setCostBasedWideningDecision(unsigned VF) {
  if (VF == 1)
    return;
  for (BasicBlock *BB : TheLoop->blocks()) {
    // For each instruction in the old loop.
    for (Instruction &I : *BB) {
      Value *Ptr = getPointerOperand(&I);
      if (!Ptr)
        continue;

      if (isa<LoadInst>(&I) && Legal->isUniform(Ptr)) {
        // Scalar load + broadcast
        unsigned Cost = getUniformMemOpCost(&I, VF);
        setWideningDecision(&I, VF, CM_Scalarize, Cost);
        continue;
      }

      // We assume that widening is the best solution when possible.
      if (Legal->memoryInstructionCanBeWidened(&I, VF)) {
        unsigned Cost = getConsecutiveMemOpCost(&I, VF);
        int ConsecutiveStride = Legal->isConsecutivePtr(getPointerOperand(&I));
        assert((ConsecutiveStride == 1 || ConsecutiveStride == -1) &&
               "Expected consecutive stride.");
        InstWidening Decision =
            ConsecutiveStride == 1 ? CM_Widen : CM_Widen_Reverse;
        setWideningDecision(&I, VF, Decision, Cost);
        continue;
      }

      // Choose between Interleaving, Gather/Scatter or Scalarization.
      unsigned InterleaveCost = std::numeric_limits<unsigned>::max();
      unsigned NumAccesses = 1;
      if (Legal->isAccessInterleaved(&I)) {
        auto Group = Legal->getInterleavedAccessGroup(&I);
        assert(Group && "Fail to get an interleaved access group.");

        // Make one decision for the whole group.
        if (getWideningDecision(&I, VF) != CM_Unknown)
          continue;

        NumAccesses = Group->getNumMembers();
        InterleaveCost = getInterleaveGroupCost(&I, VF);
      }

      unsigned GatherScatterCost =
          Legal->isLegalGatherOrScatter(&I)
              ? getGatherScatterCost(&I, VF) * NumAccesses
              : std::numeric_limits<unsigned>::max();

      unsigned ScalarizationCost =
          getMemInstScalarizationCost(&I, VF) * NumAccesses;

      // Choose the best option for the current VF, record this decision, and
      // use it during vectorization.
      unsigned Cost;
      InstWidening Decision;
      if (InterleaveCost <= GatherScatterCost &&
          InterleaveCost < ScalarizationCost) {
        Decision = CM_Interleave;
        Cost = InterleaveCost;
      } else if (GatherScatterCost < ScalarizationCost) {
        Decision = CM_GatherScatter;
        Cost = GatherScatterCost;
      } else {
        Decision = CM_Scalarize;
        Cost = ScalarizationCost;
      }
      // If the instruction belongs to an interleave group, the whole group
      // receives the same decision. The whole group receives the cost, but
      // the cost will actually be assigned to one instruction.
      if (auto Group = Legal->getInterleavedAccessGroup(&I))
        setWideningDecision(Group, VF, Decision, Cost);
      else
        setWideningDecision(&I, VF, Decision, Cost);
    }
  }

  // Make sure that any load of address and any other address computation
  // remains scalar unless there is gather/scatter support. This avoids
  // inevitable extracts into address registers, and also has the benefit of
  // activating LSR more, since that pass can't optimize vectorized
  // addresses.
  if (TTI.prefersVectorizedAddressing())
    return;
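  // A hypothetical cost comparison for the three-way choice above: if, for
  // some access, InterleaveCost = 12, GatherScatterCost = 10 and
  // ScalarizationCost = 14, interleaving fails its <= test against
  // gather/scatter, and gather/scatter passes its < test against
  // scalarization, so the decision is CM_GatherScatter with Cost = 10.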
  // Start with all scalar pointer uses.
  SmallPtrSet<Instruction *, 8> AddrDefs;
  for (BasicBlock *BB : TheLoop->blocks())
    for (Instruction &I : *BB) {
      Instruction *PtrDef =
          dyn_cast_or_null<Instruction>(getPointerOperand(&I));
      if (PtrDef && TheLoop->contains(PtrDef) &&
          getWideningDecision(&I, VF) != CM_GatherScatter)
        AddrDefs.insert(PtrDef);
    }

  // Add all instructions used to generate the addresses.
  SmallVector<Instruction *, 8> Worklist;
  for (auto *I : AddrDefs)
    Worklist.push_back(I);
  while (!Worklist.empty()) {
    Instruction *I = Worklist.pop_back_val();
    for (auto &Op : I->operands())
      if (auto *InstOp = dyn_cast<Instruction>(Op))
        if ((InstOp->getParent() == I->getParent()) && !isa<PHINode>(InstOp) &&
            AddrDefs.insert(InstOp).second)
          Worklist.push_back(InstOp);
  }

  for (auto *I : AddrDefs) {
    if (isa<LoadInst>(I)) {
      // Setting the desired widening decision should ideally be handled by
      // cost functions, but since this involves the task of finding out if
      // the loaded register is involved in an address computation, it is
      // instead changed here when we know this is the case.
      InstWidening Decision = getWideningDecision(I, VF);
      if (Decision == CM_Widen || Decision == CM_Widen_Reverse)
        // Scalarize a widened load of address.
        setWideningDecision(I, VF, CM_Scalarize,
                            (VF * getMemoryInstructionCost(I, 1)));
      else if (auto Group = Legal->getInterleavedAccessGroup(I)) {
        // Scalarize an interleave group of address loads.
        for (unsigned I = 0; I < Group->getFactor(); ++I) {
          if (Instruction *Member = Group->getMember(I))
            setWideningDecision(Member, VF, CM_Scalarize,
                                (VF * getMemoryInstructionCost(Member, 1)));
        }
      }
    } else
      // Make sure I gets scalarized and a cost estimate without
      // scalarization overhead.
      ForcedScalars[VF].insert(I);
  }
}

unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
                                                        unsigned VF,
                                                        Type *&VectorTy) {
  Type *RetTy = I->getType();
  if (canTruncateToMinimalBitwidth(I, VF))
    RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]);
  VectorTy = isScalarAfterVectorization(I, VF) ? RetTy : ToVectorTy(RetTy, VF);
  auto SE = PSE.getSE();

  // TODO: We need to estimate the cost of intrinsic calls.
  switch (I->getOpcode()) {
  case Instruction::GetElementPtr:
    // We mark this instruction as zero-cost because the cost of GEPs in
    // vectorized code depends on whether the corresponding memory instruction
    // is scalarized or not. Therefore, we handle GEPs with the memory
    // instruction cost.
    return 0;
  case Instruction::Br: {
    // In cases of scalarized and predicated instructions, there will be VF
    // predicated blocks in the vectorized loop. Each branch around these
    // blocks also requires an extract of its vector compare i1 element.
    bool ScalarPredicatedBB = false;
    BranchInst *BI = cast<BranchInst>(I);
    if (VF > 1 && BI->isConditional() &&
        (PredicatedBBsAfterVectorization.count(BI->getSuccessor(0)) ||
         PredicatedBBsAfterVectorization.count(BI->getSuccessor(1))))
      ScalarPredicatedBB = true;

    if (ScalarPredicatedBB) {
      // Return cost for branches around scalarized and predicated blocks.
      Type *Vec_i1Ty =
          VectorType::get(IntegerType::getInt1Ty(RetTy->getContext()), VF);
      return (TTI.getScalarizationOverhead(Vec_i1Ty, false, true) +
              (TTI.getCFInstrCost(Instruction::Br) * VF));
    } else if (I->getParent() == TheLoop->getLoopLatch() || VF == 1)
      // The back-edge branch will remain, as will all scalar branches.
      return TTI.getCFInstrCost(Instruction::Br);
    else
      // This branch will be eliminated by if-conversion.
      return 0;
    // Note: We currently assume zero cost for an unconditional branch inside
    // a predicated block since it will become a fall-through, although we
    // may decide in the future to call TTI for all branches.
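    // A rough numeric sketch of the ScalarPredicatedBB cost above (values
    // hypothetical): for VF = 4, the returned cost is the extraction
    // overhead of a <4 x i1> compare vector plus four scalar branches, i.e.
    // getScalarizationOverhead(Vec_i1Ty, false, true) +
    // 4 * getCFInstrCost(Instruction::Br).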
} case Instruction::PHI: { auto *Phi = cast(I); // First-order recurrences are replaced by vector shuffles inside the loop. if (VF > 1 && Legal->isFirstOrderRecurrence(Phi)) return TTI.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector, VectorTy, VF - 1, VectorTy); // Phi nodes in non-header blocks (not inductions, reductions, etc.) are // converted into select instructions. We require N - 1 selects per phi // node, where N is the number of incoming values. if (VF > 1 && Phi->getParent() != TheLoop->getHeader()) return (Phi->getNumIncomingValues() - 1) * TTI.getCmpSelInstrCost( Instruction::Select, ToVectorTy(Phi->getType(), VF), ToVectorTy(Type::getInt1Ty(Phi->getContext()), VF)); return TTI.getCFInstrCost(Instruction::PHI); } case Instruction::UDiv: case Instruction::SDiv: case Instruction::URem: case Instruction::SRem: // If we have a predicated instruction, it may not be executed for each // vector lane. Get the scalarization cost and scale this amount by the // probability of executing the predicated block. If the instruction is not // predicated, we fall through to the next case. if (VF > 1 && Legal->isScalarWithPredication(I)) { unsigned Cost = 0; // These instructions have a non-void type, so account for the phi nodes // that we will create. This cost is likely to be zero. The phi node // cost, if any, should be scaled by the block probability because it // models a copy at the end of each predicated block. Cost += VF * TTI.getCFInstrCost(Instruction::PHI); // The cost of the non-predicated instruction. Cost += VF * TTI.getArithmeticInstrCost(I->getOpcode(), RetTy); // The cost of insertelement and extractelement instructions needed for // scalarization. Cost += getScalarizationOverhead(I, VF, TTI); // Scale the cost by the probability of executing the predicated blocks. // This assumes the predicated block for each vector lane is equally // likely. return Cost / getReciprocalPredBlockProb(); } LLVM_FALLTHROUGH; case Instruction::Add: case Instruction::FAdd: case Instruction::Sub: case Instruction::FSub: case Instruction::Mul: case Instruction::FMul: case Instruction::FDiv: case Instruction::FRem: case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: case Instruction::And: case Instruction::Or: case Instruction::Xor: { // Since we will replace the stride by 1 the multiplication should go away. if (I->getOpcode() == Instruction::Mul && isStrideMul(I, Legal)) return 0; // Certain instructions can be cheaper to vectorize if they have a constant // second vector operand. One example of this are shifts on x86. TargetTransformInfo::OperandValueKind Op1VK = TargetTransformInfo::OK_AnyValue; TargetTransformInfo::OperandValueKind Op2VK = TargetTransformInfo::OK_AnyValue; TargetTransformInfo::OperandValueProperties Op1VP = TargetTransformInfo::OP_None; TargetTransformInfo::OperandValueProperties Op2VP = TargetTransformInfo::OP_None; Value *Op2 = I->getOperand(1); // Check for a splat or for a non uniform vector of constants. 
if (isa(Op2)) { ConstantInt *CInt = cast(Op2); if (CInt && CInt->getValue().isPowerOf2()) Op2VP = TargetTransformInfo::OP_PowerOf2; Op2VK = TargetTransformInfo::OK_UniformConstantValue; } else if (isa(Op2) || isa(Op2)) { Op2VK = TargetTransformInfo::OK_NonUniformConstantValue; Constant *SplatValue = cast(Op2)->getSplatValue(); if (SplatValue) { ConstantInt *CInt = dyn_cast(SplatValue); if (CInt && CInt->getValue().isPowerOf2()) Op2VP = TargetTransformInfo::OP_PowerOf2; Op2VK = TargetTransformInfo::OK_UniformConstantValue; } } else if (Legal->isUniform(Op2)) { Op2VK = TargetTransformInfo::OK_UniformValue; } SmallVector Operands(I->operand_values()); unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1; return N * TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK, Op2VK, Op1VP, Op2VP, Operands); } case Instruction::Select: { SelectInst *SI = cast(I); const SCEV *CondSCEV = SE->getSCEV(SI->getCondition()); bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop)); Type *CondTy = SI->getCondition()->getType(); if (!ScalarCond) CondTy = VectorType::get(CondTy, VF); return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy, I); } case Instruction::ICmp: case Instruction::FCmp: { Type *ValTy = I->getOperand(0)->getType(); Instruction *Op0AsInstruction = dyn_cast(I->getOperand(0)); if (canTruncateToMinimalBitwidth(Op0AsInstruction, VF)) ValTy = IntegerType::get(ValTy->getContext(), MinBWs[Op0AsInstruction]); VectorTy = ToVectorTy(ValTy, VF); return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, nullptr, I); } case Instruction::Store: case Instruction::Load: { unsigned Width = VF; if (Width > 1) { InstWidening Decision = getWideningDecision(I, Width); assert(Decision != CM_Unknown && "CM decision should be taken at this point"); if (Decision == CM_Scalarize) Width = 1; } VectorTy = ToVectorTy(getMemInstValueType(I), Width); return getMemoryInstructionCost(I, VF); } case Instruction::ZExt: case Instruction::SExt: case Instruction::FPToUI: case Instruction::FPToSI: case Instruction::FPExt: case Instruction::PtrToInt: case Instruction::IntToPtr: case Instruction::SIToFP: case Instruction::UIToFP: case Instruction::Trunc: case Instruction::FPTrunc: case Instruction::BitCast: { // We optimize the truncation of induction variables having constant // integer steps. The cost of these truncations is the same as the scalar // operation. if (isOptimizableIVTruncate(I, VF)) { auto *Trunc = cast(I); return TTI.getCastInstrCost(Instruction::Trunc, Trunc->getDestTy(), Trunc->getSrcTy(), Trunc); } Type *SrcScalarTy = I->getOperand(0)->getType(); Type *SrcVecTy = VectorTy->isVectorTy() ? ToVectorTy(SrcScalarTy, VF) : SrcScalarTy; if (canTruncateToMinimalBitwidth(I, VF)) { // This cast is going to be shrunk. This may remove the cast or it might // turn it into slightly different cast. For example, if MinBW == 16, // "zext i8 %1 to i32" becomes "zext i8 %1 to i16". // // Calculate the modified src and dest types. Type *MinVecTy = VectorTy; if (I->getOpcode() == Instruction::Trunc) { SrcVecTy = smallestIntegerVectorType(SrcVecTy, MinVecTy); VectorTy = largestIntegerVectorType(ToVectorTy(I->getType(), VF), MinVecTy); } else if (I->getOpcode() == Instruction::ZExt || I->getOpcode() == Instruction::SExt) { SrcVecTy = largestIntegerVectorType(SrcVecTy, MinVecTy); VectorTy = smallestIntegerVectorType(ToVectorTy(I->getType(), VF), MinVecTy); } } unsigned N = isScalarAfterVectorization(I, VF) ? 
VF : 1; return N * TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy, I); } case Instruction::Call: { bool NeedToScalarize; CallInst *CI = cast(I); unsigned CallCost = getVectorCallCost(CI, VF, TTI, TLI, NeedToScalarize); if (getVectorIntrinsicIDForCall(CI, TLI)) return std::min(CallCost, getVectorIntrinsicCost(CI, VF, TTI, TLI)); return CallCost; } default: // The cost of executing VF copies of the scalar instruction. This opcode // is unknown. Assume that it is the same as 'mul'. return VF * TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy) + getScalarizationOverhead(I, VF, TTI); } // end of switch. } char LoopVectorize::ID = 0; static const char lv_name[] = "Loop Vectorization"; INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis) INITIALIZE_PASS_DEPENDENCY(DemandedBitsWrapperPass) INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false) namespace llvm { Pass *createLoopVectorizePass(bool NoUnrolling, bool AlwaysVectorize) { return new LoopVectorize(NoUnrolling, AlwaysVectorize); } } // end namespace llvm bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) { // Check if the pointer operand of a load or store instruction is // consecutive. if (auto *Ptr = getPointerOperand(Inst)) return Legal->isConsecutivePtr(Ptr); return false; } void LoopVectorizationCostModel::collectValuesToIgnore() { // Ignore ephemeral values. CodeMetrics::collectEphemeralValues(TheLoop, AC, ValuesToIgnore); // Ignore type-promoting instructions we identified during reduction // detection. for (auto &Reduction : *Legal->getReductionVars()) { RecurrenceDescriptor &RedDes = Reduction.second; SmallPtrSetImpl &Casts = RedDes.getCastInsts(); VecValuesToIgnore.insert(Casts.begin(), Casts.end()); } // Ignore type-casting instructions we identified during induction // detection. for (auto &Induction : *Legal->getInductionVars()) { InductionDescriptor &IndDes = Induction.second; const SmallVectorImpl &Casts = IndDes.getCastInsts(); VecValuesToIgnore.insert(Casts.begin(), Casts.end()); } } LoopVectorizationCostModel::VectorizationFactor LoopVectorizationPlanner::plan(bool OptForSize, unsigned UserVF) { // Width 1 means no vectorize, cost 0 means uncomputed cost. const LoopVectorizationCostModel::VectorizationFactor NoVectorization = {1U, 0U}; Optional MaybeMaxVF = CM.computeMaxVF(OptForSize); if (!MaybeMaxVF.hasValue()) // Cases considered too costly to vectorize. return NoVectorization; if (UserVF) { DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n"); assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two"); // Collect the instructions (and their associated costs) that will be more // profitable to scalarize. 
    CM.selectUserVectorizationFactor(UserVF);
    buildVPlans(UserVF, UserVF);
    DEBUG(printPlans(dbgs()));
    return {UserVF, 0};
  }

  unsigned MaxVF = MaybeMaxVF.getValue();
  assert(MaxVF != 0 && "MaxVF is zero.");

  for (unsigned VF = 1; VF <= MaxVF; VF *= 2) {
    // Collect Uniform and Scalar instructions after vectorization with VF.
    CM.collectUniformsAndScalars(VF);

    // Collect the instructions (and their associated costs) that will be more
    // profitable to scalarize.
    if (VF > 1)
      CM.collectInstsToScalarize(VF);
  }

  buildVPlans(1, MaxVF);
  DEBUG(printPlans(dbgs()));
  if (MaxVF == 1)
    return NoVectorization;

  // Select the optimal vectorization factor.
  return CM.selectVectorizationFactor(MaxVF);
}

void LoopVectorizationPlanner::setBestPlan(unsigned VF, unsigned UF) {
  DEBUG(dbgs() << "Setting best plan to VF=" << VF << ", UF=" << UF << '\n');
  BestVF = VF;
  BestUF = UF;

  erase_if(VPlans, [VF](const VPlanPtr &Plan) { return !Plan->hasVF(VF); });
  assert(VPlans.size() == 1 && "Best VF has not a single VPlan.");
}

void LoopVectorizationPlanner::executePlan(InnerLoopVectorizer &ILV,
                                           DominatorTree *DT) {
  // Perform the actual loop transformation.

  // 1. Create a new empty loop. Unlink the old loop and connect the new one.
  VPCallbackILV CallbackILV(ILV);
  VPTransformState State{BestVF, BestUF, LI, DT, ILV.Builder,
                         ILV.VectorLoopValueMap, &ILV, CallbackILV};
  State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton();

  //===------------------------------------------------===//
  //
  // Notice: any optimization or new instruction that goes
  // into the code below should also be implemented in
  // the cost-model.
  //
  //===------------------------------------------------===//

  // 2. Copy and widen instructions from the old loop into the new loop.
  assert(VPlans.size() == 1 && "Not a single VPlan to execute.");
  VPlans.front()->execute(&State);

  // 3. Fix the vectorized code: take care of header phi's, live-outs,
  //    predication, updating analyses.
  ILV.fixVectorizedLoop();
}

void LoopVectorizationPlanner::collectTriviallyDeadInstructions(
    SmallPtrSetImpl<Instruction *> &DeadInstructions) {
  BasicBlock *Latch = OrigLoop->getLoopLatch();

  // We create new control-flow for the vectorized loop, so the original
  // condition will be dead after vectorization if it's only used by the
  // branch.
  auto *Cmp = dyn_cast<Instruction>(Latch->getTerminator()->getOperand(0));
  if (Cmp && Cmp->hasOneUse())
    DeadInstructions.insert(Cmp);

  // We create new "steps" for induction variable updates to which the
  // original induction variables map. An original update instruction will be
  // dead if all its users except the induction variable are dead.
  for (auto &Induction : *Legal->getInductionVars()) {
    PHINode *Ind = Induction.first;
    auto *IndUpdate = cast<Instruction>(Ind->getIncomingValueForBlock(Latch));
    if (llvm::all_of(IndUpdate->users(), [&](User *U) -> bool {
          return U == Ind || DeadInstructions.count(cast<Instruction>(U));
        }))
      DeadInstructions.insert(IndUpdate);

    // We record as "Dead" also the type-casting instructions we had identified
    // during induction analysis. We don't need any handling for them in the
    // vectorized loop because we have proven that, under a proper runtime
    // test guarding the vectorized loop, the value of the phi, and the casted
    // value of the phi, are the same. The last instruction in this casting
    // chain will get its scalar/vector/widened def from the
    // scalar/vector/widened def of the respective phi node. Any other casts in
    // the induction def-use chain have no other uses outside the phi update
    // chain, and will be ignored.
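    // For instance, with the canonical update "%i.next = add %i, 1" whose
    // only user is the induction phi itself, the all_of check above holds
    // and %i.next is recorded as dead: the vectorized loop emits its own
    // induction steps instead.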
InductionDescriptor &IndDes = Induction.second; const SmallVectorImpl &Casts = IndDes.getCastInsts(); DeadInstructions.insert(Casts.begin(), Casts.end()); } } Value *InnerLoopUnroller::reverseVector(Value *Vec) { return Vec; } Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) { return V; } Value *InnerLoopUnroller::getStepVector(Value *Val, int StartIdx, Value *Step, Instruction::BinaryOps BinOp) { // When unrolling and the VF is 1, we only need to add a simple scalar. Type *Ty = Val->getType(); assert(!Ty->isVectorTy() && "Val must be a scalar"); if (Ty->isFloatingPointTy()) { Constant *C = ConstantFP::get(Ty, (double)StartIdx); // Floating point operations had to be 'fast' to enable the unrolling. Value *MulOp = addFastMathFlag(Builder.CreateFMul(C, Step)); return addFastMathFlag(Builder.CreateBinOp(BinOp, Val, MulOp)); } Constant *C = ConstantInt::get(Ty, StartIdx); return Builder.CreateAdd(Val, Builder.CreateMul(C, Step), "induction"); } static void AddRuntimeUnrollDisableMetaData(Loop *L) { SmallVector MDs; // Reserve first location for self reference to the LoopID metadata node. MDs.push_back(nullptr); bool IsUnrollMetadata = false; MDNode *LoopID = L->getLoopID(); if (LoopID) { // First find existing loop unrolling disable metadata. for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { auto *MD = dyn_cast(LoopID->getOperand(i)); if (MD) { const auto *S = dyn_cast(MD->getOperand(0)); IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll.disable"); } MDs.push_back(LoopID->getOperand(i)); } } if (!IsUnrollMetadata) { // Add runtime unroll disable metadata. LLVMContext &Context = L->getHeader()->getContext(); SmallVector DisableOperands; DisableOperands.push_back( MDString::get(Context, "llvm.loop.unroll.runtime.disable")); MDNode *DisableNode = MDNode::get(Context, DisableOperands); MDs.push_back(DisableNode); MDNode *NewLoopID = MDNode::get(Context, MDs); // Set operand 0 to refer to the loop id itself. NewLoopID->replaceOperandWith(0, NewLoopID); L->setLoopID(NewLoopID); } } bool LoopVectorizationPlanner::getDecisionAndClampRange( const std::function &Predicate, VFRange &Range) { assert(Range.End > Range.Start && "Trying to test an empty VF range."); bool PredicateAtRangeStart = Predicate(Range.Start); for (unsigned TmpVF = Range.Start * 2; TmpVF < Range.End; TmpVF *= 2) if (Predicate(TmpVF) != PredicateAtRangeStart) { Range.End = TmpVF; break; } return PredicateAtRangeStart; } /// Build VPlans for the full range of feasible VF's = {\p MinVF, 2 * \p MinVF, /// 4 * \p MinVF, ..., \p MaxVF} by repeatedly building a VPlan for a sub-range /// of VF's starting at a given VF and extending it as much as possible. Each /// vectorization decision can potentially shorten this sub-range during /// buildVPlan(). void LoopVectorizationPlanner::buildVPlans(unsigned MinVF, unsigned MaxVF) { // Collect conditions feeding internal conditional branches; they need to be // represented in VPlan for it to model masking. 
SmallPtrSet NeedDef; auto *Latch = OrigLoop->getLoopLatch(); for (BasicBlock *BB : OrigLoop->blocks()) { if (BB == Latch) continue; BranchInst *Branch = dyn_cast(BB->getTerminator()); if (Branch && Branch->isConditional()) NeedDef.insert(Branch->getCondition()); } for (unsigned VF = MinVF; VF < MaxVF + 1;) { VFRange SubRange = {VF, MaxVF + 1}; VPlans.push_back(buildVPlan(SubRange, NeedDef)); VF = SubRange.End; } } VPValue *LoopVectorizationPlanner::createEdgeMask(BasicBlock *Src, BasicBlock *Dst, VPlanPtr &Plan) { assert(is_contained(predecessors(Dst), Src) && "Invalid edge"); // Look for cached value. std::pair Edge(Src, Dst); EdgeMaskCacheTy::iterator ECEntryIt = EdgeMaskCache.find(Edge); if (ECEntryIt != EdgeMaskCache.end()) return ECEntryIt->second; VPValue *SrcMask = createBlockInMask(Src, Plan); // The terminator has to be a branch inst! BranchInst *BI = dyn_cast(Src->getTerminator()); assert(BI && "Unexpected terminator found"); if (!BI->isConditional()) return EdgeMaskCache[Edge] = SrcMask; VPValue *EdgeMask = Plan->getVPValue(BI->getCondition()); assert(EdgeMask && "No Edge Mask found for condition"); if (BI->getSuccessor(0) != Dst) EdgeMask = Builder.createNot(EdgeMask); if (SrcMask) // Otherwise block in-mask is all-one, no need to AND. EdgeMask = Builder.createAnd(EdgeMask, SrcMask); return EdgeMaskCache[Edge] = EdgeMask; } VPValue *LoopVectorizationPlanner::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) { assert(OrigLoop->contains(BB) && "Block is not a part of a loop"); // Look for cached value. BlockMaskCacheTy::iterator BCEntryIt = BlockMaskCache.find(BB); if (BCEntryIt != BlockMaskCache.end()) return BCEntryIt->second; // All-one mask is modelled as no-mask following the convention for masked // load/store/gather/scatter. Initialize BlockMask to no-mask. VPValue *BlockMask = nullptr; // Loop incoming mask is all-one. if (OrigLoop->getHeader() == BB) return BlockMaskCache[BB] = BlockMask; // This is the block mask. We OR all incoming edges. for (auto *Predecessor : predecessors(BB)) { VPValue *EdgeMask = createEdgeMask(Predecessor, BB, Plan); if (!EdgeMask) // Mask of predecessor is all-one so mask of block is too. return BlockMaskCache[BB] = EdgeMask; if (!BlockMask) { // BlockMask has its initialized nullptr value. BlockMask = EdgeMask; continue; } BlockMask = Builder.createOr(BlockMask, EdgeMask); } return BlockMaskCache[BB] = BlockMask; } VPInterleaveRecipe * LoopVectorizationPlanner::tryToInterleaveMemory(Instruction *I, VFRange &Range) { const InterleaveGroup *IG = Legal->getInterleavedAccessGroup(I); if (!IG) return nullptr; // Now check if IG is relevant for VF's in the given range. auto isIGMember = [&](Instruction *I) -> std::function { return [=](unsigned VF) -> bool { return (VF >= 2 && // Query is illegal for VF == 1 CM.getWideningDecision(I, VF) == LoopVectorizationCostModel::CM_Interleave); }; }; if (!getDecisionAndClampRange(isIGMember(I), Range)) return nullptr; // I is a member of an InterleaveGroup for VF's in the (possibly trimmed) // range. If it's the primary member of the IG construct a VPInterleaveRecipe. // Otherwise, it's an adjunct member of the IG, do not construct any Recipe. 
assert(I == IG->getInsertPos() && "Generating a recipe for an adjunct member of an interleave group"); return new VPInterleaveRecipe(IG); } VPWidenMemoryInstructionRecipe * LoopVectorizationPlanner::tryToWidenMemory(Instruction *I, VFRange &Range, VPlanPtr &Plan) { if (!isa(I) && !isa(I)) return nullptr; auto willWiden = [&](unsigned VF) -> bool { if (VF == 1) return false; if (CM.isScalarAfterVectorization(I, VF) || CM.isProfitableToScalarize(I, VF)) return false; LoopVectorizationCostModel::InstWidening Decision = CM.getWideningDecision(I, VF); assert(Decision != LoopVectorizationCostModel::CM_Unknown && "CM decision should be taken at this point."); assert(Decision != LoopVectorizationCostModel::CM_Interleave && "Interleave memory opportunity should be caught earlier."); return Decision != LoopVectorizationCostModel::CM_Scalarize; }; if (!getDecisionAndClampRange(willWiden, Range)) return nullptr; VPValue *Mask = nullptr; if (Legal->isMaskRequired(I)) Mask = createBlockInMask(I->getParent(), Plan); return new VPWidenMemoryInstructionRecipe(*I, Mask); } VPWidenIntOrFpInductionRecipe * LoopVectorizationPlanner::tryToOptimizeInduction(Instruction *I, VFRange &Range) { if (PHINode *Phi = dyn_cast(I)) { // Check if this is an integer or fp induction. If so, build the recipe that // produces its scalar and vector values. InductionDescriptor II = Legal->getInductionVars()->lookup(Phi); if (II.getKind() == InductionDescriptor::IK_IntInduction || II.getKind() == InductionDescriptor::IK_FpInduction) return new VPWidenIntOrFpInductionRecipe(Phi); return nullptr; } // Optimize the special case where the source is a constant integer // induction variable. Notice that we can only optimize the 'trunc' case // because (a) FP conversions lose precision, (b) sext/zext may wrap, and // (c) other casts depend on pointer size. // Determine whether \p K is a truncation based on an induction variable that // can be optimized. auto isOptimizableIVTruncate = [&](Instruction *K) -> std::function { return [=](unsigned VF) -> bool { return CM.isOptimizableIVTruncate(K, VF); }; }; if (isa(I) && getDecisionAndClampRange(isOptimizableIVTruncate(I), Range)) return new VPWidenIntOrFpInductionRecipe(cast(I->getOperand(0)), cast(I)); return nullptr; } VPBlendRecipe * LoopVectorizationPlanner::tryToBlend(Instruction *I, VPlanPtr &Plan) { PHINode *Phi = dyn_cast(I); if (!Phi || Phi->getParent() == OrigLoop->getHeader()) return nullptr; // We know that all PHIs in non-header blocks are converted into selects, so // we don't have to worry about the insertion order and we can just use the // builder. At this point we generate the predication tree. There may be // duplications since this is a simple recursive scan, but future // optimizations will clean it up. 
SmallVector Masks; unsigned NumIncoming = Phi->getNumIncomingValues(); for (unsigned In = 0; In < NumIncoming; In++) { VPValue *EdgeMask = createEdgeMask(Phi->getIncomingBlock(In), Phi->getParent(), Plan); assert((EdgeMask || NumIncoming == 1) && "Multiple predecessors with one having a full mask"); if (EdgeMask) Masks.push_back(EdgeMask); } return new VPBlendRecipe(Phi, Masks); } bool LoopVectorizationPlanner::tryToWiden(Instruction *I, VPBasicBlock *VPBB, VFRange &Range) { if (Legal->isScalarWithPredication(I)) return false; auto IsVectorizableOpcode = [](unsigned Opcode) { switch (Opcode) { case Instruction::Add: case Instruction::And: case Instruction::AShr: case Instruction::BitCast: case Instruction::Br: case Instruction::Call: case Instruction::FAdd: case Instruction::FCmp: case Instruction::FDiv: case Instruction::FMul: case Instruction::FPExt: case Instruction::FPToSI: case Instruction::FPToUI: case Instruction::FPTrunc: case Instruction::FRem: case Instruction::FSub: case Instruction::GetElementPtr: case Instruction::ICmp: case Instruction::IntToPtr: case Instruction::Load: case Instruction::LShr: case Instruction::Mul: case Instruction::Or: case Instruction::PHI: case Instruction::PtrToInt: case Instruction::SDiv: case Instruction::Select: case Instruction::SExt: case Instruction::Shl: case Instruction::SIToFP: case Instruction::SRem: case Instruction::Store: case Instruction::Sub: case Instruction::Trunc: case Instruction::UDiv: case Instruction::UIToFP: case Instruction::URem: case Instruction::Xor: case Instruction::ZExt: return true; } return false; }; if (!IsVectorizableOpcode(I->getOpcode())) return false; if (CallInst *CI = dyn_cast(I)) { Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); if (ID && (ID == Intrinsic::assume || ID == Intrinsic::lifetime_end || ID == Intrinsic::lifetime_start || ID == Intrinsic::sideeffect)) return false; } auto willWiden = [&](unsigned VF) -> bool { if (!isa(I) && (CM.isScalarAfterVectorization(I, VF) || CM.isProfitableToScalarize(I, VF))) return false; if (CallInst *CI = dyn_cast(I)) { Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); // The following case may be scalarized depending on the VF. // The flag shows whether we use Intrinsic or a usual Call for vectorized // version of the instruction. // Is it beneficial to perform intrinsic call compared to lib call? bool NeedToScalarize; unsigned CallCost = getVectorCallCost(CI, VF, *TTI, TLI, NeedToScalarize); bool UseVectorIntrinsic = ID && getVectorIntrinsicCost(CI, VF, *TTI, TLI) <= CallCost; return UseVectorIntrinsic || !NeedToScalarize; } if (isa(I) || isa(I)) { assert(CM.getWideningDecision(I, VF) == LoopVectorizationCostModel::CM_Scalarize && "Memory widening decisions should have been taken care by now"); return false; } return true; }; if (!getDecisionAndClampRange(willWiden, Range)) return false; // Success: widen this instruction. We optimize the common case where // consecutive instructions can be represented by a single recipe. 
if (!VPBB->empty()) { VPWidenRecipe *LastWidenRecipe = dyn_cast(&VPBB->back()); if (LastWidenRecipe && LastWidenRecipe->appendInstruction(I)) return true; } VPBB->appendRecipe(new VPWidenRecipe(I)); return true; } VPBasicBlock *LoopVectorizationPlanner::handleReplication( Instruction *I, VFRange &Range, VPBasicBlock *VPBB, DenseMap &PredInst2Recipe, VPlanPtr &Plan) { bool IsUniform = getDecisionAndClampRange( [&](unsigned VF) { return CM.isUniformAfterVectorization(I, VF); }, Range); bool IsPredicated = Legal->isScalarWithPredication(I); auto *Recipe = new VPReplicateRecipe(I, IsUniform, IsPredicated); // Find if I uses a predicated instruction. If so, it will use its scalar // value. Avoid hoisting the insert-element which packs the scalar value into // a vector value, as that happens iff all users use the vector value. for (auto &Op : I->operands()) if (auto *PredInst = dyn_cast(Op)) if (PredInst2Recipe.find(PredInst) != PredInst2Recipe.end()) PredInst2Recipe[PredInst]->setAlsoPack(false); // Finalize the recipe for Instr, first if it is not predicated. if (!IsPredicated) { DEBUG(dbgs() << "LV: Scalarizing:" << *I << "\n"); VPBB->appendRecipe(Recipe); return VPBB; } DEBUG(dbgs() << "LV: Scalarizing and predicating:" << *I << "\n"); assert(VPBB->getSuccessors().empty() && "VPBB has successors when handling predicated replication."); // Record predicated instructions for above packing optimizations. PredInst2Recipe[I] = Recipe; VPBlockBase *Region = VPBB->setOneSuccessor(createReplicateRegion(I, Recipe, Plan)); return cast(Region->setOneSuccessor(new VPBasicBlock())); } VPRegionBlock * LoopVectorizationPlanner::createReplicateRegion(Instruction *Instr, VPRecipeBase *PredRecipe, VPlanPtr &Plan) { // Instructions marked for predication are replicated and placed under an // if-then construct to prevent side-effects. // Generate recipes to compute the block mask for this region. VPValue *BlockInMask = createBlockInMask(Instr->getParent(), Plan); // Build the triangular if-then region. std::string RegionName = (Twine("pred.") + Instr->getOpcodeName()).str(); assert(Instr->getParent() && "Predicated instruction not in any basic block"); auto *BOMRecipe = new VPBranchOnMaskRecipe(BlockInMask); auto *Entry = new VPBasicBlock(Twine(RegionName) + ".entry", BOMRecipe); auto *PHIRecipe = Instr->getType()->isVoidTy() ? nullptr : new VPPredInstPHIRecipe(Instr); auto *Exit = new VPBasicBlock(Twine(RegionName) + ".continue", PHIRecipe); auto *Pred = new VPBasicBlock(Twine(RegionName) + ".if", PredRecipe); VPRegionBlock *Region = new VPRegionBlock(Entry, Exit, RegionName, true); // Note: first set Entry as region entry and then connect successors starting // from it in order, to propagate the "parent" of each VPBasicBlock. Entry->setTwoSuccessors(Pred, Exit); Pred->setOneSuccessor(Exit); return Region; } LoopVectorizationPlanner::VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range, const SmallPtrSetImpl &NeedDef) { EdgeMaskCache.clear(); BlockMaskCache.clear(); DenseMap &SinkAfter = Legal->getSinkAfter(); DenseMap SinkAfterInverse; // Collect instructions from the original loop that will become trivially dead // in the vectorized loop. We don't need to vectorize these instructions. For // example, original induction update instructions can become dead because we // separately emit induction "steps" when generating code for the new loop. // Similarly, we create a new latch condition when setting up the structure // of the new loop, so the old one can become dead. 
SmallPtrSet DeadInstructions; collectTriviallyDeadInstructions(DeadInstructions); // Hold a mapping from predicated instructions to their recipes, in order to // fix their AlsoPack behavior if a user is determined to replicate and use a // scalar instead of vector value. DenseMap PredInst2Recipe; // Create a dummy pre-entry VPBasicBlock to start building the VPlan. VPBasicBlock *VPBB = new VPBasicBlock("Pre-Entry"); auto Plan = llvm::make_unique(VPBB); // Represent values that will have defs inside VPlan. for (Value *V : NeedDef) Plan->addVPValue(V); // Scan the body of the loop in a topological order to visit each basic block // after having visited its predecessor basic blocks. LoopBlocksDFS DFS(OrigLoop); DFS.perform(LI); for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) { // Relevant instructions from basic block BB will be grouped into VPRecipe // ingredients and fill a new VPBasicBlock. unsigned VPBBsForBB = 0; auto *FirstVPBBForBB = new VPBasicBlock(BB->getName()); VPBB->setOneSuccessor(FirstVPBBForBB); VPBB = FirstVPBBForBB; Builder.setInsertPoint(VPBB); std::vector Ingredients; // Organize the ingredients to vectorize from current basic block in the // right order. for (Instruction &I : *BB) { Instruction *Instr = &I; // First filter out irrelevant instructions, to ensure no recipes are // built for them. if (isa(Instr) || isa(Instr) || DeadInstructions.count(Instr)) continue; // I is a member of an InterleaveGroup for Range.Start. If it's an adjunct // member of the IG, do not construct any Recipe for it. const InterleaveGroup *IG = Legal->getInterleavedAccessGroup(Instr); if (IG && Instr != IG->getInsertPos() && Range.Start >= 2 && // Query is illegal for VF == 1 CM.getWideningDecision(Instr, Range.Start) == LoopVectorizationCostModel::CM_Interleave) { if (SinkAfterInverse.count(Instr)) Ingredients.push_back(SinkAfterInverse.find(Instr)->second); continue; } // Move instructions to handle first-order recurrences, step 1: avoid // handling this instruction until after we've handled the instruction it // should follow. auto SAIt = SinkAfter.find(Instr); if (SAIt != SinkAfter.end()) { DEBUG(dbgs() << "Sinking" << *SAIt->first << " after" << *SAIt->second << " to vectorize a 1st order recurrence.\n"); SinkAfterInverse[SAIt->second] = Instr; continue; } Ingredients.push_back(Instr); // Move instructions to handle first-order recurrences, step 2: push the // instruction to be sunk at its insertion point. auto SAInvIt = SinkAfterInverse.find(Instr); if (SAInvIt != SinkAfterInverse.end()) Ingredients.push_back(SAInvIt->second); } // Introduce each ingredient into VPlan. for (Instruction *Instr : Ingredients) { VPRecipeBase *Recipe = nullptr; // Check if Instr should belong to an interleave memory recipe, or already // does. In the latter case Instr is irrelevant. if ((Recipe = tryToInterleaveMemory(Instr, Range))) { VPBB->appendRecipe(Recipe); continue; } // Check if Instr is a memory operation that should be widened. if ((Recipe = tryToWidenMemory(Instr, Range, Plan))) { VPBB->appendRecipe(Recipe); continue; } // Check if Instr should form some PHI recipe. 
      if ((Recipe = tryToOptimizeInduction(Instr, Range))) {
        VPBB->appendRecipe(Recipe);
        continue;
      }
      if ((Recipe = tryToBlend(Instr, Plan))) {
        VPBB->appendRecipe(Recipe);
        continue;
      }
      if (PHINode *Phi = dyn_cast<PHINode>(Instr)) {
        VPBB->appendRecipe(new VPWidenPHIRecipe(Phi));
        continue;
      }

      // Check if Instr is to be widened by a general VPWidenRecipe, after
      // having first checked for specific widening recipes that deal with
      // Interleave Groups, Inductions and Phi nodes.
      if (tryToWiden(Instr, VPBB, Range))
        continue;

      // Otherwise, if all widening options failed, Instruction is to be
      // replicated. This may create a successor for VPBB.
      VPBasicBlock *NextVPBB =
          handleReplication(Instr, Range, VPBB, PredInst2Recipe, Plan);
      if (NextVPBB != VPBB) {
        VPBB = NextVPBB;
        VPBB->setName(BB->hasName() ? BB->getName() + "." + Twine(VPBBsForBB++)
                                    : "");
      }
    }
  }

  // Discard empty dummy pre-entry VPBasicBlock. Note that other VPBasicBlocks
  // may also be empty, such as the last one VPBB, reflecting original
  // basic-blocks with no recipes.
  VPBasicBlock *PreEntry = cast<VPBasicBlock>(Plan->getEntry());
  assert(PreEntry->empty() && "Expecting empty pre-entry block.");
  VPBlockBase *Entry = Plan->setEntry(PreEntry->getSingleSuccessor());
  PreEntry->disconnectSuccessor(Entry);
  delete PreEntry;

  std::string PlanName;
  raw_string_ostream RSO(PlanName);
  unsigned VF = Range.Start;
  Plan->addVF(VF);
  RSO << "Initial VPlan for VF={" << VF;
  for (VF *= 2; VF < Range.End; VF *= 2) {
    Plan->addVF(VF);
    RSO << "," << VF;
  }
  RSO << "},UF>=1";
  RSO.flush();
  Plan->setName(PlanName);

  return Plan;
}

void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent) const {
  O << " +\n"
    << Indent << "\"INTERLEAVE-GROUP with factor " << IG->getFactor() << " at ";
  IG->getInsertPos()->printAsOperand(O, false);
  O << "\\l\"";
  for (unsigned i = 0; i < IG->getFactor(); ++i)
    if (Instruction *I = IG->getMember(i))
      O << " +\n"
        << Indent << "\" " << VPlanIngredient(I) << " " << i << "\\l\"";
}

void VPWidenRecipe::execute(VPTransformState &State) {
  for (auto &Instr : make_range(Begin, End))
    State.ILV->widenInstruction(Instr);
}

void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
  assert(!State.Instance && "Int or FP induction being replicated.");
  State.ILV->widenIntOrFpInduction(IV, Trunc);
}

void VPWidenPHIRecipe::execute(VPTransformState &State) {
  State.ILV->widenPHIInstruction(Phi, State.UF, State.VF);
}

void VPBlendRecipe::execute(VPTransformState &State) {
  State.ILV->setDebugLocFromInst(State.Builder, Phi);
  // We know that all PHIs in non-header blocks are converted into
  // selects, so we don't have to worry about the insertion order and we
  // can just use the builder.
  // At this point we generate the predication tree. There may be
  // duplications since this is a simple recursive scan, but future
  // optimizations will clean it up.

  unsigned NumIncoming = Phi->getNumIncomingValues();
  assert((User || NumIncoming == 1) &&
         "Multiple predecessors with predecessors having a full mask");

  // Generate a sequence of selects of the form:
  // SELECT(Mask3, In3,
  //        SELECT(Mask2, In2,
  //               ( ...)))
  InnerLoopVectorizer::VectorParts Entry(State.UF);
  for (unsigned In = 0; In < NumIncoming; ++In) {
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      // We might have single edge PHIs (blocks) - use an identity
      // 'select' for the first PHI operand.
      Value *In0 =
          State.ILV->getOrCreateVectorValue(Phi->getIncomingValue(In), Part);
      if (In == 0)
        Entry[Part] = In0; // Initialize with the first incoming value.
      else {
        // Select between the current value and the previous incoming edge
        // based on the incoming mask.
        Value *Cond = State.get(User->getOperand(In), Part);
        Entry[Part] =
            State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi");
      }
    }
  }
  for (unsigned Part = 0; Part < State.UF; ++Part)
    State.ValueMap.setVectorValue(Phi, Part, Entry[Part]);
}

void VPInterleaveRecipe::execute(VPTransformState &State) {
  assert(!State.Instance && "Interleave group being replicated.");
  State.ILV->vectorizeInterleaveGroup(IG->getInsertPos());
}

void VPReplicateRecipe::execute(VPTransformState &State) {
  if (State.Instance) { // Generate a single instance.
    State.ILV->scalarizeInstruction(Ingredient, *State.Instance, IsPredicated);
    // Insert scalar instance packing it into a vector.
    if (AlsoPack && State.VF > 1) {
      // If we're constructing lane 0, initialize to start from undef.
      if (State.Instance->Lane == 0) {
        Value *Undef =
            UndefValue::get(VectorType::get(Ingredient->getType(), State.VF));
        State.ValueMap.setVectorValue(Ingredient, State.Instance->Part, Undef);
      }
      State.ILV->packScalarIntoVectorValue(Ingredient, *State.Instance);
    }
    return;
  }

  // Generate scalar instances for all VF lanes of all UF parts, unless the
  // instruction is uniform, in which case generate only the first lane for
  // each of the UF parts.
  unsigned EndLane = IsUniform ? 1 : State.VF;
  for (unsigned Part = 0; Part < State.UF; ++Part)
    for (unsigned Lane = 0; Lane < EndLane; ++Lane)
      State.ILV->scalarizeInstruction(Ingredient, {Part, Lane}, IsPredicated);
}

void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
  assert(State.Instance && "Branch on Mask works only on single instance.");

  unsigned Part = State.Instance->Part;
  unsigned Lane = State.Instance->Lane;

  Value *ConditionBit = nullptr;
  if (!User) // Block in mask is all-one.
    ConditionBit = State.Builder.getTrue();
  else {
    VPValue *BlockInMask = User->getOperand(0);
    ConditionBit = State.get(BlockInMask, Part);
    if (ConditionBit->getType()->isVectorTy())
      ConditionBit = State.Builder.CreateExtractElement(
          ConditionBit, State.Builder.getInt32(Lane));
  }

  // Replace the temporary unreachable terminator with a new conditional
  // branch, whose two destinations will be set later when they are created.
  auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
  assert(isa<UnreachableInst>(CurrentTerminator) &&
         "Expected to replace unreachable terminator with conditional branch.");
  auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
  CondBr->setSuccessor(0, nullptr);
  ReplaceInstWithInst(CurrentTerminator, CondBr);
}

void VPPredInstPHIRecipe::execute(VPTransformState &State) {
  assert(State.Instance && "Predicated instruction PHI works per instance.");
  Instruction *ScalarPredInst = cast<Instruction>(
      State.ValueMap.getScalarValue(PredInst, *State.Instance));
  BasicBlock *PredicatedBB = ScalarPredInst->getParent();
  BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
  assert(PredicatingBB && "Predicated block has no single predecessor.");

  // By current pack/unpack logic we need to generate only a single phi node:
  // if a vector value for the predicated instruction exists at this point it
  // means the instruction has vector users only, and a phi for the vector
  // value is needed. In this case the recipe of the predicated instruction is
  // marked to also do that packing, thereby "hoisting" the insert-element
  // sequence. Otherwise, a phi node for the scalar value is needed.
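  // Illustrative sketch of the two PHI shapes (value names hypothetical,
  // editorial note):
  //   %vphi = phi <4 x i32> [ %prev.vec, %predicating ], [ %ins.elt, %predicated ]
  //   %sphi = phi i32 [ undef, %predicating ], [ %scalar.inst, %predicated ]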
  unsigned Part = State.Instance->Part;
  if (State.ValueMap.hasVectorValue(PredInst, Part)) {
    Value *VectorValue = State.ValueMap.getVectorValue(PredInst, Part);
    InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
    PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
    VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
    VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
    State.ValueMap.resetVectorValue(PredInst, Part, VPhi); // Update cache.
  } else {
    Type *PredInstType = PredInst->getType();
    PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
    Phi->addIncoming(UndefValue::get(ScalarPredInst->getType()), PredicatingBB);
    Phi->addIncoming(ScalarPredInst, PredicatedBB);
    State.ValueMap.resetScalarValue(PredInst, *State.Instance, Phi);
  }
}

void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
  if (!User)
    return State.ILV->vectorizeMemoryInstruction(&Instr);

  // Last (and currently only) operand is a mask.
  InnerLoopVectorizer::VectorParts MaskValues(State.UF);
  VPValue *Mask = User->getOperand(User->getNumOperands() - 1);
  for (unsigned Part = 0; Part < State.UF; ++Part)
    MaskValues[Part] = State.get(Mask, Part);
  State.ILV->vectorizeMemoryInstruction(&Instr, &MaskValues);
}

bool LoopVectorizePass::processLoop(Loop *L) {
  assert(L->empty() && "Only process inner loops.");

#ifndef NDEBUG
  const std::string DebugLocStr = getDebugLocString(L);
#endif /* NDEBUG */

  DEBUG(dbgs() << "\nLV: Checking a loop in \""
               << L->getHeader()->getParent()->getName() << "\" from "
               << DebugLocStr << "\n");

  LoopVectorizeHints Hints(L, DisableUnrolling, *ORE);

  DEBUG(dbgs() << "LV: Loop hints:"
               << " force="
               << (Hints.getForce() == LoopVectorizeHints::FK_Disabled
                       ? "disabled"
                       : (Hints.getForce() == LoopVectorizeHints::FK_Enabled
                              ? "enabled"
                              : "?"))
               << " width=" << Hints.getWidth()
               << " unroll=" << Hints.getInterleave() << "\n");

  // Function containing loop
  Function *F = L->getHeader()->getParent();

  // Looking at the diagnostic output is the only way to determine if a loop
  // was vectorized (other than looking at the IR or machine code), so it
  // is important to generate an optimization remark for each loop. Most of
  // these messages are generated as OptimizationRemarkAnalysis. Remarks
  // generated as OptimizationRemark and OptimizationRemarkMissed are
  // less verbose reporting vectorized loops and unvectorized loops that may
  // benefit from vectorization, respectively.

  if (!Hints.allowVectorization(F, L, AlwaysVectorize)) {
    DEBUG(dbgs() << "LV: Loop hints prevent vectorization.\n");
    return false;
  }

  PredicatedScalarEvolution PSE(*SE, *L);

  // Check if it is legal to vectorize the loop.
  LoopVectorizationRequirements Requirements(*ORE);
  LoopVectorizationLegality LVL(L, PSE, DT, TLI, AA, F, TTI, GetLAA, LI, ORE,
-                               &Requirements, &Hints);
+                               &Requirements, &Hints, DB, AC);
  if (!LVL.canVectorize()) {
    DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
    emitMissedWarning(F, L, Hints, ORE);
    return false;
  }

  // Check the function attributes to find out if this function should be
  // optimized for size.
  bool OptForSize =
      Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->optForSize();

  // Check the loop for a trip count threshold: vectorize loops with a tiny
  // trip count by optimizing for size, to minimize overheads.
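  // (E.g., a loop with a known trip count of 3 would only be vectorized under
  // the OptForSize regime set up below, where no runtime guards or scalar
  // epilogue are generated; editorial example.)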
  unsigned ExpectedTC = SE->getSmallConstantMaxTripCount(L);
  bool HasExpectedTC = (ExpectedTC > 0);

  if (!HasExpectedTC && LoopVectorizeWithBlockFrequency) {
    auto EstimatedTC = getLoopEstimatedTripCount(L);
    if (EstimatedTC) {
      ExpectedTC = *EstimatedTC;
      HasExpectedTC = true;
    }
  }

  if (HasExpectedTC && ExpectedTC < TinyTripCountVectorThreshold) {
    DEBUG(dbgs() << "LV: Found a loop with a very small trip count. "
                 << "This loop is worth vectorizing only if no scalar "
                 << "iteration overheads are incurred.");
    if (Hints.getForce() == LoopVectorizeHints::FK_Enabled)
      DEBUG(dbgs() << " But vectorizing was explicitly forced.\n");
    else {
      DEBUG(dbgs() << "\n");
      // Loops with a very small trip count are considered for vectorization
      // under OptForSize, thereby making sure the cost of their loop body is
      // dominant, free of runtime guards and scalar iteration overheads.
      OptForSize = true;
    }
  }

  // Check the function attributes to see if implicit floats are allowed.
  // FIXME: This check doesn't seem possibly correct -- what if the loop is
  // an integer loop and the vector instructions selected are purely integer
  // vector instructions?
  if (F->hasFnAttribute(Attribute::NoImplicitFloat)) {
    DEBUG(dbgs() << "LV: Can't vectorize when the NoImplicitFloat"
                    " attribute is used.\n");
    ORE->emit(createMissedAnalysis(Hints.vectorizeAnalysisPassName(),
                                   "NoImplicitFloat", L)
              << "loop not vectorized due to NoImplicitFloat attribute");
    emitMissedWarning(F, L, Hints, ORE);
    return false;
  }

  // Check if the target supports potentially unsafe FP vectorization.
  // FIXME: Add a check for the type of safety issue (denormal, signaling)
  // for the target we're vectorizing for, to make sure none of the
  // additional fp-math flags can help.
  if (Hints.isPotentiallyUnsafe() &&
      TTI->isFPVectorizationPotentiallyUnsafe()) {
    DEBUG(dbgs() << "LV: Potentially unsafe FP op prevents vectorization.\n");
    ORE->emit(
        createMissedAnalysis(Hints.vectorizeAnalysisPassName(), "UnsafeFP", L)
        << "loop not vectorized due to unsafe FP support.");
    emitMissedWarning(F, L, Hints, ORE);
    return false;
  }

  // Use the cost model.
  LoopVectorizationCostModel CM(L, PSE, LI, &LVL, *TTI, TLI, DB, AC, ORE, F,
                                &Hints);
  CM.collectValuesToIgnore();

  // Use the planner for vectorization.
  LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM);

  // Get user vectorization factor.
  unsigned UserVF = Hints.getWidth();

  // Plan how to best vectorize, return the best VF and its cost.
  LoopVectorizationCostModel::VectorizationFactor VF =
      LVP.plan(OptForSize, UserVF);

  // Select the interleave count.
  unsigned IC = CM.selectInterleaveCount(OptForSize, VF.Width, VF.Cost);

  // Get user interleave count.
  unsigned UserIC = Hints.getInterleave();

  // Identify the diagnostic messages that should be produced.
  std::pair<StringRef, std::string> VecDiagMsg, IntDiagMsg;
  bool VectorizeLoop = true, InterleaveLoop = true;
  if (Requirements.doesNotMeet(F, L, Hints)) {
    DEBUG(dbgs() << "LV: Not vectorizing: loop did not meet vectorization "
                    "requirements.\n");
    emitMissedWarning(F, L, Hints, ORE);
    return false;
  }

  if (VF.Width == 1) {
    DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
    VecDiagMsg = std::make_pair(
        "VectorizationNotBeneficial",
        "the cost-model indicates that vectorization is not beneficial");
    VectorizeLoop = false;
  }

  if (IC == 1 && UserIC <= 1) {
    // Tell the user interleaving is not beneficial.
DEBUG(dbgs() << "LV: Interleaving is not beneficial.\n"); IntDiagMsg = std::make_pair( "InterleavingNotBeneficial", "the cost-model indicates that interleaving is not beneficial"); InterleaveLoop = false; if (UserIC == 1) { IntDiagMsg.first = "InterleavingNotBeneficialAndDisabled"; IntDiagMsg.second += " and is explicitly disabled or interleave count is set to 1"; } } else if (IC > 1 && UserIC == 1) { // Tell the user interleaving is beneficial, but it explicitly disabled. DEBUG(dbgs() << "LV: Interleaving is beneficial but is explicitly disabled."); IntDiagMsg = std::make_pair( "InterleavingBeneficialButDisabled", "the cost-model indicates that interleaving is beneficial " "but is explicitly disabled or interleave count is set to 1"); InterleaveLoop = false; } // Override IC if user provided an interleave count. IC = UserIC > 0 ? UserIC : IC; // Emit diagnostic messages, if any. const char *VAPassName = Hints.vectorizeAnalysisPassName(); if (!VectorizeLoop && !InterleaveLoop) { // Do not vectorize or interleaving the loop. ORE->emit([&]() { return OptimizationRemarkMissed(VAPassName, VecDiagMsg.first, L->getStartLoc(), L->getHeader()) << VecDiagMsg.second; }); ORE->emit([&]() { return OptimizationRemarkMissed(LV_NAME, IntDiagMsg.first, L->getStartLoc(), L->getHeader()) << IntDiagMsg.second; }); return false; } else if (!VectorizeLoop && InterleaveLoop) { DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n'); ORE->emit([&]() { return OptimizationRemarkAnalysis(VAPassName, VecDiagMsg.first, L->getStartLoc(), L->getHeader()) << VecDiagMsg.second; }); } else if (VectorizeLoop && !InterleaveLoop) { DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in " << DebugLocStr << '\n'); ORE->emit([&]() { return OptimizationRemarkAnalysis(LV_NAME, IntDiagMsg.first, L->getStartLoc(), L->getHeader()) << IntDiagMsg.second; }); } else if (VectorizeLoop && InterleaveLoop) { DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in " << DebugLocStr << '\n'); DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n'); } LVP.setBestPlan(VF.Width, IC); using namespace ore; if (!VectorizeLoop) { assert(IC > 1 && "interleave count should not be 1 or 0"); // If we decided that it is not legal to vectorize the loop, then // interleave it. InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL, &CM); LVP.executePlan(Unroller, DT); ORE->emit([&]() { return OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(), L->getHeader()) << "interleaved loop (interleaved count: " << NV("InterleaveCount", IC) << ")"; }); } else { // If we decided that it is *legal* to vectorize the loop, then do it. InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, IC, &LVL, &CM); LVP.executePlan(LB, DT); ++LoopsVectorized; // Add metadata to disable runtime unrolling a scalar loop when there are // no runtime checks about strides and memory. A scalar loop that is // rarely used is not worth unrolling. if (!LB.areSafetyChecksAdded()) AddRuntimeUnrollDisableMetaData(L); // Report the vectorization decision. ORE->emit([&]() { return OptimizationRemark(LV_NAME, "Vectorized", L->getStartLoc(), L->getHeader()) << "vectorized loop (vectorization width: " << NV("VectorizationFactor", VF.Width) << ", interleaved count: " << NV("InterleaveCount", IC) << ")"; }); } // Mark the loop as already vectorized to avoid vectorizing again. 
  Hints.setAlreadyVectorized();

  DEBUG(verifyFunction(*L->getHeader()->getParent()));
  return true;
}

bool LoopVectorizePass::runImpl(
    Function &F, ScalarEvolution &SE_, LoopInfo &LI_, TargetTransformInfo &TTI_,
    DominatorTree &DT_, BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_,
    DemandedBits &DB_, AliasAnalysis &AA_, AssumptionCache &AC_,
    std::function<const LoopAccessInfo &(Loop &)> &GetLAA_,
    OptimizationRemarkEmitter &ORE_) {
  SE = &SE_;
  LI = &LI_;
  TTI = &TTI_;
  DT = &DT_;
  BFI = &BFI_;
  TLI = TLI_;
  AA = &AA_;
  AC = &AC_;
  GetLAA = &GetLAA_;
  DB = &DB_;
  ORE = &ORE_;

  // Don't attempt if
  // 1. the target claims to have no vector registers, and
  // 2. interleaving won't help ILP.
  //
  // The second condition is necessary because, even if the target has no
  // vector registers, loop vectorization may still enable scalar
  // interleaving.
  if (!TTI->getNumberOfRegisters(true) && TTI->getMaxInterleaveFactor(1) < 2)
    return false;

  bool Changed = false;

  // The vectorizer requires loops to be in simplified form.
  // Since simplification may add new inner loops, it has to run before the
  // legality and profitability checks. This means running the loop vectorizer
  // will simplify all loops, regardless of whether anything ends up being
  // vectorized.
  for (auto &L : *LI)
    Changed |= simplifyLoop(L, DT, LI, SE, AC, false /* PreserveLCSSA */);

  // Build up a worklist of inner-loops to vectorize. This is necessary as
  // the act of vectorizing or partially unrolling a loop creates new loops
  // and can invalidate iterators across the loops.
  SmallVector<Loop *, 8> Worklist;

  for (Loop *L : *LI)
    addAcyclicInnerLoop(*L, Worklist);

  LoopsAnalyzed += Worklist.size();

  // Now walk the identified inner loops.
  while (!Worklist.empty()) {
    Loop *L = Worklist.pop_back_val();

    // For the inner loops we actually process, form LCSSA to simplify the
    // transform.
    Changed |= formLCSSARecursively(*L, *DT, LI, SE);

    Changed |= processLoop(L);
  }

  // Process each loop nest in the function.
  return Changed;
}

PreservedAnalyses LoopVectorizePass::run(Function &F,
                                         FunctionAnalysisManager &AM) {
  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
  auto &LI = AM.getResult<LoopAnalysis>(F);
  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &BFI = AM.getResult<BlockFrequencyAnalysis>(F);
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  auto &AA = AM.getResult<AAManager>(F);
  auto &AC = AM.getResult<AssumptionAnalysis>(F);
  auto &DB = AM.getResult<DemandedBitsAnalysis>(F);
  auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);

  auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
  std::function<const LoopAccessInfo &(Loop &)> GetLAA =
      [&](Loop &L) -> const LoopAccessInfo & {
    LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, nullptr};
    return LAM.getResult<LoopAccessAnalysis>(L, AR);
  };
  bool Changed =
      runImpl(F, SE, LI, TTI, DT, BFI, &TLI, DB, AA, AC, GetLAA, ORE);
  if (!Changed)
    return PreservedAnalyses::all();
  PreservedAnalyses PA;
  PA.preserve<LoopAnalysis>();
  PA.preserve<DominatorTreeAnalysis>();
  PA.preserve<BasicAA>();
  PA.preserve<GlobalsAA>();
  return PA;
}
Index: head/contrib/llvm/tools/clang/lib/Basic/Targets/X86.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/Basic/Targets/X86.cpp	(revision 329982)
+++ head/contrib/llvm/tools/clang/lib/Basic/Targets/X86.cpp	(revision 329983)
@@ -1,1602 +1,1603 @@
//===--- X86.cpp - Implement X86 target feature support -------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements X86 TargetInfo objects.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"

namespace clang {
namespace targets {

const Builtin::Info BuiltinInfoX86[] = {
#define BUILTIN(ID, TYPE, ATTRS)                                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
#define TARGET_HEADER_BUILTIN(ID, TYPE, ATTRS, HEADER, LANGS, FEATURE)         \
  {#ID, TYPE, ATTRS, HEADER, LANGS, FEATURE},
#include "clang/Basic/BuiltinsX86.def"

#define BUILTIN(ID, TYPE, ATTRS)                                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
#define TARGET_HEADER_BUILTIN(ID, TYPE, ATTRS, HEADER, LANGS, FEATURE)         \
  {#ID, TYPE, ATTRS, HEADER, LANGS, FEATURE},
#include "clang/Basic/BuiltinsX86_64.def"
};

static const char *const GCCRegNames[] = {
    "ax",    "dx",    "cx",    "bx",    "si",    "di",    "bp",    "sp",
    "st",    "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)",
    "argp",  "flags", "fpcr",  "fpsr",  "dirflag", "frame",
    "xmm0",  "xmm1",  "xmm2",  "xmm3",  "xmm4",  "xmm5",  "xmm6",  "xmm7",
    "mm0",   "mm1",   "mm2",   "mm3",   "mm4",   "mm5",   "mm6",   "mm7",
    "r8",    "r9",    "r10",   "r11",   "r12",   "r13",   "r14",   "r15",
    "xmm8",  "xmm9",  "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15",
    "ymm0",  "ymm1",  "ymm2",  "ymm3",  "ymm4",  "ymm5",  "ymm6",  "ymm7",
    "ymm8",  "ymm9",  "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15",
    "xmm16", "xmm17", "xmm18", "xmm19", "xmm20", "xmm21", "xmm22", "xmm23",
    "xmm24", "xmm25", "xmm26", "xmm27", "xmm28", "xmm29", "xmm30", "xmm31",
    "ymm16", "ymm17", "ymm18", "ymm19", "ymm20", "ymm21", "ymm22", "ymm23",
    "ymm24", "ymm25", "ymm26", "ymm27", "ymm28", "ymm29", "ymm30", "ymm31",
    "zmm0",  "zmm1",  "zmm2",  "zmm3",  "zmm4",  "zmm5",  "zmm6",  "zmm7",
    "zmm8",  "zmm9",  "zmm10", "zmm11", "zmm12", "zmm13", "zmm14", "zmm15",
    "zmm16", "zmm17", "zmm18", "zmm19", "zmm20", "zmm21", "zmm22", "zmm23",
    "zmm24", "zmm25", "zmm26", "zmm27", "zmm28", "zmm29", "zmm30", "zmm31",
    "k0",    "k1",    "k2",    "k3",    "k4",    "k5",    "k6",    "k7",
    "cr0",   "cr2",   "cr3",   "cr4",   "cr8",
    "dr0",   "dr1",   "dr2",   "dr3",   "dr6",   "dr7",
    "bnd0",  "bnd1",  "bnd2",  "bnd3",
};

const TargetInfo::AddlRegName AddlRegNames[] = {
    {{"al", "ah", "eax", "rax"}, 0},
    {{"bl", "bh", "ebx", "rbx"}, 3},
    {{"cl", "ch", "ecx", "rcx"}, 2},
    {{"dl", "dh", "edx", "rdx"}, 1},
    {{"esi", "rsi"}, 4},
    {{"edi", "rdi"}, 5},
    {{"esp", "rsp"}, 7},
    {{"ebp", "rbp"}, 6},
    {{"r8d", "r8w", "r8b"}, 38},
    {{"r9d", "r9w", "r9b"}, 39},
    {{"r10d", "r10w", "r10b"}, 40},
    {{"r11d", "r11w", "r11b"}, 41},
    {{"r12d", "r12w", "r12b"}, 42},
    {{"r13d", "r13w", "r13b"}, 43},
    {{"r14d", "r14w", "r14b"}, 44},
    {{"r15d", "r15w", "r15b"}, 45},
};

} // namespace targets
} // namespace clang

using namespace clang;
using namespace clang::targets;

bool X86TargetInfo::setFPMath(StringRef Name) {
  if (Name == "387") {
    FPMath = FP_387;
    return true;
  }
  if (Name == "sse") {
    FPMath = FP_SSE;
    return true;
  }
  return false;
}

bool X86TargetInfo::initFeatureMap(
    llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
    const std::vector<std::string> &FeaturesVec) const {
  // FIXME: This *really* should not be here.
  // X86_64 always has SSE2.
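  // (Editorial example: even `-mno-sse2` reaches this point with "sse2" set
  // for x86_64 triples; the explicit user feature list in FeaturesVec is only
  // applied by the TargetInfo::initFeatureMap call further down.)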
if (getTriple().getArch() == llvm::Triple::x86_64) setFeatureEnabledImpl(Features, "sse2", true); const CPUKind Kind = getCPUKind(CPU); // Enable X87 for all X86 processors but Lakemont. if (Kind != CK_Lakemont) setFeatureEnabledImpl(Features, "x87", true); switch (Kind) { case CK_Generic: case CK_i386: case CK_i486: case CK_i586: case CK_Pentium: case CK_PentiumPro: case CK_Lakemont: break; case CK_PentiumMMX: case CK_Pentium2: case CK_K6: case CK_WinChipC6: setFeatureEnabledImpl(Features, "mmx", true); break; case CK_Icelake: setFeatureEnabledImpl(Features, "vaes", true); setFeatureEnabledImpl(Features, "gfni", true); setFeatureEnabledImpl(Features, "vpclmulqdq", true); setFeatureEnabledImpl(Features, "avx512bitalg", true); setFeatureEnabledImpl(Features, "avx512vnni", true); setFeatureEnabledImpl(Features, "avx512vbmi2", true); setFeatureEnabledImpl(Features, "avx512vpopcntdq", true); LLVM_FALLTHROUGH; case CK_Cannonlake: setFeatureEnabledImpl(Features, "avx512ifma", true); setFeatureEnabledImpl(Features, "avx512vbmi", true); setFeatureEnabledImpl(Features, "sha", true); LLVM_FALLTHROUGH; case CK_SkylakeServer: setFeatureEnabledImpl(Features, "avx512f", true); setFeatureEnabledImpl(Features, "avx512cd", true); setFeatureEnabledImpl(Features, "avx512dq", true); setFeatureEnabledImpl(Features, "avx512bw", true); setFeatureEnabledImpl(Features, "avx512vl", true); setFeatureEnabledImpl(Features, "pku", true); - setFeatureEnabledImpl(Features, "clwb", true); + if (Kind != CK_Cannonlake) // CNL inherits all SKX features, except CLWB + setFeatureEnabledImpl(Features, "clwb", true); LLVM_FALLTHROUGH; case CK_SkylakeClient: setFeatureEnabledImpl(Features, "xsavec", true); setFeatureEnabledImpl(Features, "xsaves", true); setFeatureEnabledImpl(Features, "mpx", true); setFeatureEnabledImpl(Features, "sgx", true); setFeatureEnabledImpl(Features, "clflushopt", true); setFeatureEnabledImpl(Features, "rtm", true); LLVM_FALLTHROUGH; case CK_Broadwell: setFeatureEnabledImpl(Features, "rdseed", true); setFeatureEnabledImpl(Features, "adx", true); setFeatureEnabledImpl(Features, "prfchw", true); LLVM_FALLTHROUGH; case CK_Haswell: setFeatureEnabledImpl(Features, "avx2", true); setFeatureEnabledImpl(Features, "lzcnt", true); setFeatureEnabledImpl(Features, "bmi", true); setFeatureEnabledImpl(Features, "bmi2", true); setFeatureEnabledImpl(Features, "fma", true); setFeatureEnabledImpl(Features, "movbe", true); LLVM_FALLTHROUGH; case CK_IvyBridge: setFeatureEnabledImpl(Features, "rdrnd", true); setFeatureEnabledImpl(Features, "f16c", true); setFeatureEnabledImpl(Features, "fsgsbase", true); LLVM_FALLTHROUGH; case CK_SandyBridge: setFeatureEnabledImpl(Features, "avx", true); setFeatureEnabledImpl(Features, "xsave", true); setFeatureEnabledImpl(Features, "xsaveopt", true); LLVM_FALLTHROUGH; case CK_Westmere: setFeatureEnabledImpl(Features, "aes", true); setFeatureEnabledImpl(Features, "pclmul", true); LLVM_FALLTHROUGH; case CK_Nehalem: setFeatureEnabledImpl(Features, "sse4.2", true); LLVM_FALLTHROUGH; case CK_Penryn: setFeatureEnabledImpl(Features, "sse4.1", true); LLVM_FALLTHROUGH; case CK_Core2: setFeatureEnabledImpl(Features, "ssse3", true); LLVM_FALLTHROUGH; case CK_Yonah: case CK_Prescott: case CK_Nocona: setFeatureEnabledImpl(Features, "sse3", true); setFeatureEnabledImpl(Features, "cx16", true); LLVM_FALLTHROUGH; case CK_PentiumM: case CK_Pentium4: case CK_x86_64: setFeatureEnabledImpl(Features, "sse2", true); LLVM_FALLTHROUGH; case CK_Pentium3: case CK_C3_2: setFeatureEnabledImpl(Features, "sse", true); 
setFeatureEnabledImpl(Features, "fxsr", true); break; case CK_Goldmont: setFeatureEnabledImpl(Features, "sha", true); setFeatureEnabledImpl(Features, "rdrnd", true); setFeatureEnabledImpl(Features, "rdseed", true); setFeatureEnabledImpl(Features, "xsave", true); setFeatureEnabledImpl(Features, "xsaveopt", true); setFeatureEnabledImpl(Features, "xsavec", true); setFeatureEnabledImpl(Features, "xsaves", true); setFeatureEnabledImpl(Features, "clflushopt", true); setFeatureEnabledImpl(Features, "mpx", true); setFeatureEnabledImpl(Features, "fsgsbase", true); LLVM_FALLTHROUGH; case CK_Silvermont: setFeatureEnabledImpl(Features, "aes", true); setFeatureEnabledImpl(Features, "pclmul", true); setFeatureEnabledImpl(Features, "sse4.2", true); setFeatureEnabledImpl(Features, "prfchw", true); LLVM_FALLTHROUGH; case CK_Bonnell: setFeatureEnabledImpl(Features, "movbe", true); setFeatureEnabledImpl(Features, "ssse3", true); setFeatureEnabledImpl(Features, "fxsr", true); setFeatureEnabledImpl(Features, "cx16", true); break; case CK_KNM: // TODO: Add avx5124fmaps/avx5124vnniw. setFeatureEnabledImpl(Features, "avx512vpopcntdq", true); LLVM_FALLTHROUGH; case CK_KNL: setFeatureEnabledImpl(Features, "avx512f", true); setFeatureEnabledImpl(Features, "avx512cd", true); setFeatureEnabledImpl(Features, "avx512er", true); setFeatureEnabledImpl(Features, "avx512pf", true); setFeatureEnabledImpl(Features, "prfchw", true); setFeatureEnabledImpl(Features, "prefetchwt1", true); setFeatureEnabledImpl(Features, "fxsr", true); setFeatureEnabledImpl(Features, "rdseed", true); setFeatureEnabledImpl(Features, "adx", true); setFeatureEnabledImpl(Features, "lzcnt", true); setFeatureEnabledImpl(Features, "bmi", true); setFeatureEnabledImpl(Features, "bmi2", true); setFeatureEnabledImpl(Features, "rtm", true); setFeatureEnabledImpl(Features, "fma", true); setFeatureEnabledImpl(Features, "rdrnd", true); setFeatureEnabledImpl(Features, "f16c", true); setFeatureEnabledImpl(Features, "fsgsbase", true); setFeatureEnabledImpl(Features, "aes", true); setFeatureEnabledImpl(Features, "pclmul", true); setFeatureEnabledImpl(Features, "cx16", true); setFeatureEnabledImpl(Features, "xsaveopt", true); setFeatureEnabledImpl(Features, "xsave", true); setFeatureEnabledImpl(Features, "movbe", true); break; case CK_K6_2: case CK_K6_3: case CK_WinChip2: case CK_C3: setFeatureEnabledImpl(Features, "3dnow", true); break; case CK_AMDFAM10: setFeatureEnabledImpl(Features, "sse4a", true); setFeatureEnabledImpl(Features, "lzcnt", true); setFeatureEnabledImpl(Features, "popcnt", true); LLVM_FALLTHROUGH; case CK_K8SSE3: setFeatureEnabledImpl(Features, "sse3", true); LLVM_FALLTHROUGH; case CK_K8: setFeatureEnabledImpl(Features, "sse2", true); LLVM_FALLTHROUGH; case CK_AthlonXP: setFeatureEnabledImpl(Features, "sse", true); setFeatureEnabledImpl(Features, "fxsr", true); LLVM_FALLTHROUGH; case CK_Athlon: case CK_Geode: setFeatureEnabledImpl(Features, "3dnowa", true); break; case CK_BTVER2: setFeatureEnabledImpl(Features, "avx", true); setFeatureEnabledImpl(Features, "aes", true); setFeatureEnabledImpl(Features, "pclmul", true); setFeatureEnabledImpl(Features, "bmi", true); setFeatureEnabledImpl(Features, "f16c", true); setFeatureEnabledImpl(Features, "xsaveopt", true); setFeatureEnabledImpl(Features, "movbe", true); LLVM_FALLTHROUGH; case CK_BTVER1: setFeatureEnabledImpl(Features, "ssse3", true); setFeatureEnabledImpl(Features, "sse4a", true); setFeatureEnabledImpl(Features, "lzcnt", true); setFeatureEnabledImpl(Features, "popcnt", true); 
setFeatureEnabledImpl(Features, "prfchw", true); setFeatureEnabledImpl(Features, "cx16", true); setFeatureEnabledImpl(Features, "fxsr", true); break; case CK_ZNVER1: setFeatureEnabledImpl(Features, "adx", true); setFeatureEnabledImpl(Features, "aes", true); setFeatureEnabledImpl(Features, "avx2", true); setFeatureEnabledImpl(Features, "bmi", true); setFeatureEnabledImpl(Features, "bmi2", true); setFeatureEnabledImpl(Features, "clflushopt", true); setFeatureEnabledImpl(Features, "clzero", true); setFeatureEnabledImpl(Features, "cx16", true); setFeatureEnabledImpl(Features, "f16c", true); setFeatureEnabledImpl(Features, "fma", true); setFeatureEnabledImpl(Features, "fsgsbase", true); setFeatureEnabledImpl(Features, "fxsr", true); setFeatureEnabledImpl(Features, "lzcnt", true); setFeatureEnabledImpl(Features, "mwaitx", true); setFeatureEnabledImpl(Features, "movbe", true); setFeatureEnabledImpl(Features, "pclmul", true); setFeatureEnabledImpl(Features, "popcnt", true); setFeatureEnabledImpl(Features, "prfchw", true); setFeatureEnabledImpl(Features, "rdrnd", true); setFeatureEnabledImpl(Features, "rdseed", true); setFeatureEnabledImpl(Features, "sha", true); setFeatureEnabledImpl(Features, "sse4a", true); setFeatureEnabledImpl(Features, "xsave", true); setFeatureEnabledImpl(Features, "xsavec", true); setFeatureEnabledImpl(Features, "xsaveopt", true); setFeatureEnabledImpl(Features, "xsaves", true); break; case CK_BDVER4: setFeatureEnabledImpl(Features, "avx2", true); setFeatureEnabledImpl(Features, "bmi2", true); setFeatureEnabledImpl(Features, "mwaitx", true); LLVM_FALLTHROUGH; case CK_BDVER3: setFeatureEnabledImpl(Features, "fsgsbase", true); setFeatureEnabledImpl(Features, "xsaveopt", true); LLVM_FALLTHROUGH; case CK_BDVER2: setFeatureEnabledImpl(Features, "bmi", true); setFeatureEnabledImpl(Features, "fma", true); setFeatureEnabledImpl(Features, "f16c", true); setFeatureEnabledImpl(Features, "tbm", true); LLVM_FALLTHROUGH; case CK_BDVER1: // xop implies avx, sse4a and fma4. setFeatureEnabledImpl(Features, "xop", true); setFeatureEnabledImpl(Features, "lwp", true); setFeatureEnabledImpl(Features, "lzcnt", true); setFeatureEnabledImpl(Features, "aes", true); setFeatureEnabledImpl(Features, "pclmul", true); setFeatureEnabledImpl(Features, "prfchw", true); setFeatureEnabledImpl(Features, "cx16", true); setFeatureEnabledImpl(Features, "fxsr", true); setFeatureEnabledImpl(Features, "xsave", true); break; } if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec)) return false; // Can't do this earlier because we need to be able to explicitly enable // or disable these features and the things that they depend upon. // Enable popcnt if sse4.2 is enabled and popcnt is not explicitly disabled. auto I = Features.find("sse4.2"); if (I != Features.end() && I->getValue() && std::find(FeaturesVec.begin(), FeaturesVec.end(), "-popcnt") == FeaturesVec.end()) Features["popcnt"] = true; // Enable prfchw if 3DNow! is enabled and prfchw is not explicitly disabled. I = Features.find("3dnow"); if (I != Features.end() && I->getValue() && std::find(FeaturesVec.begin(), FeaturesVec.end(), "-prfchw") == FeaturesVec.end()) Features["prfchw"] = true; // Additionally, if SSE is enabled and mmx is not explicitly disabled, // then enable MMX. 
I = Features.find("sse"); if (I != Features.end() && I->getValue() && std::find(FeaturesVec.begin(), FeaturesVec.end(), "-mmx") == FeaturesVec.end()) Features["mmx"] = true; return true; } void X86TargetInfo::setSSELevel(llvm::StringMap &Features, X86SSEEnum Level, bool Enabled) { if (Enabled) { switch (Level) { case AVX512F: Features["avx512f"] = Features["fma"] = Features["f16c"] = true; LLVM_FALLTHROUGH; case AVX2: Features["avx2"] = true; LLVM_FALLTHROUGH; case AVX: Features["avx"] = true; Features["xsave"] = true; LLVM_FALLTHROUGH; case SSE42: Features["sse4.2"] = true; LLVM_FALLTHROUGH; case SSE41: Features["sse4.1"] = true; LLVM_FALLTHROUGH; case SSSE3: Features["ssse3"] = true; LLVM_FALLTHROUGH; case SSE3: Features["sse3"] = true; LLVM_FALLTHROUGH; case SSE2: Features["sse2"] = true; LLVM_FALLTHROUGH; case SSE1: Features["sse"] = true; LLVM_FALLTHROUGH; case NoSSE: break; } return; } switch (Level) { case NoSSE: case SSE1: Features["sse"] = false; LLVM_FALLTHROUGH; case SSE2: Features["sse2"] = Features["pclmul"] = Features["aes"] = Features["sha"] = Features["gfni"] = false; LLVM_FALLTHROUGH; case SSE3: Features["sse3"] = false; setXOPLevel(Features, NoXOP, false); LLVM_FALLTHROUGH; case SSSE3: Features["ssse3"] = false; LLVM_FALLTHROUGH; case SSE41: Features["sse4.1"] = false; LLVM_FALLTHROUGH; case SSE42: Features["sse4.2"] = false; LLVM_FALLTHROUGH; case AVX: Features["fma"] = Features["avx"] = Features["f16c"] = Features["xsave"] = Features["xsaveopt"] = Features["vaes"] = Features["vpclmulqdq"] = false; setXOPLevel(Features, FMA4, false); LLVM_FALLTHROUGH; case AVX2: Features["avx2"] = false; LLVM_FALLTHROUGH; case AVX512F: Features["avx512f"] = Features["avx512cd"] = Features["avx512er"] = Features["avx512pf"] = Features["avx512dq"] = Features["avx512bw"] = Features["avx512vl"] = Features["avx512vbmi"] = Features["avx512ifma"] = Features["avx512vpopcntdq"] = Features["avx512bitalg"] = Features["avx512vnni"] = Features["avx512vbmi2"] = false; break; } } void X86TargetInfo::setMMXLevel(llvm::StringMap &Features, MMX3DNowEnum Level, bool Enabled) { if (Enabled) { switch (Level) { case AMD3DNowAthlon: Features["3dnowa"] = true; LLVM_FALLTHROUGH; case AMD3DNow: Features["3dnow"] = true; LLVM_FALLTHROUGH; case MMX: Features["mmx"] = true; LLVM_FALLTHROUGH; case NoMMX3DNow: break; } return; } switch (Level) { case NoMMX3DNow: case MMX: Features["mmx"] = false; LLVM_FALLTHROUGH; case AMD3DNow: Features["3dnow"] = false; LLVM_FALLTHROUGH; case AMD3DNowAthlon: Features["3dnowa"] = false; break; } } void X86TargetInfo::setXOPLevel(llvm::StringMap &Features, XOPEnum Level, bool Enabled) { if (Enabled) { switch (Level) { case XOP: Features["xop"] = true; LLVM_FALLTHROUGH; case FMA4: Features["fma4"] = true; setSSELevel(Features, AVX, true); LLVM_FALLTHROUGH; case SSE4A: Features["sse4a"] = true; setSSELevel(Features, SSE3, true); LLVM_FALLTHROUGH; case NoXOP: break; } return; } switch (Level) { case NoXOP: case SSE4A: Features["sse4a"] = false; LLVM_FALLTHROUGH; case FMA4: Features["fma4"] = false; LLVM_FALLTHROUGH; case XOP: Features["xop"] = false; break; } } void X86TargetInfo::setFeatureEnabledImpl(llvm::StringMap &Features, StringRef Name, bool Enabled) { // This is a bit of a hack to deal with the sse4 target feature when used // as part of the target attribute. We handle sse4 correctly everywhere // else. See below for more information on how we handle the sse4 options. 
if (Name != "sse4") Features[Name] = Enabled; if (Name == "mmx") { setMMXLevel(Features, MMX, Enabled); } else if (Name == "sse") { setSSELevel(Features, SSE1, Enabled); } else if (Name == "sse2") { setSSELevel(Features, SSE2, Enabled); } else if (Name == "sse3") { setSSELevel(Features, SSE3, Enabled); } else if (Name == "ssse3") { setSSELevel(Features, SSSE3, Enabled); } else if (Name == "sse4.2") { setSSELevel(Features, SSE42, Enabled); } else if (Name == "sse4.1") { setSSELevel(Features, SSE41, Enabled); } else if (Name == "3dnow") { setMMXLevel(Features, AMD3DNow, Enabled); } else if (Name == "3dnowa") { setMMXLevel(Features, AMD3DNowAthlon, Enabled); } else if (Name == "aes") { if (Enabled) setSSELevel(Features, SSE2, Enabled); else Features["vaes"] = false; } else if (Name == "vaes") { if (Enabled) { setSSELevel(Features, AVX, Enabled); Features["aes"] = true; } } else if (Name == "pclmul") { if (Enabled) setSSELevel(Features, SSE2, Enabled); else Features["vpclmulqdq"] = false; } else if (Name == "vpclmulqdq") { if (Enabled) { setSSELevel(Features, AVX, Enabled); Features["pclmul"] = true; } } else if (Name == "gfni") { if (Enabled) setSSELevel(Features, SSE2, Enabled); } else if (Name == "avx") { setSSELevel(Features, AVX, Enabled); } else if (Name == "avx2") { setSSELevel(Features, AVX2, Enabled); } else if (Name == "avx512f") { setSSELevel(Features, AVX512F, Enabled); } else if (Name == "avx512cd" || Name == "avx512er" || Name == "avx512pf" || Name == "avx512dq" || Name == "avx512bw" || Name == "avx512vl" || Name == "avx512vbmi" || Name == "avx512ifma" || Name == "avx512vpopcntdq" || Name == "avx512bitalg" || Name == "avx512vnni" || Name == "avx512vbmi2") { if (Enabled) setSSELevel(Features, AVX512F, Enabled); // Enable BWI instruction if VBMI/VBMI2/BITALG is being enabled. if ((Name.startswith("avx512vbmi") || Name == "avx512bitalg") && Enabled) Features["avx512bw"] = true; // Also disable VBMI/VBMI2/BITALG if BWI is being disabled. if (Name == "avx512bw" && !Enabled) Features["avx512vbmi"] = Features["avx512vbmi2"] = Features["avx512bitalg"] = false; } else if (Name == "fma") { if (Enabled) setSSELevel(Features, AVX, Enabled); else setSSELevel(Features, AVX512F, Enabled); } else if (Name == "fma4") { setXOPLevel(Features, FMA4, Enabled); } else if (Name == "xop") { setXOPLevel(Features, XOP, Enabled); } else if (Name == "sse4a") { setXOPLevel(Features, SSE4A, Enabled); } else if (Name == "f16c") { if (Enabled) setSSELevel(Features, AVX, Enabled); else setSSELevel(Features, AVX512F, Enabled); } else if (Name == "sha") { if (Enabled) setSSELevel(Features, SSE2, Enabled); } else if (Name == "sse4") { // We can get here via the __target__ attribute since that's not controlled // via the -msse4/-mno-sse4 command line alias. Handle this the same way // here - turn on the sse4.2 if enabled, turn off the sse4.1 level if // disabled. if (Enabled) setSSELevel(Features, SSE42, Enabled); else setSSELevel(Features, SSE41, Enabled); } else if (Name == "xsave") { if (!Enabled) Features["xsaveopt"] = false; } else if (Name == "xsaveopt" || Name == "xsavec" || Name == "xsaves") { if (Enabled) Features["xsave"] = true; } } /// handleTargetFeatures - Perform initialization based on the user /// configured set of features. 
bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
                                         DiagnosticsEngine &Diags) {
  for (const auto &Feature : Features) {
    if (Feature[0] != '+')
      continue;

    if (Feature == "+aes") {
      HasAES = true;
    } else if (Feature == "+vaes") {
      HasVAES = true;
    } else if (Feature == "+pclmul") {
      HasPCLMUL = true;
    } else if (Feature == "+vpclmulqdq") {
      HasVPCLMULQDQ = true;
    } else if (Feature == "+lzcnt") {
      HasLZCNT = true;
    } else if (Feature == "+rdrnd") {
      HasRDRND = true;
    } else if (Feature == "+fsgsbase") {
      HasFSGSBASE = true;
    } else if (Feature == "+bmi") {
      HasBMI = true;
    } else if (Feature == "+bmi2") {
      HasBMI2 = true;
    } else if (Feature == "+popcnt") {
      HasPOPCNT = true;
    } else if (Feature == "+rtm") {
      HasRTM = true;
    } else if (Feature == "+prfchw") {
      HasPRFCHW = true;
    } else if (Feature == "+rdseed") {
      HasRDSEED = true;
    } else if (Feature == "+adx") {
      HasADX = true;
    } else if (Feature == "+tbm") {
      HasTBM = true;
    } else if (Feature == "+lwp") {
      HasLWP = true;
    } else if (Feature == "+fma") {
      HasFMA = true;
    } else if (Feature == "+f16c") {
      HasF16C = true;
    } else if (Feature == "+gfni") {
      HasGFNI = true;
    } else if (Feature == "+avx512cd") {
      HasAVX512CD = true;
    } else if (Feature == "+avx512vpopcntdq") {
      HasAVX512VPOPCNTDQ = true;
    } else if (Feature == "+avx512vnni") {
      HasAVX512VNNI = true;
    } else if (Feature == "+avx512er") {
      HasAVX512ER = true;
    } else if (Feature == "+avx512pf") {
      HasAVX512PF = true;
    } else if (Feature == "+avx512dq") {
      HasAVX512DQ = true;
    } else if (Feature == "+avx512bitalg") {
      HasAVX512BITALG = true;
    } else if (Feature == "+avx512bw") {
      HasAVX512BW = true;
    } else if (Feature == "+avx512vl") {
      HasAVX512VL = true;
    } else if (Feature == "+avx512vbmi") {
      HasAVX512VBMI = true;
    } else if (Feature == "+avx512vbmi2") {
      HasAVX512VBMI2 = true;
    } else if (Feature == "+avx512ifma") {
      HasAVX512IFMA = true;
    } else if (Feature == "+sha") {
      HasSHA = true;
    } else if (Feature == "+mpx") {
      HasMPX = true;
    } else if (Feature == "+shstk") {
      HasSHSTK = true;
    } else if (Feature == "+ibt") {
      HasIBT = true;
    } else if (Feature == "+movbe") {
      HasMOVBE = true;
    } else if (Feature == "+sgx") {
      HasSGX = true;
    } else if (Feature == "+cx16") {
      HasCX16 = true;
    } else if (Feature == "+fxsr") {
      HasFXSR = true;
    } else if (Feature == "+xsave") {
      HasXSAVE = true;
    } else if (Feature == "+xsaveopt") {
      HasXSAVEOPT = true;
    } else if (Feature == "+xsavec") {
      HasXSAVEC = true;
    } else if (Feature == "+xsaves") {
      HasXSAVES = true;
    } else if (Feature == "+mwaitx") {
      HasMWAITX = true;
    } else if (Feature == "+pku") {
      HasPKU = true;
    } else if (Feature == "+clflushopt") {
      HasCLFLUSHOPT = true;
    } else if (Feature == "+clwb") {
      HasCLWB = true;
    } else if (Feature == "+prefetchwt1") {
      HasPREFETCHWT1 = true;
    } else if (Feature == "+clzero") {
      HasCLZERO = true;
    } else if (Feature == "+retpoline") {
      HasRetpoline = true;
    } else if (Feature == "+retpoline-external-thunk") {
      HasRetpolineExternalThunk = true;
    }

    X86SSEEnum Level = llvm::StringSwitch<X86SSEEnum>(Feature)
                           .Case("+avx512f", AVX512F)
                           .Case("+avx2", AVX2)
                           .Case("+avx", AVX)
                           .Case("+sse4.2", SSE42)
                           .Case("+sse4.1", SSE41)
                           .Case("+ssse3", SSSE3)
                           .Case("+sse3", SSE3)
                           .Case("+sse2", SSE2)
                           .Case("+sse", SSE1)
                           .Default(NoSSE);
    SSELevel = std::max(SSELevel, Level);

    MMX3DNowEnum ThreeDNowLevel = llvm::StringSwitch<MMX3DNowEnum>(Feature)
                                      .Case("+3dnowa", AMD3DNowAthlon)
                                      .Case("+3dnow", AMD3DNow)
                                      .Case("+mmx", MMX)
                                      .Default(NoMMX3DNow);
    MMX3DNowLevel = std::max(MMX3DNowLevel, ThreeDNowLevel);

    XOPEnum XLevel = llvm::StringSwitch<XOPEnum>(Feature)
                         .Case("+xop", XOP)
                         .Case("+fma4", FMA4)
                         .Case("+sse4a", SSE4A)
.Default(NoXOP); XOPLevel = std::max(XOPLevel, XLevel); } // LLVM doesn't have a separate switch for fpmath, so only accept it if it // matches the selected sse level. if ((FPMath == FP_SSE && SSELevel < SSE1) || (FPMath == FP_387 && SSELevel >= SSE1)) { Diags.Report(diag::err_target_unsupported_fpmath) << (FPMath == FP_SSE ? "sse" : "387"); return false; } SimdDefaultAlign = hasFeature("avx512f") ? 512 : hasFeature("avx") ? 256 : 128; return true; } /// X86TargetInfo::getTargetDefines - Return the set of the X86-specific macro /// definitions for this particular subtarget. void X86TargetInfo::getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const { // Target identification. if (getTriple().getArch() == llvm::Triple::x86_64) { Builder.defineMacro("__amd64__"); Builder.defineMacro("__amd64"); Builder.defineMacro("__x86_64"); Builder.defineMacro("__x86_64__"); if (getTriple().getArchName() == "x86_64h") { Builder.defineMacro("__x86_64h"); Builder.defineMacro("__x86_64h__"); } } else { DefineStd(Builder, "i386", Opts); } // Subtarget options. // FIXME: We are hard-coding the tune parameters based on the CPU, but they // truly should be based on -mtune options. switch (CPU) { case CK_Generic: break; case CK_i386: // The rest are coming from the i386 define above. Builder.defineMacro("__tune_i386__"); break; case CK_i486: case CK_WinChipC6: case CK_WinChip2: case CK_C3: defineCPUMacros(Builder, "i486"); break; case CK_PentiumMMX: Builder.defineMacro("__pentium_mmx__"); Builder.defineMacro("__tune_pentium_mmx__"); LLVM_FALLTHROUGH; case CK_i586: case CK_Pentium: defineCPUMacros(Builder, "i586"); defineCPUMacros(Builder, "pentium"); break; case CK_Pentium3: case CK_PentiumM: Builder.defineMacro("__tune_pentium3__"); LLVM_FALLTHROUGH; case CK_Pentium2: case CK_C3_2: Builder.defineMacro("__tune_pentium2__"); LLVM_FALLTHROUGH; case CK_PentiumPro: defineCPUMacros(Builder, "i686"); defineCPUMacros(Builder, "pentiumpro"); break; case CK_Pentium4: defineCPUMacros(Builder, "pentium4"); break; case CK_Yonah: case CK_Prescott: case CK_Nocona: defineCPUMacros(Builder, "nocona"); break; case CK_Core2: case CK_Penryn: defineCPUMacros(Builder, "core2"); break; case CK_Bonnell: defineCPUMacros(Builder, "atom"); break; case CK_Silvermont: defineCPUMacros(Builder, "slm"); break; case CK_Goldmont: defineCPUMacros(Builder, "goldmont"); break; case CK_Nehalem: case CK_Westmere: case CK_SandyBridge: case CK_IvyBridge: case CK_Haswell: case CK_Broadwell: case CK_SkylakeClient: case CK_SkylakeServer: case CK_Cannonlake: case CK_Icelake: // FIXME: Historically, we defined this legacy name, it would be nice to // remove it at some point. We've never exposed fine-grained names for // recent primary x86 CPUs, and we should keep it that way. defineCPUMacros(Builder, "corei7"); break; case CK_KNL: defineCPUMacros(Builder, "knl"); break; case CK_KNM: break; case CK_Lakemont: defineCPUMacros(Builder, "i586", /*Tuning*/false); defineCPUMacros(Builder, "pentium", /*Tuning*/false); Builder.defineMacro("__tune_lakemont__"); break; case CK_K6_2: Builder.defineMacro("__k6_2__"); Builder.defineMacro("__tune_k6_2__"); LLVM_FALLTHROUGH; case CK_K6_3: if (CPU != CK_K6_2) { // In case of fallthrough // FIXME: GCC may be enabling these in cases where some other k6 // architecture is specified but -m3dnow is explicitly provided. The // exact semantics need to be determined and emulated here. 
Builder.defineMacro("__k6_3__"); Builder.defineMacro("__tune_k6_3__"); } LLVM_FALLTHROUGH; case CK_K6: defineCPUMacros(Builder, "k6"); break; case CK_Athlon: case CK_AthlonXP: defineCPUMacros(Builder, "athlon"); if (SSELevel != NoSSE) { Builder.defineMacro("__athlon_sse__"); Builder.defineMacro("__tune_athlon_sse__"); } break; case CK_K8: case CK_K8SSE3: case CK_x86_64: defineCPUMacros(Builder, "k8"); break; case CK_AMDFAM10: defineCPUMacros(Builder, "amdfam10"); break; case CK_BTVER1: defineCPUMacros(Builder, "btver1"); break; case CK_BTVER2: defineCPUMacros(Builder, "btver2"); break; case CK_BDVER1: defineCPUMacros(Builder, "bdver1"); break; case CK_BDVER2: defineCPUMacros(Builder, "bdver2"); break; case CK_BDVER3: defineCPUMacros(Builder, "bdver3"); break; case CK_BDVER4: defineCPUMacros(Builder, "bdver4"); break; case CK_ZNVER1: defineCPUMacros(Builder, "znver1"); break; case CK_Geode: defineCPUMacros(Builder, "geode"); break; } // Target properties. Builder.defineMacro("__REGISTER_PREFIX__", ""); // Define __NO_MATH_INLINES on linux/x86 so that we don't get inline // functions in glibc header files that use FP Stack inline asm which the // backend can't deal with (PR879). Builder.defineMacro("__NO_MATH_INLINES"); if (HasAES) Builder.defineMacro("__AES__"); if (HasVAES) Builder.defineMacro("__VAES__"); if (HasPCLMUL) Builder.defineMacro("__PCLMUL__"); if (HasVPCLMULQDQ) Builder.defineMacro("__VPCLMULQDQ__"); if (HasLZCNT) Builder.defineMacro("__LZCNT__"); if (HasRDRND) Builder.defineMacro("__RDRND__"); if (HasFSGSBASE) Builder.defineMacro("__FSGSBASE__"); if (HasBMI) Builder.defineMacro("__BMI__"); if (HasBMI2) Builder.defineMacro("__BMI2__"); if (HasPOPCNT) Builder.defineMacro("__POPCNT__"); if (HasRTM) Builder.defineMacro("__RTM__"); if (HasPRFCHW) Builder.defineMacro("__PRFCHW__"); if (HasRDSEED) Builder.defineMacro("__RDSEED__"); if (HasADX) Builder.defineMacro("__ADX__"); if (HasTBM) Builder.defineMacro("__TBM__"); if (HasLWP) Builder.defineMacro("__LWP__"); if (HasMWAITX) Builder.defineMacro("__MWAITX__"); switch (XOPLevel) { case XOP: Builder.defineMacro("__XOP__"); LLVM_FALLTHROUGH; case FMA4: Builder.defineMacro("__FMA4__"); LLVM_FALLTHROUGH; case SSE4A: Builder.defineMacro("__SSE4A__"); LLVM_FALLTHROUGH; case NoXOP: break; } if (HasFMA) Builder.defineMacro("__FMA__"); if (HasF16C) Builder.defineMacro("__F16C__"); if (HasGFNI) Builder.defineMacro("__GFNI__"); if (HasAVX512CD) Builder.defineMacro("__AVX512CD__"); if (HasAVX512VPOPCNTDQ) Builder.defineMacro("__AVX512VPOPCNTDQ__"); if (HasAVX512VNNI) Builder.defineMacro("__AVX512VNNI__"); if (HasAVX512ER) Builder.defineMacro("__AVX512ER__"); if (HasAVX512PF) Builder.defineMacro("__AVX512PF__"); if (HasAVX512DQ) Builder.defineMacro("__AVX512DQ__"); if (HasAVX512BITALG) Builder.defineMacro("__AVX512BITALG__"); if (HasAVX512BW) Builder.defineMacro("__AVX512BW__"); if (HasAVX512VL) Builder.defineMacro("__AVX512VL__"); if (HasAVX512VBMI) Builder.defineMacro("__AVX512VBMI__"); if (HasAVX512VBMI2) Builder.defineMacro("__AVX512VBMI2__"); if (HasAVX512IFMA) Builder.defineMacro("__AVX512IFMA__"); if (HasSHA) Builder.defineMacro("__SHA__"); if (HasFXSR) Builder.defineMacro("__FXSR__"); if (HasXSAVE) Builder.defineMacro("__XSAVE__"); if (HasXSAVEOPT) Builder.defineMacro("__XSAVEOPT__"); if (HasXSAVEC) Builder.defineMacro("__XSAVEC__"); if (HasXSAVES) Builder.defineMacro("__XSAVES__"); if (HasPKU) Builder.defineMacro("__PKU__"); if (HasCX16) Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16"); if (HasCLFLUSHOPT) 
Builder.defineMacro("__CLFLUSHOPT__"); if (HasCLWB) Builder.defineMacro("__CLWB__"); if (HasMPX) Builder.defineMacro("__MPX__"); if (HasSHSTK) Builder.defineMacro("__SHSTK__"); if (HasSGX) Builder.defineMacro("__SGX__"); if (HasPREFETCHWT1) Builder.defineMacro("__PREFETCHWT1__"); if (HasCLZERO) Builder.defineMacro("__CLZERO__"); // Each case falls through to the previous one here. switch (SSELevel) { case AVX512F: Builder.defineMacro("__AVX512F__"); LLVM_FALLTHROUGH; case AVX2: Builder.defineMacro("__AVX2__"); LLVM_FALLTHROUGH; case AVX: Builder.defineMacro("__AVX__"); LLVM_FALLTHROUGH; case SSE42: Builder.defineMacro("__SSE4_2__"); LLVM_FALLTHROUGH; case SSE41: Builder.defineMacro("__SSE4_1__"); LLVM_FALLTHROUGH; case SSSE3: Builder.defineMacro("__SSSE3__"); LLVM_FALLTHROUGH; case SSE3: Builder.defineMacro("__SSE3__"); LLVM_FALLTHROUGH; case SSE2: Builder.defineMacro("__SSE2__"); Builder.defineMacro("__SSE2_MATH__"); // -mfp-math=sse always implied. LLVM_FALLTHROUGH; case SSE1: Builder.defineMacro("__SSE__"); Builder.defineMacro("__SSE_MATH__"); // -mfp-math=sse always implied. LLVM_FALLTHROUGH; case NoSSE: break; } if (Opts.MicrosoftExt && getTriple().getArch() == llvm::Triple::x86) { switch (SSELevel) { case AVX512F: case AVX2: case AVX: case SSE42: case SSE41: case SSSE3: case SSE3: case SSE2: Builder.defineMacro("_M_IX86_FP", Twine(2)); break; case SSE1: Builder.defineMacro("_M_IX86_FP", Twine(1)); break; default: Builder.defineMacro("_M_IX86_FP", Twine(0)); break; } } // Each case falls through to the previous one here. switch (MMX3DNowLevel) { case AMD3DNowAthlon: Builder.defineMacro("__3dNOW_A__"); LLVM_FALLTHROUGH; case AMD3DNow: Builder.defineMacro("__3dNOW__"); LLVM_FALLTHROUGH; case MMX: Builder.defineMacro("__MMX__"); LLVM_FALLTHROUGH; case NoMMX3DNow: break; } if (CPU >= CK_i486) { Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1"); Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2"); Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4"); } if (CPU >= CK_i586) Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8"); if (HasFloat128) Builder.defineMacro("__SIZEOF_FLOAT128__", "16"); } bool X86TargetInfo::isValidFeatureName(StringRef Name) const { return llvm::StringSwitch(Name) .Case("3dnow", true) .Case("3dnowa", true) .Case("adx", true) .Case("aes", true) .Case("avx", true) .Case("avx2", true) .Case("avx512f", true) .Case("avx512cd", true) .Case("avx512vpopcntdq", true) .Case("avx512vnni", true) .Case("avx512er", true) .Case("avx512pf", true) .Case("avx512dq", true) .Case("avx512bitalg", true) .Case("avx512bw", true) .Case("avx512vl", true) .Case("avx512vbmi", true) .Case("avx512vbmi2", true) .Case("avx512ifma", true) .Case("bmi", true) .Case("bmi2", true) .Case("clflushopt", true) .Case("clwb", true) .Case("clzero", true) .Case("cx16", true) .Case("f16c", true) .Case("fma", true) .Case("fma4", true) .Case("fsgsbase", true) .Case("fxsr", true) .Case("gfni", true) .Case("lwp", true) .Case("lzcnt", true) .Case("mmx", true) .Case("movbe", true) .Case("mpx", true) .Case("mwaitx", true) .Case("pclmul", true) .Case("pku", true) .Case("popcnt", true) .Case("prefetchwt1", true) .Case("prfchw", true) .Case("rdrnd", true) .Case("rdseed", true) .Case("rtm", true) .Case("sgx", true) .Case("sha", true) .Case("shstk", true) .Case("sse", true) .Case("sse2", true) .Case("sse3", true) .Case("ssse3", true) .Case("sse4", true) .Case("sse4.1", true) .Case("sse4.2", true) .Case("sse4a", true) .Case("tbm", true) .Case("vaes", true) .Case("vpclmulqdq", true) .Case("x87", true) 
.Case("xop", true) .Case("xsave", true) .Case("xsavec", true) .Case("xsaves", true) .Case("xsaveopt", true) .Default(false); } bool X86TargetInfo::hasFeature(StringRef Feature) const { return llvm::StringSwitch(Feature) .Case("adx", HasADX) .Case("aes", HasAES) .Case("avx", SSELevel >= AVX) .Case("avx2", SSELevel >= AVX2) .Case("avx512f", SSELevel >= AVX512F) .Case("avx512cd", HasAVX512CD) .Case("avx512vpopcntdq", HasAVX512VPOPCNTDQ) .Case("avx512vnni", HasAVX512VNNI) .Case("avx512er", HasAVX512ER) .Case("avx512pf", HasAVX512PF) .Case("avx512dq", HasAVX512DQ) .Case("avx512bitalg", HasAVX512BITALG) .Case("avx512bw", HasAVX512BW) .Case("avx512vl", HasAVX512VL) .Case("avx512vbmi", HasAVX512VBMI) .Case("avx512vbmi2", HasAVX512VBMI2) .Case("avx512ifma", HasAVX512IFMA) .Case("bmi", HasBMI) .Case("bmi2", HasBMI2) .Case("clflushopt", HasCLFLUSHOPT) .Case("clwb", HasCLWB) .Case("clzero", HasCLZERO) .Case("cx16", HasCX16) .Case("f16c", HasF16C) .Case("fma", HasFMA) .Case("fma4", XOPLevel >= FMA4) .Case("fsgsbase", HasFSGSBASE) .Case("fxsr", HasFXSR) .Case("gfni", HasGFNI) .Case("ibt", HasIBT) .Case("lwp", HasLWP) .Case("lzcnt", HasLZCNT) .Case("mm3dnow", MMX3DNowLevel >= AMD3DNow) .Case("mm3dnowa", MMX3DNowLevel >= AMD3DNowAthlon) .Case("mmx", MMX3DNowLevel >= MMX) .Case("movbe", HasMOVBE) .Case("mpx", HasMPX) .Case("mwaitx", HasMWAITX) .Case("pclmul", HasPCLMUL) .Case("pku", HasPKU) .Case("popcnt", HasPOPCNT) .Case("prefetchwt1", HasPREFETCHWT1) .Case("prfchw", HasPRFCHW) .Case("rdrnd", HasRDRND) .Case("rdseed", HasRDSEED) .Case("retpoline", HasRetpoline) .Case("retpoline-external-thunk", HasRetpolineExternalThunk) .Case("rtm", HasRTM) .Case("sgx", HasSGX) .Case("sha", HasSHA) .Case("shstk", HasSHSTK) .Case("sse", SSELevel >= SSE1) .Case("sse2", SSELevel >= SSE2) .Case("sse3", SSELevel >= SSE3) .Case("ssse3", SSELevel >= SSSE3) .Case("sse4.1", SSELevel >= SSE41) .Case("sse4.2", SSELevel >= SSE42) .Case("sse4a", XOPLevel >= SSE4A) .Case("tbm", HasTBM) .Case("vaes", HasVAES) .Case("vpclmulqdq", HasVPCLMULQDQ) .Case("x86", true) .Case("x86_32", getTriple().getArch() == llvm::Triple::x86) .Case("x86_64", getTriple().getArch() == llvm::Triple::x86_64) .Case("xop", XOPLevel >= XOP) .Case("xsave", HasXSAVE) .Case("xsavec", HasXSAVEC) .Case("xsaves", HasXSAVES) .Case("xsaveopt", HasXSAVEOPT) .Default(false); } // We can't use a generic validation scheme for the features accepted here // versus subtarget features accepted in the target attribute because the // bitfield structure that's initialized in the runtime only supports the // below currently rather than the full range of subtarget features. (See // X86TargetInfo::hasFeature for a somewhat comprehensive list). bool X86TargetInfo::validateCpuSupports(StringRef FeatureStr) const { return llvm::StringSwitch(FeatureStr) #define X86_FEATURE_COMPAT(VAL, ENUM, STR) .Case(STR, true) #include "llvm/Support/X86TargetParser.def" .Default(false); } // We can't use a generic validation scheme for the cpus accepted here // versus subtarget cpus accepted in the target attribute because the // variables intitialized by the runtime only support the below currently // rather than the full range of cpus. 
bool X86TargetInfo::validateCpuIs(StringRef FeatureStr) const {
  return llvm::StringSwitch<bool>(FeatureStr)
#define X86_VENDOR(ENUM, STRING) .Case(STRING, true)
#define X86_CPU_TYPE_COMPAT_WITH_ALIAS(ARCHNAME, ENUM, STR, ALIAS)            \
  .Cases(STR, ALIAS, true)
#define X86_CPU_TYPE_COMPAT(ARCHNAME, ENUM, STR) .Case(STR, true)
#define X86_CPU_SUBTYPE_COMPAT(ARCHNAME, ENUM, STR) .Case(STR, true)
#include "llvm/Support/X86TargetParser.def"
      .Default(false);
}

bool X86TargetInfo::validateAsmConstraint(
    const char *&Name, TargetInfo::ConstraintInfo &Info) const {
  switch (*Name) {
  default:
    return false;
  // Constant constraints.
  case 'e': // 32-bit signed integer constant for use with sign-extending
            // x86_64 instructions.
  case 'Z': // 32-bit unsigned integer constant for use with zero-extending
            // x86_64 instructions.
  case 's':
    Info.setRequiresImmediate();
    return true;
  case 'I':
    Info.setRequiresImmediate(0, 31);
    return true;
  case 'J':
    Info.setRequiresImmediate(0, 63);
    return true;
  case 'K':
    Info.setRequiresImmediate(-128, 127);
    return true;
  case 'L':
    Info.setRequiresImmediate({int(0xff), int(0xffff), int(0xffffffff)});
    return true;
  case 'M':
    Info.setRequiresImmediate(0, 3);
    return true;
  case 'N':
    Info.setRequiresImmediate(0, 255);
    return true;
  case 'O':
    Info.setRequiresImmediate(0, 127);
    return true;
  // Register constraints.
  case 'Y': // 'Y' is the first character for several 2-character constraints.
    // Shift the pointer to the second character of the constraint.
    Name++;
    switch (*Name) {
    default:
      return false;
    case 'z':
    case '0': // First SSE register.
    case '2':
    case 't': // Any SSE register, when SSE2 is enabled.
    case 'i': // Any SSE register, when SSE2 and inter-unit moves enabled.
    case 'm': // Any MMX register, when inter-unit moves enabled.
    case 'k': // AVX512 arch mask registers: k1-k7.
      Info.setAllowsRegister();
      return true;
    }
  case 'f': // Any x87 floating point stack register.
    // Constraint 'f' cannot be used for output operands.
    if (Info.ConstraintStr[0] == '=')
      return false;
    Info.setAllowsRegister();
    return true;
  case 'a': // eax.
  case 'b': // ebx.
  case 'c': // ecx.
  case 'd': // edx.
  case 'S': // esi.
  case 'D': // edi.
  case 'A': // edx:eax.
  case 't': // Top of floating point stack.
  case 'u': // Second from top of floating point stack.
  case 'q': // Any register accessible as [r]l: a, b, c, and d.
  case 'y': // Any MMX register.
  case 'v': // Any {X,Y,Z}MM register (Arch & context dependent)
  case 'x': // Any SSE register.
  case 'k': // Any AVX512 mask register (same as Yk, additionally allows k0
            // for intermediate k reg operations).
  case 'Q': // Any register accessible as [r]h: a, b, c, and d.
  case 'R': // "Legacy" registers: ax, bx, cx, dx, di, si, sp, bp.
  case 'l': // "Index" registers: any general register that can be used as an
            // index in a base+index memory access.
    Info.setAllowsRegister();
    return true;
  // Floating point constant constraints.
  case 'C': // SSE floating point constant.
  case 'G': // x87 floating point constant.
    return true;
  }
}

bool X86TargetInfo::validateOutputSize(StringRef Constraint,
                                       unsigned Size) const {
  // Strip off constraint modifiers.
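  // For illustration (hypothetical user code, not part of this file): an
  // inline-asm output such as
  //
  //   asm("..." : "=&r"(Out));
  //
  // arrives here as "=&r"; the '=' (output) and '&' (early-clobber)
  // modifiers are stripped below so only the register class "r" is
  // size-checked.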
while (Constraint[0] == '=' || Constraint[0] == '+' || Constraint[0] == '&') Constraint = Constraint.substr(1); return validateOperandSize(Constraint, Size); } bool X86TargetInfo::validateInputSize(StringRef Constraint, unsigned Size) const { return validateOperandSize(Constraint, Size); } bool X86TargetInfo::validateOperandSize(StringRef Constraint, unsigned Size) const { switch (Constraint[0]) { default: break; case 'k': // Registers k0-k7 (AVX512) size limit is 64 bit. case 'y': return Size <= 64; case 'f': case 't': case 'u': return Size <= 128; case 'Y': // 'Y' is the first character for several 2-character constraints. switch (Constraint[1]) { default: return false; case 'm': // 'Ym' is synonymous with 'y'. case 'k': return Size <= 64; case 'z': case '0': // XMM0 if (SSELevel >= SSE1) return Size <= 128U; return false; case 'i': case 't': case '2': // 'Yi','Yt','Y2' are synonymous with 'x' when SSE2 is enabled. if (SSELevel < SSE2) return false; break; } case 'v': case 'x': if (SSELevel >= AVX512F) // 512-bit zmm registers can be used if target supports AVX512F. return Size <= 512U; else if (SSELevel >= AVX) // 256-bit ymm registers can be used if target supports AVX. return Size <= 256U; return Size <= 128U; } return true; } std::string X86TargetInfo::convertConstraint(const char *&Constraint) const { switch (*Constraint) { case 'a': return std::string("{ax}"); case 'b': return std::string("{bx}"); case 'c': return std::string("{cx}"); case 'd': return std::string("{dx}"); case 'S': return std::string("{si}"); case 'D': return std::string("{di}"); case 'p': // address return std::string("im"); case 't': // top of floating point stack. return std::string("{st}"); case 'u': // second from top of floating point stack. return std::string("{st(1)}"); // second from top of floating point stack. case 'Y': switch (Constraint[1]) { default: // Break from inner switch and fall through (copy single char), // continue parsing after copying the current constraint into // the return string. break; case 'k': case 'm': case 'i': case 't': case 'z': case '0': case '2': // "^" hints llvm that this is a 2 letter constraint. // "Constraint++" is used to promote the string iterator // to the next constraint. return std::string("^") + std::string(Constraint++, 2); } LLVM_FALLTHROUGH; default: return std::string(1, *Constraint); } } bool X86TargetInfo::checkCPUKind(CPUKind Kind) const { // Perform any per-CPU checks necessary to determine if this CPU is // acceptable. // FIXME: This results in terrible diagnostics. Clang just says the CPU is // invalid without explaining *why*. switch (Kind) { case CK_Generic: // No processor selected! 
return false; #define PROC(ENUM, STRING, IS64BIT) \ case CK_##ENUM: \ return IS64BIT || getTriple().getArch() == llvm::Triple::x86; #include "clang/Basic/X86Target.def" } llvm_unreachable("Unhandled CPU kind"); } X86TargetInfo::CPUKind X86TargetInfo::getCPUKind(StringRef CPU) const { return llvm::StringSwitch(CPU) #define PROC(ENUM, STRING, IS64BIT) .Case(STRING, CK_##ENUM) #define PROC_ALIAS(ENUM, ALIAS) .Case(ALIAS, CK_##ENUM) #include "clang/Basic/X86Target.def" .Default(CK_Generic); } ArrayRef X86TargetInfo::getGCCRegNames() const { return llvm::makeArrayRef(GCCRegNames); } ArrayRef X86TargetInfo::getGCCAddlRegNames() const { return llvm::makeArrayRef(AddlRegNames); } ArrayRef X86_32TargetInfo::getTargetBuiltins() const { return llvm::makeArrayRef(BuiltinInfoX86, clang::X86::LastX86CommonBuiltin - Builtin::FirstTSBuiltin + 1); } ArrayRef X86_64TargetInfo::getTargetBuiltins() const { return llvm::makeArrayRef(BuiltinInfoX86, X86::LastTSBuiltin - Builtin::FirstTSBuiltin); } Index: head/contrib/llvm/tools/clang/lib/Sema/SemaExpr.cpp =================================================================== --- head/contrib/llvm/tools/clang/lib/Sema/SemaExpr.cpp (revision 329982) +++ head/contrib/llvm/tools/clang/lib/Sema/SemaExpr.cpp (revision 329983) @@ -1,16099 +1,16100 @@ //===--- SemaExpr.cpp - Semantic Analysis for Expressions -----------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements semantic analysis for expressions. // //===----------------------------------------------------------------------===// #include "TreeTransform.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/ASTContext.h" #include "clang/AST/ASTLambda.h" #include "clang/AST/ASTMutationListener.h" #include "clang/AST/CXXInheritance.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/EvaluatedExprVisitor.h" #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/ExprObjC.h" #include "clang/AST/ExprOpenMP.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/TypeLoc.h" #include "clang/Basic/PartialDiagnostic.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" #include "clang/Lex/LiteralSupport.h" #include "clang/Lex/Preprocessor.h" #include "clang/Sema/AnalysisBasedWarnings.h" #include "clang/Sema/DeclSpec.h" #include "clang/Sema/DelayedDiagnostic.h" #include "clang/Sema/Designator.h" #include "clang/Sema/Initialization.h" #include "clang/Sema/Lookup.h" #include "clang/Sema/ParsedTemplate.h" #include "clang/Sema/Scope.h" #include "clang/Sema/ScopeInfo.h" #include "clang/Sema/SemaFixItUtils.h" #include "clang/Sema/SemaInternal.h" #include "clang/Sema/Template.h" #include "llvm/Support/ConvertUTF.h" using namespace clang; using namespace sema; /// \brief Determine whether the use of this declaration is valid, without /// emitting diagnostics. bool Sema::CanUseDecl(NamedDecl *D, bool TreatUnavailableAsInvalid) { // See if this is an auto-typed variable whose initializer we are parsing. if (ParsingInitForAutoVars.count(D)) return false; // See if this is a deleted function. if (FunctionDecl *FD = dyn_cast(D)) { if (FD->isDeleted()) return false; // If the function has a deduced return type, and we can't deduce it, // then we can't use it either. 
if (getLangOpts().CPlusPlus14 && FD->getReturnType()->isUndeducedType() && DeduceReturnType(FD, SourceLocation(), /*Diagnose*/ false)) return false; } // See if this function is unavailable. if (TreatUnavailableAsInvalid && D->getAvailability() == AR_Unavailable && cast(CurContext)->getAvailability() != AR_Unavailable) return false; return true; } static void DiagnoseUnusedOfDecl(Sema &S, NamedDecl *D, SourceLocation Loc) { // Warn if this is used but marked unused. if (const auto *A = D->getAttr()) { // [[maybe_unused]] should not diagnose uses, but __attribute__((unused)) // should diagnose them. if (A->getSemanticSpelling() != UnusedAttr::CXX11_maybe_unused && A->getSemanticSpelling() != UnusedAttr::C2x_maybe_unused) { const Decl *DC = cast_or_null(S.getCurObjCLexicalContext()); if (DC && !DC->hasAttr()) S.Diag(Loc, diag::warn_used_but_marked_unused) << D->getDeclName(); } } } /// \brief Emit a note explaining that this function is deleted. void Sema::NoteDeletedFunction(FunctionDecl *Decl) { assert(Decl->isDeleted()); CXXMethodDecl *Method = dyn_cast(Decl); if (Method && Method->isDeleted() && Method->isDefaulted()) { // If the method was explicitly defaulted, point at that declaration. if (!Method->isImplicit()) Diag(Decl->getLocation(), diag::note_implicitly_deleted); // Try to diagnose why this special member function was implicitly // deleted. This might fail, if that reason no longer applies. CXXSpecialMember CSM = getSpecialMember(Method); if (CSM != CXXInvalid) ShouldDeleteSpecialMember(Method, CSM, nullptr, /*Diagnose=*/true); return; } auto *Ctor = dyn_cast(Decl); if (Ctor && Ctor->isInheritingConstructor()) return NoteDeletedInheritingConstructor(Ctor); Diag(Decl->getLocation(), diag::note_availability_specified_here) << Decl << true; } /// \brief Determine whether a FunctionDecl was ever declared with an /// explicit storage class. static bool hasAnyExplicitStorageClass(const FunctionDecl *D) { for (auto I : D->redecls()) { if (I->getStorageClass() != SC_None) return true; } return false; } /// \brief Check whether we're in an extern inline function and referring to a /// variable or function with internal linkage (C11 6.7.4p3). /// /// This is only a warning because we used to silently accept this code, but /// in many cases it will not behave correctly. This is not enabled in C++ mode /// because the restriction language is a bit weaker (C++11 [basic.def.odr]p6) /// and so while there may still be user mistakes, most of the time we can't /// prove that there are errors. static void diagnoseUseOfInternalDeclInInlineFunction(Sema &S, const NamedDecl *D, SourceLocation Loc) { // This is disabled under C++; there are too many ways for this to fire in // contexts where the warning is a false positive, or where it is technically // correct but benign. if (S.getLangOpts().CPlusPlus) return; // Check if this is an inlined function or method. FunctionDecl *Current = S.getCurFunctionDecl(); if (!Current) return; if (!Current->isInlined()) return; if (!Current->isExternallyVisible()) return; // Check if the decl has internal linkage. if (D->getFormalLinkage() != InternalLinkage) return; // Downgrade from ExtWarn to Extension if // (1) the supposedly external inline function is in the main file, // and probably won't be included anywhere else. // (2) the thing we're referencing is a pure function. // (3) the thing we're referencing is another inline function. // This last can give us false negatives, but it's better than warning on // wrappers for simple C library functions. 
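// For illustration (hypothetical C code, not part of this file), the
// diagnosed pattern looks like:
//
//   static int helper(void) { return 42; }     // internal linkage
//   inline int api(void) { return helper(); }  // C99/C11 extern inline
//
// If 'api' is inlined into another translation unit, the reference to
// 'helper' violates C11 6.7.4p3.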
const FunctionDecl *UsedFn = dyn_cast(D); bool DowngradeWarning = S.getSourceManager().isInMainFile(Loc); if (!DowngradeWarning && UsedFn) DowngradeWarning = UsedFn->isInlined() || UsedFn->hasAttr(); S.Diag(Loc, DowngradeWarning ? diag::ext_internal_in_extern_inline_quiet : diag::ext_internal_in_extern_inline) << /*IsVar=*/!UsedFn << D; S.MaybeSuggestAddingStaticToDecl(Current); S.Diag(D->getCanonicalDecl()->getLocation(), diag::note_entity_declared_at) << D; } void Sema::MaybeSuggestAddingStaticToDecl(const FunctionDecl *Cur) { const FunctionDecl *First = Cur->getFirstDecl(); // Suggest "static" on the function, if possible. if (!hasAnyExplicitStorageClass(First)) { SourceLocation DeclBegin = First->getSourceRange().getBegin(); Diag(DeclBegin, diag::note_convert_inline_to_static) << Cur << FixItHint::CreateInsertion(DeclBegin, "static "); } } /// \brief Determine whether the use of this declaration is valid, and /// emit any corresponding diagnostics. /// /// This routine diagnoses various problems with referencing /// declarations that can occur when using a declaration. For example, /// it might warn if a deprecated or unavailable declaration is being /// used, or produce an error (and return true) if a C++0x deleted /// function is being used. /// /// \returns true if there was an error (this declaration cannot be /// referenced), false otherwise. /// bool Sema::DiagnoseUseOfDecl(NamedDecl *D, SourceLocation Loc, const ObjCInterfaceDecl *UnknownObjCClass, bool ObjCPropertyAccess, bool AvoidPartialAvailabilityChecks) { if (getLangOpts().CPlusPlus && isa(D)) { // If there were any diagnostics suppressed by template argument deduction, // emit them now. auto Pos = SuppressedDiagnostics.find(D->getCanonicalDecl()); if (Pos != SuppressedDiagnostics.end()) { for (const PartialDiagnosticAt &Suppressed : Pos->second) Diag(Suppressed.first, Suppressed.second); // Clear out the list of suppressed diagnostics, so that we don't emit // them again for this specialization. However, we don't obsolete this // entry from the table, because we want to avoid ever emitting these // diagnostics again. Pos->second.clear(); } // C++ [basic.start.main]p3: // The function 'main' shall not be used within a program. if (cast(D)->isMain()) Diag(Loc, diag::ext_main_used); } // See if this is an auto-typed variable whose initializer we are parsing. if (ParsingInitForAutoVars.count(D)) { if (isa(D)) { Diag(Loc, diag::err_binding_cannot_appear_in_own_initializer) << D->getDeclName(); } else { Diag(Loc, diag::err_auto_variable_cannot_appear_in_own_initializer) << D->getDeclName() << cast(D)->getType(); } return true; } // See if this is a deleted function. if (FunctionDecl *FD = dyn_cast(D)) { if (FD->isDeleted()) { auto *Ctor = dyn_cast(FD); if (Ctor && Ctor->isInheritingConstructor()) Diag(Loc, diag::err_deleted_inherited_ctor_use) << Ctor->getParent() << Ctor->getInheritedConstructor().getConstructor()->getParent(); else Diag(Loc, diag::err_deleted_function_use); NoteDeletedFunction(FD); return true; } // If the function has a deduced return type, and we can't deduce it, // then we can't use it either. 
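// For illustration (hypothetical C++14 code, not part of this file):
//
//   auto f();                // declared; return type not yet deduced
//   int g() { return f(); }  // error: 'f' used before deduction succeeds
//   auto f() { return 0; }   // definition deduces 'int'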
if (getLangOpts().CPlusPlus14 && FD->getReturnType()->isUndeducedType() && DeduceReturnType(FD, Loc)) return true; if (getLangOpts().CUDA && !CheckCUDACall(Loc, FD)) return true; } auto getReferencedObjCProp = [](const NamedDecl *D) -> const ObjCPropertyDecl * { if (const auto *MD = dyn_cast(D)) return MD->findPropertyDecl(); return nullptr; }; if (const ObjCPropertyDecl *ObjCPDecl = getReferencedObjCProp(D)) { if (diagnoseArgIndependentDiagnoseIfAttrs(ObjCPDecl, Loc)) return true; } else if (diagnoseArgIndependentDiagnoseIfAttrs(D, Loc)) { return true; } // [OpenMP 4.0], 2.15 declare reduction Directive, Restrictions // Only the variables omp_in and omp_out are allowed in the combiner. // Only the variables omp_priv and omp_orig are allowed in the // initializer-clause. auto *DRD = dyn_cast(CurContext); if (LangOpts.OpenMP && DRD && !CurContext->containsDecl(D) && isa(D)) { Diag(Loc, diag::err_omp_wrong_var_in_declare_reduction) << getCurFunction()->HasOMPDeclareReductionCombiner; Diag(D->getLocation(), diag::note_entity_declared_at) << D; return true; } DiagnoseAvailabilityOfDecl(D, Loc, UnknownObjCClass, ObjCPropertyAccess, AvoidPartialAvailabilityChecks); DiagnoseUnusedOfDecl(*this, D, Loc); diagnoseUseOfInternalDeclInInlineFunction(*this, D, Loc); return false; } /// \brief Retrieve the message suffix that should be added to a /// diagnostic complaining about the given function being deleted or /// unavailable. std::string Sema::getDeletedOrUnavailableSuffix(const FunctionDecl *FD) { std::string Message; if (FD->getAvailability(&Message)) return ": " + Message; return std::string(); } /// DiagnoseSentinelCalls - This routine checks whether a call or /// message-send is to a declaration with the sentinel attribute, and /// if so, it checks that the requirements of the sentinel are /// satisfied. void Sema::DiagnoseSentinelCalls(NamedDecl *D, SourceLocation Loc, ArrayRef Args) { const SentinelAttr *attr = D->getAttr(); if (!attr) return; // The number of formal parameters of the declaration. unsigned numFormalParams; // The kind of declaration. This is also an index into a %select in // the diagnostic. enum CalleeType { CT_Function, CT_Method, CT_Block } calleeType; if (ObjCMethodDecl *MD = dyn_cast(D)) { numFormalParams = MD->param_size(); calleeType = CT_Method; } else if (FunctionDecl *FD = dyn_cast(D)) { numFormalParams = FD->param_size(); calleeType = CT_Function; } else if (isa(D)) { QualType type = cast(D)->getType(); const FunctionType *fn = nullptr; if (const PointerType *ptr = type->getAs()) { fn = ptr->getPointeeType()->getAs(); if (!fn) return; calleeType = CT_Function; } else if (const BlockPointerType *ptr = type->getAs()) { fn = ptr->getPointeeType()->castAs(); calleeType = CT_Block; } else { return; } if (const FunctionProtoType *proto = dyn_cast(fn)) { numFormalParams = proto->getNumParams(); } else { numFormalParams = 0; } } else { return; } // "nullPos" is the number of formal parameters at the end which // effectively count as part of the variadic arguments. This is // useful if you would prefer to not have *any* formal parameters, // but the language forces you to have at least one. unsigned nullPos = attr->getNullPos(); assert((nullPos == 0 || nullPos == 1) && "invalid null position on sentinel"); numFormalParams = (nullPos > numFormalParams ? 0 : numFormalParams - nullPos); // The number of arguments which should follow the sentinel. 
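// For illustration (hypothetical user code, not part of this file): with
//
//   void run(const char *first, ...) __attribute__((sentinel));
//
//   run("a", "b", (char *)0);  // ok: trailing null sentinel
//   run("a", "b");             // warns: missing sentinel
//
// With sentinel(N), the null is instead expected N positions before the end
// of the argument list.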
unsigned numArgsAfterSentinel = attr->getSentinel(); // If there aren't enough arguments for all the formal parameters, // the sentinel, and the args after the sentinel, complain. if (Args.size() < numFormalParams + numArgsAfterSentinel + 1) { Diag(Loc, diag::warn_not_enough_argument) << D->getDeclName(); Diag(D->getLocation(), diag::note_sentinel_here) << int(calleeType); return; } // Otherwise, find the sentinel expression. Expr *sentinelExpr = Args[Args.size() - numArgsAfterSentinel - 1]; if (!sentinelExpr) return; if (sentinelExpr->isValueDependent()) return; if (Context.isSentinelNullExpr(sentinelExpr)) return; // Pick a reasonable string to insert. Optimistically use 'nil', 'nullptr', // or 'NULL' if those are actually defined in the context. Only use // 'nil' for ObjC methods, where it's much more likely that the // variadic arguments form a list of object pointers. SourceLocation MissingNilLoc = getLocForEndOfToken(sentinelExpr->getLocEnd()); std::string NullValue; if (calleeType == CT_Method && PP.isMacroDefined("nil")) NullValue = "nil"; else if (getLangOpts().CPlusPlus11) NullValue = "nullptr"; else if (PP.isMacroDefined("NULL")) NullValue = "NULL"; else NullValue = "(void*) 0"; if (MissingNilLoc.isInvalid()) Diag(Loc, diag::warn_missing_sentinel) << int(calleeType); else Diag(MissingNilLoc, diag::warn_missing_sentinel) << int(calleeType) << FixItHint::CreateInsertion(MissingNilLoc, ", " + NullValue); Diag(D->getLocation(), diag::note_sentinel_here) << int(calleeType); } SourceRange Sema::getExprRange(Expr *E) const { return E ? E->getSourceRange() : SourceRange(); } //===----------------------------------------------------------------------===// // Standard Promotions and Conversions //===----------------------------------------------------------------------===// /// DefaultFunctionArrayConversion (C99 6.3.2.1p3, C99 6.3.2.1p4). ExprResult Sema::DefaultFunctionArrayConversion(Expr *E, bool Diagnose) { // Handle any placeholder expressions which made it here. if (E->getType()->isPlaceholderType()) { ExprResult result = CheckPlaceholderExpr(E); if (result.isInvalid()) return ExprError(); E = result.get(); } QualType Ty = E->getType(); assert(!Ty.isNull() && "DefaultFunctionArrayConversion - missing type"); if (Ty->isFunctionType()) { if (auto *DRE = dyn_cast(E->IgnoreParenCasts())) if (auto *FD = dyn_cast(DRE->getDecl())) if (!checkAddressOfFunctionIsAvailable(FD, Diagnose, E->getExprLoc())) return ExprError(); E = ImpCastExprToType(E, Context.getPointerType(Ty), CK_FunctionToPointerDecay).get(); } else if (Ty->isArrayType()) { // In C90 mode, arrays only promote to pointers if the array expression is // an lvalue. The relevant legalese is C90 6.2.2.1p3: "an lvalue that has // type 'array of type' is converted to an expression that has type 'pointer // to type'...". In C99 this was changed to: C99 6.3.2.1p3: "an expression // that has type 'array of type' ...". The relevant change is "an lvalue" // (C90) to "an expression" (C99). // // C++ 4.2p1: // An lvalue or rvalue of type "array of N T" or "array of unknown bound of // T" can be converted to an rvalue of type "pointer to T". // if (getLangOpts().C99 || getLangOpts().CPlusPlus || E->isLValue()) E = ImpCastExprToType(E, Context.getArrayDecayedType(Ty), CK_ArrayToPointerDecay).get(); } return E; } static void CheckForNullPointerDereference(Sema &S, Expr *E) { // Check to see if we are dereferencing a null pointer. 
// If so, and if not volatile-qualified, this is undefined behavior that the
// optimizer will delete, so warn about it. People sometimes try to use this
// to get a deterministic trap and are surprised by clang's behavior. This
// only handles the pattern "*null", which is a very syntactic check.
  if (UnaryOperator *UO = dyn_cast<UnaryOperator>(E->IgnoreParenCasts()))
    if (UO->getOpcode() == UO_Deref &&
        UO->getSubExpr()->IgnoreParenCasts()->
          isNullPointerConstant(S.Context, Expr::NPC_ValueDependentIsNotNull) &&
        !UO->getType().isVolatileQualified()) {
    S.DiagRuntimeBehavior(UO->getOperatorLoc(), UO,
                          S.PDiag(diag::warn_indirection_through_null)
                            << UO->getSubExpr()->getSourceRange());
    S.DiagRuntimeBehavior(UO->getOperatorLoc(), UO,
                          S.PDiag(diag::note_indirection_through_null));
  }
}

static void DiagnoseDirectIsaAccess(Sema &S, const ObjCIvarRefExpr *OIRE,
                                    SourceLocation AssignLoc,
                                    const Expr *RHS) {
  const ObjCIvarDecl *IV = OIRE->getDecl();
  if (!IV)
    return;

  DeclarationName MemberName = IV->getDeclName();
  IdentifierInfo *Member = MemberName.getAsIdentifierInfo();
  if (!Member || !Member->isStr("isa"))
    return;

  const Expr *Base = OIRE->getBase();
  QualType BaseType = Base->getType();
  if (OIRE->isArrow())
    BaseType = BaseType->getPointeeType();
  if (const ObjCObjectType *OTy = BaseType->getAs<ObjCObjectType>())
    if (ObjCInterfaceDecl *IDecl = OTy->getInterface()) {
      ObjCInterfaceDecl *ClassDeclared = nullptr;
      ObjCIvarDecl *IV = IDecl->lookupInstanceVariable(Member, ClassDeclared);
      if (!ClassDeclared->getSuperClass() &&
          (*ClassDeclared->ivar_begin()) == IV) {
        if (RHS) {
          NamedDecl *ObjectSetClass =
              S.LookupSingleName(S.TUScope,
                                 &S.Context.Idents.get("object_setClass"),
                                 SourceLocation(), S.LookupOrdinaryName);
          if (ObjectSetClass) {
            SourceLocation RHSLocEnd = S.getLocForEndOfToken(RHS->getLocEnd());
            S.Diag(OIRE->getExprLoc(), diag::warn_objc_isa_assign)
                << FixItHint::CreateInsertion(OIRE->getLocStart(),
                                              "object_setClass(")
                << FixItHint::CreateReplacement(
                       SourceRange(OIRE->getOpLoc(), AssignLoc), ",")
                << FixItHint::CreateInsertion(RHSLocEnd, ")");
          } else
            S.Diag(OIRE->getLocation(), diag::warn_objc_isa_assign);
        } else {
          NamedDecl *ObjectGetClass =
              S.LookupSingleName(S.TUScope,
                                 &S.Context.Idents.get("object_getClass"),
                                 SourceLocation(), S.LookupOrdinaryName);
          if (ObjectGetClass)
            S.Diag(OIRE->getExprLoc(), diag::warn_objc_isa_use)
                << FixItHint::CreateInsertion(OIRE->getLocStart(),
                                              "object_getClass(")
                << FixItHint::CreateReplacement(
                       SourceRange(OIRE->getOpLoc(), OIRE->getLocEnd()), ")");
          else
            S.Diag(OIRE->getLocation(), diag::warn_objc_isa_use);
        }
        S.Diag(IV->getLocation(), diag::note_ivar_decl);
      }
    }
}

ExprResult Sema::DefaultLvalueConversion(Expr *E) {
  // Handle any placeholder expressions which made it here.
  if (E->getType()->isPlaceholderType()) {
    ExprResult result = CheckPlaceholderExpr(E);
    if (result.isInvalid()) return ExprError();
    E = result.get();
  }

  // C++ [conv.lval]p1:
  //   A glvalue of a non-function, non-array type T can be
  //   converted to a prvalue.
  if (!E->isGLValue()) return E;

  QualType T = E->getType();
  assert(!T.isNull() && "r-value conversion on typeless expression?");

  // We don't want to throw lvalue-to-rvalue casts on top of
  // expressions of certain types in C++.
  if (getLangOpts().CPlusPlus &&
      (E->getType() == Context.OverloadTy ||
       T->isDependentType() ||
       T->isRecordType()))
    return E;

  // The C standard is actually really unclear on this point, and
  // DR106 tells us what the result should be but not why. It's
  // generally best to say that void types just don't undergo
  // lvalue-to-rvalue at all.
Note that expressions of unqualified // 'void' type are never l-values, but qualified void can be. if (T->isVoidType()) return E; // OpenCL usually rejects direct accesses to values of 'half' type. if (getLangOpts().OpenCL && !getOpenCLOptions().isEnabled("cl_khr_fp16") && T->isHalfType()) { Diag(E->getExprLoc(), diag::err_opencl_half_load_store) << 0 << T; return ExprError(); } CheckForNullPointerDereference(*this, E); if (const ObjCIsaExpr *OISA = dyn_cast(E->IgnoreParenCasts())) { NamedDecl *ObjectGetClass = LookupSingleName(TUScope, &Context.Idents.get("object_getClass"), SourceLocation(), LookupOrdinaryName); if (ObjectGetClass) Diag(E->getExprLoc(), diag::warn_objc_isa_use) << FixItHint::CreateInsertion(OISA->getLocStart(), "object_getClass(") << FixItHint::CreateReplacement( SourceRange(OISA->getOpLoc(), OISA->getIsaMemberLoc()), ")"); else Diag(E->getExprLoc(), diag::warn_objc_isa_use); } else if (const ObjCIvarRefExpr *OIRE = dyn_cast(E->IgnoreParenCasts())) DiagnoseDirectIsaAccess(*this, OIRE, SourceLocation(), /* Expr*/nullptr); // C++ [conv.lval]p1: // [...] If T is a non-class type, the type of the prvalue is the // cv-unqualified version of T. Otherwise, the type of the // rvalue is T. // // C99 6.3.2.1p2: // If the lvalue has qualified type, the value has the unqualified // version of the type of the lvalue; otherwise, the value has the // type of the lvalue. if (T.hasQualifiers()) T = T.getUnqualifiedType(); // Under the MS ABI, lock down the inheritance model now. if (T->isMemberPointerType() && Context.getTargetInfo().getCXXABI().isMicrosoft()) (void)isCompleteType(E->getExprLoc(), T); UpdateMarkingForLValueToRValue(E); // Loading a __weak object implicitly retains the value, so we need a cleanup to // balance that. if (E->getType().getObjCLifetime() == Qualifiers::OCL_Weak) Cleanup.setExprNeedsCleanups(true); ExprResult Res = ImplicitCastExpr::Create(Context, T, CK_LValueToRValue, E, nullptr, VK_RValue); // C11 6.3.2.1p2: // ... if the lvalue has atomic type, the value has the non-atomic version // of the type of the lvalue ... if (const AtomicType *Atomic = T->getAs()) { T = Atomic->getValueType().getUnqualifiedType(); Res = ImplicitCastExpr::Create(Context, T, CK_AtomicToNonAtomic, Res.get(), nullptr, VK_RValue); } return Res; } ExprResult Sema::DefaultFunctionArrayLvalueConversion(Expr *E, bool Diagnose) { ExprResult Res = DefaultFunctionArrayConversion(E, Diagnose); if (Res.isInvalid()) return ExprError(); Res = DefaultLvalueConversion(Res.get()); if (Res.isInvalid()) return ExprError(); return Res; } /// CallExprUnaryConversions - a special case of an unary conversion /// performed on a function designator of a call expression. ExprResult Sema::CallExprUnaryConversions(Expr *E) { QualType Ty = E->getType(); ExprResult Res = E; // Only do implicit cast for a function type, but not for a pointer // to function type. if (Ty->isFunctionType()) { Res = ImpCastExprToType(E, Context.getPointerType(Ty), CK_FunctionToPointerDecay).get(); if (Res.isInvalid()) return ExprError(); } Res = DefaultLvalueConversion(Res.get()); if (Res.isInvalid()) return ExprError(); return Res.get(); } /// UsualUnaryConversions - Performs various conversions that are common to most /// operators (C99 6.3). The conversions of array and function types are /// sometimes suppressed. For example, the array->pointer conversion doesn't /// apply if the array is an argument to the sizeof or address (&) operators. /// In these instances, this routine should *not* be called. 
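// For illustration (hypothetical C code, not part of this file), the decay
// this routine performs, and the contexts in which it must be suppressed:
//
//   int a[8];
//   sizeof(a)   // 8 * sizeof(int): no array-to-pointer decay
//   &a          // type 'int (*)[8]': address-of also suppresses decay
//   a + 1       // decay applies; 'a' converts to 'int *'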
ExprResult Sema::UsualUnaryConversions(Expr *E) {
  // First, convert to an r-value.
  ExprResult Res = DefaultFunctionArrayLvalueConversion(E);
  if (Res.isInvalid())
    return ExprError();
  E = Res.get();

  QualType Ty = E->getType();
  assert(!Ty.isNull() && "UsualUnaryConversions - missing type");

  // Half FP has to be promoted to float unless it is natively supported.
  if (Ty->isHalfType() && !getLangOpts().NativeHalfType)
    return ImpCastExprToType(Res.get(), Context.FloatTy, CK_FloatingCast);

  // Try to perform integral promotions if the object has a theoretically
  // promotable type.
  if (Ty->isIntegralOrUnscopedEnumerationType()) {
    // C99 6.3.1.1p2:
    //
    //   The following may be used in an expression wherever an int or
    //   unsigned int may be used:
    //     - an object or expression with an integer type whose integer
    //       conversion rank is less than or equal to the rank of int
    //       and unsigned int.
    //     - A bit-field of type _Bool, int, signed int, or unsigned int.
    //
    //   If an int can represent all values of the original type, the
    //   value is converted to an int; otherwise, it is converted to an
    //   unsigned int. These are called the integer promotions. All
    //   other types are unchanged by the integer promotions.

    QualType PTy = Context.isPromotableBitField(E);
    if (!PTy.isNull()) {
      E = ImpCastExprToType(E, PTy, CK_IntegralCast).get();
      return E;
    }
    if (Ty->isPromotableIntegerType()) {
      QualType PT = Context.getPromotedIntegerType(Ty);
      E = ImpCastExprToType(E, PT, CK_IntegralCast).get();
      return E;
    }
  }
  return E;
}

/// DefaultArgumentPromotion (C99 6.5.2.2p6). Used for function calls that
/// do not have a prototype. Arguments that have type float or __fp16
/// are promoted to double. All other argument types are converted by
/// UsualUnaryConversions().
ExprResult Sema::DefaultArgumentPromotion(Expr *E) {
  QualType Ty = E->getType();
  assert(!Ty.isNull() && "DefaultArgumentPromotion - missing type");

  ExprResult Res = UsualUnaryConversions(E);
  if (Res.isInvalid())
    return ExprError();
  E = Res.get();

  // If this is a 'float' or '__fp16' (CVR qualified or typedef), promote to
  // double. Note that default argument promotion applies only to float (and
  // half/fp16); it does not apply to _Float16.
  const BuiltinType *BTy = Ty->getAs<BuiltinType>();
  if (BTy && (BTy->getKind() == BuiltinType::Half ||
              BTy->getKind() == BuiltinType::Float)) {
    if (getLangOpts().OpenCL &&
        !getOpenCLOptions().isEnabled("cl_khr_fp64")) {
      if (BTy->getKind() == BuiltinType::Half) {
        E = ImpCastExprToType(E, Context.FloatTy, CK_FloatingCast).get();
      }
    } else {
      E = ImpCastExprToType(E, Context.DoubleTy, CK_FloatingCast).get();
    }
  }

  // C++ performs lvalue-to-rvalue conversion as a default argument
  // promotion, even on class types, but note:
  //   C++11 [conv.lval]p2:
  //     When an lvalue-to-rvalue conversion occurs in an unevaluated
  //     operand or a subexpression thereof the value contained in the
  //     referenced object is not accessed. Otherwise, if the glvalue
  //     has a class type, the conversion copy-initializes a temporary
  //     of type T from the glvalue and the result of the conversion
  //     is a prvalue for the temporary.
  // FIXME: add some way to gate this entire thing for correctness in
  // potentially potentially-evaluated contexts.
  if (getLangOpts().CPlusPlus && E->isGLValue() && !isUnevaluatedContext()) {
    ExprResult Temp = PerformCopyInitialization(
        InitializedEntity::InitializeTemporary(E->getType()),
        E->getExprLoc(), E);
    if (Temp.isInvalid())
      return ExprError();
    E = Temp.get();
  }

  return E;
}

/// Determine the degree of POD-ness for an expression.
/// Incomplete types are considered POD, since this check can be performed /// when we're in an unevaluated context. Sema::VarArgKind Sema::isValidVarArgType(const QualType &Ty) { if (Ty->isIncompleteType()) { // C++11 [expr.call]p7: // After these conversions, if the argument does not have arithmetic, // enumeration, pointer, pointer to member, or class type, the program // is ill-formed. // // Since we've already performed array-to-pointer and function-to-pointer // decay, the only such type in C++ is cv void. This also handles // initializer lists as variadic arguments. if (Ty->isVoidType()) return VAK_Invalid; if (Ty->isObjCObjectType()) return VAK_Invalid; return VAK_Valid; } if (Ty.isCXX98PODType(Context)) return VAK_Valid; // C++11 [expr.call]p7: // Passing a potentially-evaluated argument of class type (Clause 9) // having a non-trivial copy constructor, a non-trivial move constructor, // or a non-trivial destructor, with no corresponding parameter, // is conditionally-supported with implementation-defined semantics. if (getLangOpts().CPlusPlus11 && !Ty->isDependentType()) if (CXXRecordDecl *Record = Ty->getAsCXXRecordDecl()) if (!Record->hasNonTrivialCopyConstructor() && !Record->hasNonTrivialMoveConstructor() && !Record->hasNonTrivialDestructor()) return VAK_ValidInCXX11; if (getLangOpts().ObjCAutoRefCount && Ty->isObjCLifetimeType()) return VAK_Valid; if (Ty->isObjCObjectType()) return VAK_Invalid; if (getLangOpts().MSVCCompat) return VAK_MSVCUndefined; // FIXME: In C++11, these cases are conditionally-supported, meaning we're // permitted to reject them. We should consider doing so. return VAK_Undefined; } void Sema::checkVariadicArgument(const Expr *E, VariadicCallType CT) { // Don't allow one to pass an Objective-C interface to a vararg. const QualType &Ty = E->getType(); VarArgKind VAK = isValidVarArgType(Ty); // Complain about passing non-POD types through varargs. switch (VAK) { case VAK_ValidInCXX11: DiagRuntimeBehavior( E->getLocStart(), nullptr, PDiag(diag::warn_cxx98_compat_pass_non_pod_arg_to_vararg) << Ty << CT); LLVM_FALLTHROUGH; case VAK_Valid: if (Ty->isRecordType()) { // This is unlikely to be what the user intended. If the class has a // 'c_str' member function, the user probably meant to call that. DiagRuntimeBehavior(E->getLocStart(), nullptr, PDiag(diag::warn_pass_class_arg_to_vararg) << Ty << CT << hasCStrMethod(E) << ".c_str()"); } break; case VAK_Undefined: case VAK_MSVCUndefined: DiagRuntimeBehavior( E->getLocStart(), nullptr, PDiag(diag::warn_cannot_pass_non_pod_arg_to_vararg) << getLangOpts().CPlusPlus11 << Ty << CT); break; case VAK_Invalid: if (Ty->isObjCObjectType()) DiagRuntimeBehavior( E->getLocStart(), nullptr, PDiag(diag::err_cannot_pass_objc_interface_to_vararg) << Ty << CT); else Diag(E->getLocStart(), diag::err_cannot_pass_to_vararg) << isa(E) << Ty << CT; break; } } /// DefaultVariadicArgumentPromotion - Like DefaultArgumentPromotion, but /// will create a trap if the resulting type is not a POD type. ExprResult Sema::DefaultVariadicArgumentPromotion(Expr *E, VariadicCallType CT, FunctionDecl *FDecl) { if (const BuiltinType *PlaceholderTy = E->getType()->getAsPlaceholderType()) { // Strip the unbridged-cast placeholder expression off, if applicable. if (PlaceholderTy->getKind() == BuiltinType::ARCUnbridgedCast && (CT == VariadicMethod || (FDecl && FDecl->hasAttr()))) { E = stripARCUnbridgedCast(E); // Otherwise, do normal placeholder checking. 
    } else {
      ExprResult ExprRes = CheckPlaceholderExpr(E);
      if (ExprRes.isInvalid())
        return ExprError();
      E = ExprRes.get();
    }
  }

  ExprResult ExprRes = DefaultArgumentPromotion(E);
  if (ExprRes.isInvalid())
    return ExprError();
  E = ExprRes.get();

  // Diagnostics regarding non-POD argument types are
  // emitted along with format string checking in Sema::CheckFunctionCall().
  if (isValidVarArgType(E->getType()) == VAK_Undefined) {
    // Turn this into a trap.
    CXXScopeSpec SS;
    SourceLocation TemplateKWLoc;
    UnqualifiedId Name;
    Name.setIdentifier(PP.getIdentifierInfo("__builtin_trap"),
                       E->getLocStart());
    ExprResult TrapFn = ActOnIdExpression(TUScope, SS, TemplateKWLoc,
                                          Name, true, false);
    if (TrapFn.isInvalid())
      return ExprError();
    ExprResult Call = ActOnCallExpr(TUScope, TrapFn.get(),
                                    E->getLocStart(), None, E->getLocEnd());
    if (Call.isInvalid())
      return ExprError();
    ExprResult Comma = ActOnBinOp(TUScope, E->getLocStart(), tok::comma,
                                  Call.get(), E);
    if (Comma.isInvalid())
      return ExprError();
    return Comma.get();
  }

  if (!getLangOpts().CPlusPlus &&
      RequireCompleteType(E->getExprLoc(), E->getType(),
                          diag::err_call_incomplete_argument))
    return ExprError();

  return E;
}

/// \brief Converts an integer to complex float type. Helper function of
/// UsualArithmeticConversions()
///
/// \return false if the integer expression is an integer type and is
/// successfully converted to the complex type.
static bool handleIntegerToComplexFloatConversion(Sema &S,
                                                  ExprResult &IntExpr,
                                                  ExprResult &ComplexExpr,
                                                  QualType IntTy,
                                                  QualType ComplexTy,
                                                  bool SkipCast) {
  if (IntTy->isComplexType() || IntTy->isRealFloatingType()) return true;
  if (SkipCast) return false;
  if (IntTy->isIntegerType()) {
    QualType fpTy = cast<ComplexType>(ComplexTy)->getElementType();
    IntExpr = S.ImpCastExprToType(IntExpr.get(), fpTy, CK_IntegralToFloating);
    IntExpr = S.ImpCastExprToType(IntExpr.get(), ComplexTy,
                                  CK_FloatingRealToComplex);
  } else {
    assert(IntTy->isComplexIntegerType());
    IntExpr = S.ImpCastExprToType(IntExpr.get(), ComplexTy,
                                  CK_IntegralComplexToFloatingComplex);
  }
  return false;
}

/// \brief Handle arithmetic conversion with complex types. Helper function of
/// UsualArithmeticConversions()
static QualType handleComplexFloatConversion(Sema &S, ExprResult &LHS,
                                             ExprResult &RHS, QualType LHSType,
                                             QualType RHSType,
                                             bool IsCompAssign) {
  // If we have an integer operand, the result is the complex type.
  if (!handleIntegerToComplexFloatConversion(S, RHS, LHS, RHSType, LHSType,
                                             /*skipCast*/false))
    return LHSType;
  if (!handleIntegerToComplexFloatConversion(S, LHS, RHS, LHSType, RHSType,
                                             /*skipCast*/IsCompAssign))
    return RHSType;

  // This handles complex/complex, complex/float, or float/complex.
  // When both operands are complex, the shorter operand is converted to the
  // type of the longer, and that is the type of the result. This corresponds
  // to what is done when combining two real floating-point operands.
  // The fun begins when size promotion occurs across type domains.
  // From H&S 6.3.4: When one operand is complex and the other is a real
  // floating-point type, the less precise type is converted, within its
  // real or complex domain, to the precision of the other type. For example,
  // when combining a "long double" with a "double _Complex", the
  // "double _Complex" is promoted to "long double _Complex".

  // Compute the rank of the two types, regardless of whether they are complex.
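  // For illustration (hypothetical operand pairs, not part of this file),
  // this rank comparison yields:
  //
  //   float _Complex + double        -> double _Complex
  //   long double + double _Complex  -> long double _Complex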
  int Order = S.Context.getFloatingTypeOrder(LHSType, RHSType);

  auto *LHSComplexType = dyn_cast<ComplexType>(LHSType);
  auto *RHSComplexType = dyn_cast<ComplexType>(RHSType);
  QualType LHSElementType =
      LHSComplexType ? LHSComplexType->getElementType() : LHSType;
  QualType RHSElementType =
      RHSComplexType ? RHSComplexType->getElementType() : RHSType;

  QualType ResultType = S.Context.getComplexType(LHSElementType);
  if (Order < 0) {
    // Promote the precision of the LHS if not an assignment.
    ResultType = S.Context.getComplexType(RHSElementType);
    if (!IsCompAssign) {
      if (LHSComplexType)
        LHS = S.ImpCastExprToType(LHS.get(), ResultType,
                                  CK_FloatingComplexCast);
      else
        LHS = S.ImpCastExprToType(LHS.get(), RHSElementType, CK_FloatingCast);
    }
  } else if (Order > 0) {
    // Promote the precision of the RHS.
    if (RHSComplexType)
      RHS = S.ImpCastExprToType(RHS.get(), ResultType, CK_FloatingComplexCast);
    else
      RHS = S.ImpCastExprToType(RHS.get(), LHSElementType, CK_FloatingCast);
  }
  return ResultType;
}

/// \brief Handle arithmetic conversion from integer to float. Helper function
/// of UsualArithmeticConversions()
static QualType handleIntToFloatConversion(Sema &S, ExprResult &FloatExpr,
                                           ExprResult &IntExpr,
                                           QualType FloatTy, QualType IntTy,
                                           bool ConvertFloat, bool ConvertInt) {
  if (IntTy->isIntegerType()) {
    if (ConvertInt)
      // Convert intExpr to the lhs floating point type.
      IntExpr = S.ImpCastExprToType(IntExpr.get(), FloatTy,
                                    CK_IntegralToFloating);
    return FloatTy;
  }

  // Convert both sides to the appropriate complex float.
  assert(IntTy->isComplexIntegerType());
  QualType result = S.Context.getComplexType(FloatTy);

  // _Complex int -> _Complex float
  if (ConvertInt)
    IntExpr = S.ImpCastExprToType(IntExpr.get(), result,
                                  CK_IntegralComplexToFloatingComplex);

  // float -> _Complex float
  if (ConvertFloat)
    FloatExpr = S.ImpCastExprToType(FloatExpr.get(), result,
                                    CK_FloatingRealToComplex);

  return result;
}

/// \brief Handle arithmetic conversion with floating point types. Helper
/// function of UsualArithmeticConversions()
static QualType handleFloatConversion(Sema &S, ExprResult &LHS,
                                      ExprResult &RHS, QualType LHSType,
                                      QualType RHSType, bool IsCompAssign) {
  bool LHSFloat = LHSType->isRealFloatingType();
  bool RHSFloat = RHSType->isRealFloatingType();

  // If we have two real floating types, convert the smaller operand
  // to the bigger result.
  if (LHSFloat && RHSFloat) {
    int order = S.Context.getFloatingTypeOrder(LHSType, RHSType);
    if (order > 0) {
      RHS = S.ImpCastExprToType(RHS.get(), LHSType, CK_FloatingCast);
      return LHSType;
    }

    assert(order < 0 && "illegal float comparison");
    if (!IsCompAssign)
      LHS = S.ImpCastExprToType(LHS.get(), RHSType, CK_FloatingCast);
    return RHSType;
  }

  if (LHSFloat) {
    // Half FP has to be promoted to float unless it is natively supported.
    if (LHSType->isHalfType() && !S.getLangOpts().NativeHalfType)
      LHSType = S.Context.FloatTy;

    return handleIntToFloatConversion(S, LHS, RHS, LHSType, RHSType,
                                      /*convertFloat=*/!IsCompAssign,
                                      /*convertInt=*/ true);
  }
  assert(RHSFloat);
  return handleIntToFloatConversion(S, RHS, LHS, RHSType, LHSType,
                                    /*convertInt=*/ true,
                                    /*convertFloat=*/!IsCompAssign);
}

/// \brief Diagnose attempts to convert between __float128 and long double if
/// there is no support for such conversion. Helper function of
/// UsualArithmeticConversions().
static bool unsupportedTypeConversion(const Sema &S, QualType LHSType,
                                      QualType RHSType) {
  /* No issue converting if at least one of the types is not a floating point
     type or the two types have the same rank.
*/ if (!LHSType->isFloatingType() || !RHSType->isFloatingType() || S.Context.getFloatingTypeOrder(LHSType, RHSType) == 0) return false; assert(LHSType->isFloatingType() && RHSType->isFloatingType() && "The remaining types must be floating point types."); auto *LHSComplex = LHSType->getAs(); auto *RHSComplex = RHSType->getAs(); QualType LHSElemType = LHSComplex ? LHSComplex->getElementType() : LHSType; QualType RHSElemType = RHSComplex ? RHSComplex->getElementType() : RHSType; // No issue if the two types have the same representation if (&S.Context.getFloatTypeSemantics(LHSElemType) == &S.Context.getFloatTypeSemantics(RHSElemType)) return false; bool Float128AndLongDouble = (LHSElemType == S.Context.Float128Ty && RHSElemType == S.Context.LongDoubleTy); Float128AndLongDouble |= (LHSElemType == S.Context.LongDoubleTy && RHSElemType == S.Context.Float128Ty); /* We've handled the situation where __float128 and long double have the same representation. The only other allowable conversion is if long double is really just double. */ return Float128AndLongDouble && (&S.Context.getFloatTypeSemantics(S.Context.LongDoubleTy) != &llvm::APFloat::IEEEdouble()); } typedef ExprResult PerformCastFn(Sema &S, Expr *operand, QualType toType); namespace { /// These helper callbacks are placed in an anonymous namespace to /// permit their use as function template parameters. ExprResult doIntegralCast(Sema &S, Expr *op, QualType toType) { return S.ImpCastExprToType(op, toType, CK_IntegralCast); } ExprResult doComplexIntegralCast(Sema &S, Expr *op, QualType toType) { return S.ImpCastExprToType(op, S.Context.getComplexType(toType), CK_IntegralComplexCast); } } /// \brief Handle integer arithmetic conversions. Helper function of /// UsualArithmeticConversions() template static QualType handleIntegerConversion(Sema &S, ExprResult &LHS, ExprResult &RHS, QualType LHSType, QualType RHSType, bool IsCompAssign) { // The rules for this case are in C99 6.3.1.8 int order = S.Context.getIntegerTypeOrder(LHSType, RHSType); bool LHSSigned = LHSType->hasSignedIntegerRepresentation(); bool RHSSigned = RHSType->hasSignedIntegerRepresentation(); if (LHSSigned == RHSSigned) { // Same signedness; use the higher-ranked type if (order >= 0) { RHS = (*doRHSCast)(S, RHS.get(), LHSType); return LHSType; } else if (!IsCompAssign) LHS = (*doLHSCast)(S, LHS.get(), RHSType); return RHSType; } else if (order != (LHSSigned ? 1 : -1)) { // The unsigned type has greater than or equal rank to the // signed type, so use the unsigned type if (RHSSigned) { RHS = (*doRHSCast)(S, RHS.get(), LHSType); return LHSType; } else if (!IsCompAssign) LHS = (*doLHSCast)(S, LHS.get(), RHSType); return RHSType; } else if (S.Context.getIntWidth(LHSType) != S.Context.getIntWidth(RHSType)) { // The two types are different widths; if we are here, that // means the signed type is larger than the unsigned type, so // use the signed type. if (LHSSigned) { RHS = (*doRHSCast)(S, RHS.get(), LHSType); return LHSType; } else if (!IsCompAssign) LHS = (*doLHSCast)(S, LHS.get(), RHSType); return RHSType; } else { // The signed type is higher-ranked than the unsigned type, // but isn't actually any bigger (like unsigned int and long // on most 32-bit systems). Use the unsigned type corresponding // to the signed type. QualType result = S.Context.getCorrespondingUnsignedType(LHSSigned ? 
LHSType : RHSType); RHS = (*doRHSCast)(S, RHS.get(), result); if (!IsCompAssign) LHS = (*doLHSCast)(S, LHS.get(), result); return result; } } /// \brief Handle conversions with GCC complex int extension. Helper function /// of UsualArithmeticConversions() static QualType handleComplexIntConversion(Sema &S, ExprResult &LHS, ExprResult &RHS, QualType LHSType, QualType RHSType, bool IsCompAssign) { const ComplexType *LHSComplexInt = LHSType->getAsComplexIntegerType(); const ComplexType *RHSComplexInt = RHSType->getAsComplexIntegerType(); if (LHSComplexInt && RHSComplexInt) { QualType LHSEltType = LHSComplexInt->getElementType(); QualType RHSEltType = RHSComplexInt->getElementType(); QualType ScalarType = handleIntegerConversion (S, LHS, RHS, LHSEltType, RHSEltType, IsCompAssign); return S.Context.getComplexType(ScalarType); } if (LHSComplexInt) { QualType LHSEltType = LHSComplexInt->getElementType(); QualType ScalarType = handleIntegerConversion (S, LHS, RHS, LHSEltType, RHSType, IsCompAssign); QualType ComplexType = S.Context.getComplexType(ScalarType); RHS = S.ImpCastExprToType(RHS.get(), ComplexType, CK_IntegralRealToComplex); return ComplexType; } assert(RHSComplexInt); QualType RHSEltType = RHSComplexInt->getElementType(); QualType ScalarType = handleIntegerConversion (S, LHS, RHS, LHSType, RHSEltType, IsCompAssign); QualType ComplexType = S.Context.getComplexType(ScalarType); if (!IsCompAssign) LHS = S.ImpCastExprToType(LHS.get(), ComplexType, CK_IntegralRealToComplex); return ComplexType; } /// UsualArithmeticConversions - Performs various conversions that are common to /// binary operators (C99 6.3.1.8). If both operands aren't arithmetic, this /// routine returns the first non-arithmetic type found. The client is /// responsible for emitting appropriate error diagnostics. QualType Sema::UsualArithmeticConversions(ExprResult &LHS, ExprResult &RHS, bool IsCompAssign) { if (!IsCompAssign) { LHS = UsualUnaryConversions(LHS.get()); if (LHS.isInvalid()) return QualType(); } RHS = UsualUnaryConversions(RHS.get()); if (RHS.isInvalid()) return QualType(); // For conversion purposes, we ignore any qualifiers. // For example, "const float" and "float" are equivalent. QualType LHSType = Context.getCanonicalType(LHS.get()->getType()).getUnqualifiedType(); QualType RHSType = Context.getCanonicalType(RHS.get()->getType()).getUnqualifiedType(); // For conversion purposes, we ignore any atomic qualifier on the LHS. if (const AtomicType *AtomicLHS = LHSType->getAs()) LHSType = AtomicLHS->getValueType(); // If both types are identical, no conversion is needed. if (LHSType == RHSType) return LHSType; // If either side is a non-arithmetic type (e.g. a pointer), we are done. // The caller can deal with this (e.g. pointer + int). if (!LHSType->isArithmeticType() || !RHSType->isArithmeticType()) return QualType(); // Apply unary and bitfield promotions to the LHS's type. QualType LHSUnpromotedType = LHSType; if (LHSType->isPromotableIntegerType()) LHSType = Context.getPromotedIntegerType(LHSType); QualType LHSBitfieldPromoteTy = Context.isPromotableBitField(LHS.get()); if (!LHSBitfieldPromoteTy.isNull()) LHSType = LHSBitfieldPromoteTy; if (LHSType != LHSUnpromotedType && !IsCompAssign) LHS = ImpCastExprToType(LHS.get(), LHSType, CK_IntegralCast); // If both types are identical, no conversion is needed. if (LHSType == RHSType) return LHSType; // At this point, we have two different arithmetic types. 
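  // For illustration (hypothetical operand pairs, not part of this file),
  // the dispatch below yields:
  //
  //   int + float           -> float           (real floating conversion)
  //   int + double _Complex -> double _Complex (complex conversion)
  //   unsigned int + int    -> unsigned int    (integer conversion,
  //                                             C99 6.3.1.8)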
// Diagnose attempts to convert between __float128 and long double where // such conversions currently can't be handled. if (unsupportedTypeConversion(*this, LHSType, RHSType)) return QualType(); // Handle complex types first (C99 6.3.1.8p1). if (LHSType->isComplexType() || RHSType->isComplexType()) return handleComplexFloatConversion(*this, LHS, RHS, LHSType, RHSType, IsCompAssign); // Now handle "real" floating types (i.e. float, double, long double). if (LHSType->isRealFloatingType() || RHSType->isRealFloatingType()) return handleFloatConversion(*this, LHS, RHS, LHSType, RHSType, IsCompAssign); // Handle GCC complex int extension. if (LHSType->isComplexIntegerType() || RHSType->isComplexIntegerType()) return handleComplexIntConversion(*this, LHS, RHS, LHSType, RHSType, IsCompAssign); // Finally, we have two differing integer types. return handleIntegerConversion (*this, LHS, RHS, LHSType, RHSType, IsCompAssign); } //===----------------------------------------------------------------------===// // Semantic Analysis for various Expression Types //===----------------------------------------------------------------------===// ExprResult Sema::ActOnGenericSelectionExpr(SourceLocation KeyLoc, SourceLocation DefaultLoc, SourceLocation RParenLoc, Expr *ControllingExpr, ArrayRef ArgTypes, ArrayRef ArgExprs) { unsigned NumAssocs = ArgTypes.size(); assert(NumAssocs == ArgExprs.size()); TypeSourceInfo **Types = new TypeSourceInfo*[NumAssocs]; for (unsigned i = 0; i < NumAssocs; ++i) { if (ArgTypes[i]) (void) GetTypeFromParser(ArgTypes[i], &Types[i]); else Types[i] = nullptr; } ExprResult ER = CreateGenericSelectionExpr(KeyLoc, DefaultLoc, RParenLoc, ControllingExpr, llvm::makeArrayRef(Types, NumAssocs), ArgExprs); delete [] Types; return ER; } ExprResult Sema::CreateGenericSelectionExpr(SourceLocation KeyLoc, SourceLocation DefaultLoc, SourceLocation RParenLoc, Expr *ControllingExpr, ArrayRef Types, ArrayRef Exprs) { unsigned NumAssocs = Types.size(); assert(NumAssocs == Exprs.size()); // Decay and strip qualifiers for the controlling expression type, and handle // placeholder type replacement. See committee discussion from WG14 DR423. { EnterExpressionEvaluationContext Unevaluated( *this, Sema::ExpressionEvaluationContext::Unevaluated); ExprResult R = DefaultFunctionArrayLvalueConversion(ControllingExpr); if (R.isInvalid()) return ExprError(); ControllingExpr = R.get(); } // The controlling expression is an unevaluated operand, so side effects are // likely unintended. if (!inTemplateInstantiation() && ControllingExpr->HasSideEffects(Context, false)) Diag(ControllingExpr->getExprLoc(), diag::warn_side_effects_unevaluated_context); bool TypeErrorFound = false, IsResultDependent = ControllingExpr->isTypeDependent(), ContainsUnexpandedParameterPack = ControllingExpr->containsUnexpandedParameterPack(); for (unsigned i = 0; i < NumAssocs; ++i) { if (Exprs[i]->containsUnexpandedParameterPack()) ContainsUnexpandedParameterPack = true; if (Types[i]) { if (Types[i]->getType()->containsUnexpandedParameterPack()) ContainsUnexpandedParameterPack = true; if (Types[i]->getType()->isDependentType()) { IsResultDependent = true; } else { // C11 6.5.1.1p2 "The type name in a generic association shall specify a // complete object type other than a variably modified type." 
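        // For illustration (hypothetical C11 code, not part of this file),
        // the associations checked here come from uses such as:
        //
        //   #define kind(x) _Generic((x), int: "int", float: "float",
        //                            default: "?")
        //   kind(1)     // selects "int"
        //   kind(1.0f)  // selects "float"
        //
        // An association naming an incomplete type (e.g. 'struct
        // never_defined') is diagnosed below.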
unsigned D = 0; if (Types[i]->getType()->isIncompleteType()) D = diag::err_assoc_type_incomplete; else if (!Types[i]->getType()->isObjectType()) D = diag::err_assoc_type_nonobject; else if (Types[i]->getType()->isVariablyModifiedType()) D = diag::err_assoc_type_variably_modified; if (D != 0) { Diag(Types[i]->getTypeLoc().getBeginLoc(), D) << Types[i]->getTypeLoc().getSourceRange() << Types[i]->getType(); TypeErrorFound = true; } // C11 6.5.1.1p2 "No two generic associations in the same generic // selection shall specify compatible types." for (unsigned j = i+1; j < NumAssocs; ++j) if (Types[j] && !Types[j]->getType()->isDependentType() && Context.typesAreCompatible(Types[i]->getType(), Types[j]->getType())) { Diag(Types[j]->getTypeLoc().getBeginLoc(), diag::err_assoc_compatible_types) << Types[j]->getTypeLoc().getSourceRange() << Types[j]->getType() << Types[i]->getType(); Diag(Types[i]->getTypeLoc().getBeginLoc(), diag::note_compat_assoc) << Types[i]->getTypeLoc().getSourceRange() << Types[i]->getType(); TypeErrorFound = true; } } } } if (TypeErrorFound) return ExprError(); // If we determined that the generic selection is result-dependent, don't // try to compute the result expression. if (IsResultDependent) return new (Context) GenericSelectionExpr( Context, KeyLoc, ControllingExpr, Types, Exprs, DefaultLoc, RParenLoc, ContainsUnexpandedParameterPack); SmallVector CompatIndices; unsigned DefaultIndex = -1U; for (unsigned i = 0; i < NumAssocs; ++i) { if (!Types[i]) DefaultIndex = i; else if (Context.typesAreCompatible(ControllingExpr->getType(), Types[i]->getType())) CompatIndices.push_back(i); } // C11 6.5.1.1p2 "The controlling expression of a generic selection shall have // type compatible with at most one of the types named in its generic // association list." if (CompatIndices.size() > 1) { // We strip parens here because the controlling expression is typically // parenthesized in macro definitions. ControllingExpr = ControllingExpr->IgnoreParens(); Diag(ControllingExpr->getLocStart(), diag::err_generic_sel_multi_match) << ControllingExpr->getSourceRange() << ControllingExpr->getType() << (unsigned) CompatIndices.size(); for (unsigned I : CompatIndices) { Diag(Types[I]->getTypeLoc().getBeginLoc(), diag::note_compat_assoc) << Types[I]->getTypeLoc().getSourceRange() << Types[I]->getType(); } return ExprError(); } // C11 6.5.1.1p2 "If a generic selection has no default generic association, // its controlling expression shall have type compatible with exactly one of // the types named in its generic association list." if (DefaultIndex == -1U && CompatIndices.size() == 0) { // We strip parens here because the controlling expression is typically // parenthesized in macro definitions. ControllingExpr = ControllingExpr->IgnoreParens(); Diag(ControllingExpr->getLocStart(), diag::err_generic_sel_no_match) << ControllingExpr->getSourceRange() << ControllingExpr->getType(); return ExprError(); } // C11 6.5.1.1p3 "If a generic selection has a generic association with a // type name that is compatible with the type of the controlling expression, // then the result expression of the generic selection is the expression // in that generic association. Otherwise, the result expression of the // generic selection is the expression in the default generic association." unsigned ResultIndex = CompatIndices.size() ? 
CompatIndices[0] : DefaultIndex; return new (Context) GenericSelectionExpr( Context, KeyLoc, ControllingExpr, Types, Exprs, DefaultLoc, RParenLoc, ContainsUnexpandedParameterPack, ResultIndex); } /// getUDSuffixLoc - Create a SourceLocation for a ud-suffix, given the /// location of the token and the offset of the ud-suffix within it. static SourceLocation getUDSuffixLoc(Sema &S, SourceLocation TokLoc, unsigned Offset) { return Lexer::AdvanceToTokenCharacter(TokLoc, Offset, S.getSourceManager(), S.getLangOpts()); } /// BuildCookedLiteralOperatorCall - A user-defined literal was found. Look up /// the corresponding cooked (non-raw) literal operator, and build a call to it. static ExprResult BuildCookedLiteralOperatorCall(Sema &S, Scope *Scope, IdentifierInfo *UDSuffix, SourceLocation UDSuffixLoc, ArrayRef<Expr *> Args, SourceLocation LitEndLoc) { assert(Args.size() <= 2 && "too many arguments for literal operator"); QualType ArgTy[2]; for (unsigned ArgIdx = 0; ArgIdx != Args.size(); ++ArgIdx) { ArgTy[ArgIdx] = Args[ArgIdx]->getType(); if (ArgTy[ArgIdx]->isArrayType()) ArgTy[ArgIdx] = S.Context.getArrayDecayedType(ArgTy[ArgIdx]); } DeclarationName OpName = S.Context.DeclarationNames.getCXXLiteralOperatorName(UDSuffix); DeclarationNameInfo OpNameInfo(OpName, UDSuffixLoc); OpNameInfo.setCXXLiteralOperatorNameLoc(UDSuffixLoc); LookupResult R(S, OpName, UDSuffixLoc, Sema::LookupOrdinaryName); if (S.LookupLiteralOperator(Scope, R, llvm::makeArrayRef(ArgTy, Args.size()), /*AllowRaw*/ false, /*AllowTemplate*/ false, /*AllowStringTemplate*/ false, /*DiagnoseMissing*/ true) == Sema::LOLR_Error) return ExprError(); return S.BuildLiteralOperatorCall(R, OpNameInfo, Args, LitEndLoc); } /// ActOnStringLiteral - The specified tokens were lexed as pasted string /// fragments (e.g. "foo" "bar" L"baz"). The result string has to handle string /// concatenation ([C99 5.1.1.2, translation phase #6]), so it may come from /// multiple tokens. However, the common case is that StringToks points to one /// string. /// ExprResult Sema::ActOnStringLiteral(ArrayRef<Token> StringToks, Scope *UDLScope) { assert(!StringToks.empty() && "Must have at least one string!"); StringLiteralParser Literal(StringToks, PP); if (Literal.hadError) return ExprError(); SmallVector<SourceLocation, 4> StringTokLocs; for (const Token &Tok : StringToks) StringTokLocs.push_back(Tok.getLocation()); QualType CharTy = Context.CharTy; StringLiteral::StringKind Kind = StringLiteral::Ascii; if (Literal.isWide()) { CharTy = Context.getWideCharType(); Kind = StringLiteral::Wide; } else if (Literal.isUTF8()) { Kind = StringLiteral::UTF8; } else if (Literal.isUTF16()) { CharTy = Context.Char16Ty; Kind = StringLiteral::UTF16; } else if (Literal.isUTF32()) { CharTy = Context.Char32Ty; Kind = StringLiteral::UTF32; } else if (Literal.isPascal()) { CharTy = Context.UnsignedCharTy; } QualType CharTyConst = CharTy; // A C++ string literal has a const-qualified element type (C++ 2.13.4p1). if (getLangOpts().CPlusPlus || getLangOpts().ConstStrings) CharTyConst.addConst(); // Get an array type for the string, according to C99 6.4.5. This includes // the nul terminator character as well as the string length for pascal // strings. QualType StrTy = Context.getConstantArrayType(CharTyConst, llvm::APInt(32, Literal.GetNumStringChars()+1), ArrayType::Normal, 0); // OpenCL v1.1 s6.5.3: a string literal is in the constant address space. if (getLangOpts().OpenCL) { StrTy = Context.getAddrSpaceQualType(StrTy, LangAS::opencl_constant); } // Pass &StringTokLocs[0], StringTokLocs.size() to factory!
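// For example, the pasted fragments "foo" "bar" become a single literal of
// type char[7] in C and const char[7] in C++: six characters plus the nul
// terminator accounted for above.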
StringLiteral *Lit = StringLiteral::Create(Context, Literal.GetString(), Kind, Literal.Pascal, StrTy, &StringTokLocs[0], StringTokLocs.size()); if (Literal.getUDSuffix().empty()) return Lit; // We're building a user-defined literal. IdentifierInfo *UDSuffix = &Context.Idents.get(Literal.getUDSuffix()); SourceLocation UDSuffixLoc = getUDSuffixLoc(*this, StringTokLocs[Literal.getUDSuffixToken()], Literal.getUDSuffixOffset()); // Make sure we're allowed user-defined literals here. if (!UDLScope) return ExprError(Diag(UDSuffixLoc, diag::err_invalid_string_udl)); // C++11 [lex.ext]p5: The literal L is treated as a call of the form // operator "" X (str, len) QualType SizeType = Context.getSizeType(); DeclarationName OpName = Context.DeclarationNames.getCXXLiteralOperatorName(UDSuffix); DeclarationNameInfo OpNameInfo(OpName, UDSuffixLoc); OpNameInfo.setCXXLiteralOperatorNameLoc(UDSuffixLoc); QualType ArgTy[] = { Context.getArrayDecayedType(StrTy), SizeType }; LookupResult R(*this, OpName, UDSuffixLoc, LookupOrdinaryName); switch (LookupLiteralOperator(UDLScope, R, ArgTy, /*AllowRaw*/ false, /*AllowTemplate*/ false, /*AllowStringTemplate*/ true, /*DiagnoseMissing*/ true)) { case LOLR_Cooked: { llvm::APInt Len(Context.getIntWidth(SizeType), Literal.GetNumStringChars()); IntegerLiteral *LenArg = IntegerLiteral::Create(Context, Len, SizeType, StringTokLocs[0]); Expr *Args[] = { Lit, LenArg }; return BuildLiteralOperatorCall(R, OpNameInfo, Args, StringTokLocs.back()); } case LOLR_StringTemplate: { TemplateArgumentListInfo ExplicitArgs; unsigned CharBits = Context.getIntWidth(CharTy); bool CharIsUnsigned = CharTy->isUnsignedIntegerType(); llvm::APSInt Value(CharBits, CharIsUnsigned); TemplateArgument TypeArg(CharTy); TemplateArgumentLocInfo TypeArgInfo(Context.getTrivialTypeSourceInfo(CharTy)); ExplicitArgs.addArgument(TemplateArgumentLoc(TypeArg, TypeArgInfo)); for (unsigned I = 0, N = Lit->getLength(); I != N; ++I) { Value = Lit->getCodeUnit(I); TemplateArgument Arg(Context, Value, CharTy); TemplateArgumentLocInfo ArgInfo; ExplicitArgs.addArgument(TemplateArgumentLoc(Arg, ArgInfo)); } return BuildLiteralOperatorCall(R, OpNameInfo, None, StringTokLocs.back(), &ExplicitArgs); } case LOLR_Raw: case LOLR_Template: case LOLR_ErrorNoDiagnostic: llvm_unreachable("unexpected literal operator lookup result"); case LOLR_Error: return ExprError(); } llvm_unreachable("unexpected literal operator lookup result"); } ExprResult Sema::BuildDeclRefExpr(ValueDecl *D, QualType Ty, ExprValueKind VK, SourceLocation Loc, const CXXScopeSpec *SS) { DeclarationNameInfo NameInfo(D->getDeclName(), Loc); return BuildDeclRefExpr(D, Ty, VK, NameInfo, SS); } /// BuildDeclRefExpr - Build an expression that references a /// declaration that does not require a closure capture. ExprResult Sema::BuildDeclRefExpr(ValueDecl *D, QualType Ty, ExprValueKind VK, const DeclarationNameInfo &NameInfo, const CXXScopeSpec *SS, NamedDecl *FoundD, const TemplateArgumentListInfo *TemplateArgs) { bool RefersToCapturedVariable = isa<VarDecl>(D) && NeedToCaptureVariable(cast<VarDecl>(D), NameInfo.getLoc()); DeclRefExpr *E; if (isa<VarTemplateSpecializationDecl>(D)) { VarTemplateSpecializationDecl *VarSpec = cast<VarTemplateSpecializationDecl>(D); E = DeclRefExpr::Create(Context, SS ? SS->getWithLocInContext(Context) : NestedNameSpecifierLoc(), VarSpec->getTemplateKeywordLoc(), D, RefersToCapturedVariable, NameInfo.getLoc(), Ty, VK, FoundD, TemplateArgs); } else { assert(!TemplateArgs && "No template arguments for non-variable" " template specialization references"); E = DeclRefExpr::Create(Context, SS ?
SS->getWithLocInContext(Context) : NestedNameSpecifierLoc(), SourceLocation(), D, RefersToCapturedVariable, NameInfo, Ty, VK, FoundD); } MarkDeclRefReferenced(E); if (getLangOpts().ObjCWeak && isa(D) && Ty.getObjCLifetime() == Qualifiers::OCL_Weak && !Diags.isIgnored(diag::warn_arc_repeated_use_of_weak, E->getLocStart())) recordUseOfEvaluatedWeak(E); FieldDecl *FD = dyn_cast(D); if (IndirectFieldDecl *IFD = dyn_cast(D)) FD = IFD->getAnonField(); if (FD) { UnusedPrivateFields.remove(FD); // Just in case we're building an illegal pointer-to-member. if (FD->isBitField()) E->setObjectKind(OK_BitField); } // C++ [expr.prim]/8: The expression [...] is a bit-field if the identifier // designates a bit-field. if (auto *BD = dyn_cast(D)) if (auto *BE = BD->getBinding()) E->setObjectKind(BE->getObjectKind()); return E; } /// Decomposes the given name into a DeclarationNameInfo, its location, and /// possibly a list of template arguments. /// /// If this produces template arguments, it is permitted to call /// DecomposeTemplateName. /// /// This actually loses a lot of source location information for /// non-standard name kinds; we should consider preserving that in /// some way. void Sema::DecomposeUnqualifiedId(const UnqualifiedId &Id, TemplateArgumentListInfo &Buffer, DeclarationNameInfo &NameInfo, const TemplateArgumentListInfo *&TemplateArgs) { if (Id.getKind() == UnqualifiedIdKind::IK_TemplateId) { Buffer.setLAngleLoc(Id.TemplateId->LAngleLoc); Buffer.setRAngleLoc(Id.TemplateId->RAngleLoc); ASTTemplateArgsPtr TemplateArgsPtr(Id.TemplateId->getTemplateArgs(), Id.TemplateId->NumArgs); translateTemplateArguments(TemplateArgsPtr, Buffer); TemplateName TName = Id.TemplateId->Template.get(); SourceLocation TNameLoc = Id.TemplateId->TemplateNameLoc; NameInfo = Context.getNameForTemplate(TName, TNameLoc); TemplateArgs = &Buffer; } else { NameInfo = GetNameFromUnqualifiedId(Id); TemplateArgs = nullptr; } } static void emitEmptyLookupTypoDiagnostic( const TypoCorrection &TC, Sema &SemaRef, const CXXScopeSpec &SS, DeclarationName Typo, SourceLocation TypoLoc, ArrayRef Args, unsigned DiagnosticID, unsigned DiagnosticSuggestID) { DeclContext *Ctx = SS.isEmpty() ? nullptr : SemaRef.computeDeclContext(SS, false); if (!TC) { // Emit a special diagnostic for failed member lookups. // FIXME: computing the declaration context might fail here (?) if (Ctx) SemaRef.Diag(TypoLoc, diag::err_no_member) << Typo << Ctx << SS.getRange(); else SemaRef.Diag(TypoLoc, DiagnosticID) << Typo; return; } std::string CorrectedStr = TC.getAsString(SemaRef.getLangOpts()); bool DroppedSpecifier = TC.WillReplaceSpecifier() && Typo.getAsString() == CorrectedStr; unsigned NoteID = TC.getCorrectionDeclAs() ? diag::note_implicit_param_decl : diag::note_previous_decl; if (!Ctx) SemaRef.diagnoseTypo(TC, SemaRef.PDiag(DiagnosticSuggestID) << Typo, SemaRef.PDiag(NoteID)); else SemaRef.diagnoseTypo(TC, SemaRef.PDiag(diag::err_no_member_suggest) << Typo << Ctx << DroppedSpecifier << SS.getRange(), SemaRef.PDiag(NoteID)); } /// Diagnose an empty lookup. 
/// /// \return false if new lookup candidates were found bool Sema::DiagnoseEmptyLookup(Scope *S, CXXScopeSpec &SS, LookupResult &R, std::unique_ptr CCC, TemplateArgumentListInfo *ExplicitTemplateArgs, ArrayRef Args, TypoExpr **Out) { DeclarationName Name = R.getLookupName(); unsigned diagnostic = diag::err_undeclared_var_use; unsigned diagnostic_suggest = diag::err_undeclared_var_use_suggest; if (Name.getNameKind() == DeclarationName::CXXOperatorName || Name.getNameKind() == DeclarationName::CXXLiteralOperatorName || Name.getNameKind() == DeclarationName::CXXConversionFunctionName) { diagnostic = diag::err_undeclared_use; diagnostic_suggest = diag::err_undeclared_use_suggest; } // If the original lookup was an unqualified lookup, fake an // unqualified lookup. This is useful when (for example) the // original lookup would not have found something because it was a // dependent name. DeclContext *DC = SS.isEmpty() ? CurContext : nullptr; while (DC) { if (isa(DC)) { LookupQualifiedName(R, DC); if (!R.empty()) { // Don't give errors about ambiguities in this lookup. R.suppressDiagnostics(); // During a default argument instantiation the CurContext points // to a CXXMethodDecl; but we can't apply a this-> fixit inside a // function parameter list, hence add an explicit check. bool isDefaultArgument = !CodeSynthesisContexts.empty() && CodeSynthesisContexts.back().Kind == CodeSynthesisContext::DefaultFunctionArgumentInstantiation; CXXMethodDecl *CurMethod = dyn_cast(CurContext); bool isInstance = CurMethod && CurMethod->isInstance() && DC == CurMethod->getParent() && !isDefaultArgument; // Give a code modification hint to insert 'this->'. // TODO: fixit for inserting 'Base::' in the other cases. // Actually quite difficult! if (getLangOpts().MSVCCompat) diagnostic = diag::ext_found_via_dependent_bases_lookup; if (isInstance) { Diag(R.getNameLoc(), diagnostic) << Name << FixItHint::CreateInsertion(R.getNameLoc(), "this->"); CheckCXXThisCapture(R.getNameLoc()); } else { Diag(R.getNameLoc(), diagnostic) << Name; } // Do we really want to note all of these? for (NamedDecl *D : R) Diag(D->getLocation(), diag::note_dependent_var_use); // Return true if we are inside a default argument instantiation // and the found name refers to an instance member function, otherwise // the function calling DiagnoseEmptyLookup will try to create an // implicit member call and this is wrong for default argument. if (isDefaultArgument && ((*R.begin())->isCXXInstanceMember())) { Diag(R.getNameLoc(), diag::err_member_call_without_object); return true; } // Tell the callee to try to recover. return false; } R.clear(); } // In Microsoft mode, if we are performing lookup from within a friend // function definition declared at class scope then we must set // DC to the lexical parent to be able to search into the parent // class. if (getLangOpts().MSVCCompat && isa(DC) && cast(DC)->getFriendObjectKind() && DC->getLexicalParent()->isRecord()) DC = DC->getLexicalParent(); else DC = DC->getParent(); } // We didn't find anything, so try to correct for a typo. 
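// For example, given 'void foo(int);', a call written 'fooo(1);' produces an
// empty lookup; the correction machinery below can then diagnose the typo and
// suggest (or, when recovery is safe, silently adopt) 'foo'.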
TypoCorrection Corrected; if (S && Out) { SourceLocation TypoLoc = R.getNameLoc(); assert(!ExplicitTemplateArgs && "Diagnosing an empty lookup with explicit template args!"); *Out = CorrectTypoDelayed( R.getLookupNameInfo(), R.getLookupKind(), S, &SS, std::move(CCC), [=](const TypoCorrection &TC) { emitEmptyLookupTypoDiagnostic(TC, *this, SS, Name, TypoLoc, Args, diagnostic, diagnostic_suggest); }, nullptr, CTK_ErrorRecovery); if (*Out) return true; } else if (S && (Corrected = CorrectTypo(R.getLookupNameInfo(), R.getLookupKind(), S, &SS, std::move(CCC), CTK_ErrorRecovery))) { std::string CorrectedStr(Corrected.getAsString(getLangOpts())); bool DroppedSpecifier = Corrected.WillReplaceSpecifier() && Name.getAsString() == CorrectedStr; R.setLookupName(Corrected.getCorrection()); bool AcceptableWithRecovery = false; bool AcceptableWithoutRecovery = false; NamedDecl *ND = Corrected.getFoundDecl(); if (ND) { if (Corrected.isOverloaded()) { OverloadCandidateSet OCS(R.getNameLoc(), OverloadCandidateSet::CSK_Normal); OverloadCandidateSet::iterator Best; for (NamedDecl *CD : Corrected) { if (FunctionTemplateDecl *FTD = dyn_cast(CD)) AddTemplateOverloadCandidate( FTD, DeclAccessPair::make(FTD, AS_none), ExplicitTemplateArgs, Args, OCS); else if (FunctionDecl *FD = dyn_cast(CD)) if (!ExplicitTemplateArgs || ExplicitTemplateArgs->size() == 0) AddOverloadCandidate(FD, DeclAccessPair::make(FD, AS_none), Args, OCS); } switch (OCS.BestViableFunction(*this, R.getNameLoc(), Best)) { case OR_Success: ND = Best->FoundDecl; Corrected.setCorrectionDecl(ND); break; default: // FIXME: Arbitrarily pick the first declaration for the note. Corrected.setCorrectionDecl(ND); break; } } R.addDecl(ND); if (getLangOpts().CPlusPlus && ND->isCXXClassMember()) { CXXRecordDecl *Record = nullptr; if (Corrected.getCorrectionSpecifier()) { const Type *Ty = Corrected.getCorrectionSpecifier()->getAsType(); Record = Ty->getAsCXXRecordDecl(); } if (!Record) Record = cast( ND->getDeclContext()->getRedeclContext()); R.setNamingClass(Record); } auto *UnderlyingND = ND->getUnderlyingDecl(); AcceptableWithRecovery = isa(UnderlyingND) || isa(UnderlyingND); // FIXME: If we ended up with a typo for a type name or // Objective-C class name, we're in trouble because the parser // is in the wrong place to recover. Suggest the typo // correction, but don't make it a fix-it since we're not going // to recover well anyway. AcceptableWithoutRecovery = isa(UnderlyingND) || isa(UnderlyingND); } else { // FIXME: We found a keyword. Suggest it, but don't provide a fix-it // because we aren't able to recover. AcceptableWithoutRecovery = true; } if (AcceptableWithRecovery || AcceptableWithoutRecovery) { unsigned NoteID = Corrected.getCorrectionDeclAs() ? diag::note_implicit_param_decl : diag::note_previous_decl; if (SS.isEmpty()) diagnoseTypo(Corrected, PDiag(diagnostic_suggest) << Name, PDiag(NoteID), AcceptableWithRecovery); else diagnoseTypo(Corrected, PDiag(diag::err_no_member_suggest) << Name << computeDeclContext(SS, false) << DroppedSpecifier << SS.getRange(), PDiag(NoteID), AcceptableWithRecovery); // Tell the callee whether to try to recover. return !AcceptableWithRecovery; } } R.clear(); // Emit a special diagnostic for failed member lookups. // FIXME: computing the declaration context might fail here (?) if (!SS.isEmpty()) { Diag(R.getNameLoc(), diag::err_no_member) << Name << computeDeclContext(SS, false) << SS.getRange(); return true; } // Give up, we can't recover. 
Diag(R.getNameLoc(), diagnostic) << Name; return true; } /// In Microsoft mode, if we are inside a template class whose parent class has /// dependent base classes, and we can't resolve an unqualified identifier, then /// assume the identifier is a member of a dependent base class. We can only /// recover successfully in static methods, instance methods, and other contexts /// where 'this' is available. This doesn't precisely match MSVC's /// instantiation model, but it's close enough. static Expr * recoverFromMSUnqualifiedLookup(Sema &S, ASTContext &Context, DeclarationNameInfo &NameInfo, SourceLocation TemplateKWLoc, const TemplateArgumentListInfo *TemplateArgs) { // Only try to recover from lookup into dependent bases in static methods or // contexts where 'this' is available. QualType ThisType = S.getCurrentThisType(); const CXXRecordDecl *RD = nullptr; if (!ThisType.isNull()) RD = ThisType->getPointeeType()->getAsCXXRecordDecl(); else if (auto *MD = dyn_cast(S.CurContext)) RD = MD->getParent(); if (!RD || !RD->hasAnyDependentBases()) return nullptr; // Diagnose this as unqualified lookup into a dependent base class. If 'this' // is available, suggest inserting 'this->' as a fixit. SourceLocation Loc = NameInfo.getLoc(); auto DB = S.Diag(Loc, diag::ext_undeclared_unqual_id_with_dependent_base); DB << NameInfo.getName() << RD; if (!ThisType.isNull()) { DB << FixItHint::CreateInsertion(Loc, "this->"); return CXXDependentScopeMemberExpr::Create( Context, /*This=*/nullptr, ThisType, /*IsArrow=*/true, /*Op=*/SourceLocation(), NestedNameSpecifierLoc(), TemplateKWLoc, /*FirstQualifierInScope=*/nullptr, NameInfo, TemplateArgs); } // Synthesize a fake NNS that points to the derived class. This will // perform name lookup during template instantiation. CXXScopeSpec SS; auto *NNS = NestedNameSpecifier::Create(Context, nullptr, true, RD->getTypeForDecl()); SS.MakeTrivial(Context, NNS, SourceRange(Loc, Loc)); return DependentScopeDeclRefExpr::Create( Context, SS.getWithLocInContext(Context), TemplateKWLoc, NameInfo, TemplateArgs); } ExprResult Sema::ActOnIdExpression(Scope *S, CXXScopeSpec &SS, SourceLocation TemplateKWLoc, UnqualifiedId &Id, bool HasTrailingLParen, bool IsAddressOfOperand, std::unique_ptr CCC, bool IsInlineAsmIdentifier, Token *KeywordReplacement) { assert(!(IsAddressOfOperand && HasTrailingLParen) && "cannot be direct & operand and have a trailing lparen"); if (SS.isInvalid()) return ExprError(); TemplateArgumentListInfo TemplateArgsBuffer; // Decompose the UnqualifiedId into the following data. DeclarationNameInfo NameInfo; const TemplateArgumentListInfo *TemplateArgs; DecomposeUnqualifiedId(Id, TemplateArgsBuffer, NameInfo, TemplateArgs); DeclarationName Name = NameInfo.getName(); IdentifierInfo *II = Name.getAsIdentifierInfo(); SourceLocation NameLoc = NameInfo.getLoc(); if (II && II->isEditorPlaceholder()) { // FIXME: When typed placeholders are supported we can create a typed // placeholder expression node. return ExprError(); } // C++ [temp.dep.expr]p3: // An id-expression is type-dependent if it contains: // -- an identifier that was declared with a dependent type, // (note: handled after lookup) // -- a template-id that is dependent, // (note: handled in BuildTemplateIdExpr) // -- a conversion-function-id that specifies a dependent type, // -- a nested-name-specifier that contains a class-name that // names a dependent type. // Determine whether this is a member of an unknown specialization; // we need to handle these differently. 
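// For example, in
//
//   template <typename T> struct S { int f() { return T::value; } };
//
// 'T::value' names a member of the dependent type 'T'; it cannot be resolved
// here, so it is built as a dependent id-expression below and looked up again
// at instantiation time.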
bool DependentID = false; if (Name.getNameKind() == DeclarationName::CXXConversionFunctionName && Name.getCXXNameType()->isDependentType()) { DependentID = true; } else if (SS.isSet()) { if (DeclContext *DC = computeDeclContext(SS, false)) { if (RequireCompleteDeclContext(SS, DC)) return ExprError(); } else { DependentID = true; } } if (DependentID) return ActOnDependentIdExpression(SS, TemplateKWLoc, NameInfo, IsAddressOfOperand, TemplateArgs); // Perform the required lookup. LookupResult R(*this, NameInfo, (Id.getKind() == UnqualifiedIdKind::IK_ImplicitSelfParam) ? LookupObjCImplicitSelfParam : LookupOrdinaryName); if (TemplateArgs) { // Lookup the template name again to correctly establish the context in // which it was found. This is really unfortunate as we already did the // lookup to determine that it was a template name in the first place. If // this becomes a performance hit, we can work harder to preserve those // results until we get here but it's likely not worth it. bool MemberOfUnknownSpecialization; LookupTemplateName(R, S, SS, QualType(), /*EnteringContext=*/false, MemberOfUnknownSpecialization); if (MemberOfUnknownSpecialization || (R.getResultKind() == LookupResult::NotFoundInCurrentInstantiation)) return ActOnDependentIdExpression(SS, TemplateKWLoc, NameInfo, IsAddressOfOperand, TemplateArgs); } else { bool IvarLookupFollowUp = II && !SS.isSet() && getCurMethodDecl(); LookupParsedName(R, S, &SS, !IvarLookupFollowUp); // If the result might be in a dependent base class, this is a dependent // id-expression. if (R.getResultKind() == LookupResult::NotFoundInCurrentInstantiation) return ActOnDependentIdExpression(SS, TemplateKWLoc, NameInfo, IsAddressOfOperand, TemplateArgs); // If this reference is in an Objective-C method, then we need to do // some special Objective-C lookup, too. if (IvarLookupFollowUp) { ExprResult E(LookupInObjCMethod(R, S, II, true)); if (E.isInvalid()) return ExprError(); if (Expr *Ex = E.getAs()) return Ex; } } if (R.isAmbiguous()) return ExprError(); // This could be an implicitly declared function reference (legal in C90, // extension in C99, forbidden in C++). if (R.empty() && HasTrailingLParen && II && !getLangOpts().CPlusPlus) { NamedDecl *D = ImplicitlyDefineFunction(NameLoc, *II, S); if (D) R.addDecl(D); } // Determine whether this name might be a candidate for // argument-dependent lookup. bool ADL = UseArgumentDependentLookup(SS, R, HasTrailingLParen); if (R.empty() && !ADL) { if (SS.isEmpty() && getLangOpts().MSVCCompat) { if (Expr *E = recoverFromMSUnqualifiedLookup(*this, Context, NameInfo, TemplateKWLoc, TemplateArgs)) return E; } // Don't diagnose an empty lookup for inline assembly. if (IsInlineAsmIdentifier) return ExprError(); // If this name wasn't predeclared and if this is not a function // call, diagnose the problem. TypoExpr *TE = nullptr; auto DefaultValidator = llvm::make_unique( II, SS.isValid() ? SS.getScopeRep() : nullptr); DefaultValidator->IsAddressOfOperand = IsAddressOfOperand; assert((!CCC || CCC->IsAddressOfOperand == IsAddressOfOperand) && "Typo correction callback misconfigured"); if (CCC) { // Make sure the callback knows what the typo being diagnosed is. CCC->setTypoName(II); if (SS.isValid()) CCC->setTypoNNS(SS.getScopeRep()); } if (DiagnoseEmptyLookup(S, SS, R, CCC ? 
std::move(CCC) : std::move(DefaultValidator), nullptr, None, &TE)) { if (TE && KeywordReplacement) { auto &State = getTypoExprState(TE); auto BestTC = State.Consumer->getNextCorrection(); if (BestTC.isKeyword()) { auto *II = BestTC.getCorrectionAsIdentifierInfo(); if (State.DiagHandler) State.DiagHandler(BestTC); KeywordReplacement->startToken(); KeywordReplacement->setKind(II->getTokenID()); KeywordReplacement->setIdentifierInfo(II); KeywordReplacement->setLocation(BestTC.getCorrectionRange().getBegin()); // Clean up the state associated with the TypoExpr, since it has // now been diagnosed (without a call to CorrectDelayedTyposInExpr). clearDelayedTypo(TE); // Signal that a correction to a keyword was performed by returning a // valid-but-null ExprResult. return (Expr*)nullptr; } State.Consumer->resetCorrectionStream(); } return TE ? TE : ExprError(); } assert(!R.empty() && "DiagnoseEmptyLookup returned false but added no results"); // If we found an Objective-C instance variable, let // LookupInObjCMethod build the appropriate expression to // reference the ivar. if (ObjCIvarDecl *Ivar = R.getAsSingle()) { R.clear(); ExprResult E(LookupInObjCMethod(R, S, Ivar->getIdentifier())); // In a hopelessly buggy code, Objective-C instance variable // lookup fails and no expression will be built to reference it. if (!E.isInvalid() && !E.get()) return ExprError(); return E; } } // This is guaranteed from this point on. assert(!R.empty() || ADL); // Check whether this might be a C++ implicit instance member access. // C++ [class.mfct.non-static]p3: // When an id-expression that is not part of a class member access // syntax and not used to form a pointer to member is used in the // body of a non-static member function of class X, if name lookup // resolves the name in the id-expression to a non-static non-type // member of some class C, the id-expression is transformed into a // class member access expression using (*this) as the // postfix-expression to the left of the . operator. // // But we don't actually need to do this for '&' operands if R // resolved to a function or overloaded function set, because the // expression is ill-formed if it actually works out to be a // non-static member function: // // C++ [expr.ref]p4: // Otherwise, if E1.E2 refers to a non-static member function. . . // [t]he expression can be used only as the left-hand operand of a // member function call. // // There are other safeguards against such uses, but it's important // to get this right here so that we don't end up making a // spuriously dependent expression if we're inside a dependent // instance method. if (!R.empty() && (*R.begin())->isCXXClassMember()) { bool MightBeImplicitMember; if (!IsAddressOfOperand) MightBeImplicitMember = true; else if (!SS.isEmpty()) MightBeImplicitMember = false; else if (R.isOverloadedResult()) MightBeImplicitMember = false; else if (R.isUnresolvableResult()) MightBeImplicitMember = true; else MightBeImplicitMember = isa(R.getFoundDecl()) || isa(R.getFoundDecl()) || isa(R.getFoundDecl()); if (MightBeImplicitMember) return BuildPossibleImplicitMemberExpr(SS, TemplateKWLoc, R, TemplateArgs, S); } if (TemplateArgs || TemplateKWLoc.isValid()) { // In C++1y, if this is a variable template id, then check it // in BuildTemplateIdExpr(). // The single lookup result must be a variable template declaration. 
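// For example:
//
//   template <typename T> constexpr T pi = T(3.141592653589793);
//   double d = pi<double>;  // lookup yields the single VarTemplateDecl 'pi'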
if (Id.getKind() == UnqualifiedIdKind::IK_TemplateId && Id.TemplateId && Id.TemplateId->Kind == TNK_Var_template) { assert(R.getAsSingle<VarTemplateDecl>() && "There should only be one declaration found."); } return BuildTemplateIdExpr(SS, TemplateKWLoc, R, ADL, TemplateArgs); } return BuildDeclarationNameExpr(SS, R, ADL); } /// BuildQualifiedDeclarationNameExpr - Build a C++ qualified /// declaration name, generally during template instantiation. /// There's a large number of things which don't need to be done along /// this path. ExprResult Sema::BuildQualifiedDeclarationNameExpr( CXXScopeSpec &SS, const DeclarationNameInfo &NameInfo, bool IsAddressOfOperand, const Scope *S, TypeSourceInfo **RecoveryTSI) { DeclContext *DC = computeDeclContext(SS, false); if (!DC) return BuildDependentDeclRefExpr(SS, /*TemplateKWLoc=*/SourceLocation(), NameInfo, /*TemplateArgs=*/nullptr); if (RequireCompleteDeclContext(SS, DC)) return ExprError(); LookupResult R(*this, NameInfo, LookupOrdinaryName); LookupQualifiedName(R, DC); if (R.isAmbiguous()) return ExprError(); if (R.getResultKind() == LookupResult::NotFoundInCurrentInstantiation) return BuildDependentDeclRefExpr(SS, /*TemplateKWLoc=*/SourceLocation(), NameInfo, /*TemplateArgs=*/nullptr); if (R.empty()) { Diag(NameInfo.getLoc(), diag::err_no_member) << NameInfo.getName() << DC << SS.getRange(); return ExprError(); } if (const TypeDecl *TD = R.getAsSingle<TypeDecl>()) { // Diagnose a missing typename if this resolved unambiguously to a type in // a dependent context. If we can recover with a type, downgrade this to // a warning in Microsoft compatibility mode. unsigned DiagID = diag::err_typename_missing; if (RecoveryTSI && getLangOpts().MSVCCompat) DiagID = diag::ext_typename_missing; SourceLocation Loc = SS.getBeginLoc(); auto D = Diag(Loc, DiagID); D << SS.getScopeRep() << NameInfo.getName().getAsString() << SourceRange(Loc, NameInfo.getEndLoc()); // Don't recover if the caller isn't expecting us to or if we're in a SFINAE // context. if (!RecoveryTSI) return ExprError(); // Only issue the fixit if we're prepared to recover. D << FixItHint::CreateInsertion(Loc, "typename "); // Recover by pretending this was an elaborated type. QualType Ty = Context.getTypeDeclType(TD); TypeLocBuilder TLB; TLB.pushTypeSpec(Ty).setNameLoc(NameInfo.getLoc()); QualType ET = getElaboratedType(ETK_None, SS, Ty); ElaboratedTypeLoc QTL = TLB.push<ElaboratedTypeLoc>(ET); QTL.setElaboratedKeywordLoc(SourceLocation()); QTL.setQualifierLoc(SS.getWithLocInContext(Context)); *RecoveryTSI = TLB.getTypeSourceInfo(Context, ET); return ExprEmpty(); } // Defend against this resolving to an implicit member access. We usually // won't get here if this might be a legitimate class member (we end up in // BuildMemberReferenceExpr instead), but this can be valid if we're forming // a pointer-to-member or in an unevaluated context in C++11. if (!R.empty() && (*R.begin())->isCXXClassMember() && !IsAddressOfOperand) return BuildPossibleImplicitMemberExpr(SS, /*TemplateKWLoc=*/SourceLocation(), R, /*TemplateArgs=*/nullptr, S); return BuildDeclarationNameExpr(SS, R, /* ADL */ false); } /// LookupInObjCMethod - The parser has read a name in, and Sema has /// detected that we're currently inside an ObjC method. Perform some /// additional lookup. /// /// Ideally, most of this would be done by lookup, but there's /// actually quite a lot of extra work involved. /// /// Returns a null sentinel to indicate trivial success.
ExprResult Sema::LookupInObjCMethod(LookupResult &Lookup, Scope *S, IdentifierInfo *II, bool AllowBuiltinCreation) { SourceLocation Loc = Lookup.getNameLoc(); ObjCMethodDecl *CurMethod = getCurMethodDecl(); // Check for error condition which is already reported. if (!CurMethod) return ExprError(); // There are two cases to handle here. 1) scoped lookup could have failed, // in which case we should look for an ivar. 2) scoped lookup could have // found a decl, but that decl is outside the current instance method (i.e. // a global variable). In these two cases, we do a lookup for an ivar with // this name; if the lookup succeeds, we replace it with our current decl. // If we're in a class method, we don't normally want to look for // ivars. But if we don't find anything else, and there's an // ivar, that's an error. bool IsClassMethod = CurMethod->isClassMethod(); bool LookForIvars; if (Lookup.empty()) LookForIvars = true; else if (IsClassMethod) LookForIvars = false; else LookForIvars = (Lookup.isSingleResult() && Lookup.getFoundDecl()->isDefinedOutsideFunctionOrMethod()); ObjCInterfaceDecl *IFace = nullptr; if (LookForIvars) { IFace = CurMethod->getClassInterface(); ObjCInterfaceDecl *ClassDeclared; ObjCIvarDecl *IV = nullptr; if (IFace && (IV = IFace->lookupInstanceVariable(II, ClassDeclared))) { // Diagnose using an ivar in a class method. if (IsClassMethod) return ExprError(Diag(Loc, diag::err_ivar_use_in_class_method) << IV->getDeclName()); // If we're referencing an invalid decl, just return this as a silent // error node. The error diagnostic was already emitted on the decl. if (IV->isInvalidDecl()) return ExprError(); // Check if referencing a field with __attribute__((deprecated)). if (DiagnoseUseOfDecl(IV, Loc)) return ExprError(); // Diagnose the use of an ivar outside of the declaring class. if (IV->getAccessControl() == ObjCIvarDecl::Private && !declaresSameEntity(ClassDeclared, IFace) && !getLangOpts().DebuggerSupport) Diag(Loc, diag::err_private_ivar_access) << IV->getDeclName(); // FIXME: This should use a new expr for a direct reference, don't // turn this into Self->ivar, just return a BareIVarExpr or something. IdentifierInfo &II = Context.Idents.get("self"); UnqualifiedId SelfName; SelfName.setIdentifier(&II, SourceLocation()); SelfName.setKind(UnqualifiedIdKind::IK_ImplicitSelfParam); CXXScopeSpec SelfScopeSpec; SourceLocation TemplateKWLoc; ExprResult SelfExpr = ActOnIdExpression(S, SelfScopeSpec, TemplateKWLoc, SelfName, false, false); if (SelfExpr.isInvalid()) return ExprError(); SelfExpr = DefaultLvalueConversion(SelfExpr.get()); if (SelfExpr.isInvalid()) return ExprError(); MarkAnyDeclReferenced(Loc, IV, true); ObjCMethodFamily MF = CurMethod->getMethodFamily(); if (MF != OMF_init && MF != OMF_dealloc && MF != OMF_finalize && !IvarBacksCurrentMethodAccessor(IFace, CurMethod, IV)) Diag(Loc, diag::warn_direct_ivar_access) << IV->getDeclName(); ObjCIvarRefExpr *Result = new (Context) ObjCIvarRefExpr(IV, IV->getUsageType(SelfExpr.get()->getType()), Loc, IV->getLocation(), SelfExpr.get(), true, true); if (IV->getType().getObjCLifetime() == Qualifiers::OCL_Weak) { if (!Diags.isIgnored(diag::warn_arc_repeated_use_of_weak, Loc)) recordUseOfEvaluatedWeak(Result); } if (getLangOpts().ObjCAutoRefCount) { if (CurContext->isClosure()) Diag(Loc, diag::warn_implicitly_retains_self) << FixItHint::CreateInsertion(Loc, "self->"); } return Result; } } else if (CurMethod->isInstanceMethod()) { // We should warn if a local variable hides an ivar.
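// For example:
//
//   @interface I : NSObject { int val; } @end
//   @implementation I
//   - (void)m { int val = 0; val++; }  // 'val' hides the ivar; warn below
//   @end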
if (ObjCInterfaceDecl *IFace = CurMethod->getClassInterface()) { ObjCInterfaceDecl *ClassDeclared; if (ObjCIvarDecl *IV = IFace->lookupInstanceVariable(II, ClassDeclared)) { if (IV->getAccessControl() != ObjCIvarDecl::Private || declaresSameEntity(IFace, ClassDeclared)) Diag(Loc, diag::warn_ivar_use_hidden) << IV->getDeclName(); } } } else if (Lookup.isSingleResult() && Lookup.getFoundDecl()->isDefinedOutsideFunctionOrMethod()) { // If accessing a stand-alone ivar in a class method, this is an error. if (const ObjCIvarDecl *IV = dyn_cast(Lookup.getFoundDecl())) return ExprError(Diag(Loc, diag::err_ivar_use_in_class_method) << IV->getDeclName()); } if (Lookup.empty() && II && AllowBuiltinCreation) { // FIXME. Consolidate this with similar code in LookupName. if (unsigned BuiltinID = II->getBuiltinID()) { if (!(getLangOpts().CPlusPlus && Context.BuiltinInfo.isPredefinedLibFunction(BuiltinID))) { NamedDecl *D = LazilyCreateBuiltin((IdentifierInfo *)II, BuiltinID, S, Lookup.isForRedeclaration(), Lookup.getNameLoc()); if (D) Lookup.addDecl(D); } } } // Sentinel value saying that we didn't do anything special. return ExprResult((Expr *)nullptr); } /// \brief Cast a base object to a member's actual type. /// /// Logically this happens in three phases: /// /// * First we cast from the base type to the naming class. /// The naming class is the class into which we were looking /// when we found the member; it's the qualifier type if a /// qualifier was provided, and otherwise it's the base type. /// /// * Next we cast from the naming class to the declaring class. /// If the member we found was brought into a class's scope by /// a using declaration, this is that class; otherwise it's /// the class declaring the member. /// /// * Finally we cast from the declaring class to the "true" /// declaring class of the member. This conversion does not /// obey access control. ExprResult Sema::PerformObjectMemberConversion(Expr *From, NestedNameSpecifier *Qualifier, NamedDecl *FoundDecl, NamedDecl *Member) { CXXRecordDecl *RD = dyn_cast(Member->getDeclContext()); if (!RD) return From; QualType DestRecordType; QualType DestType; QualType FromRecordType; QualType FromType = From->getType(); bool PointerConversions = false; if (isa(Member)) { DestRecordType = Context.getCanonicalType(Context.getTypeDeclType(RD)); if (FromType->getAs()) { DestType = Context.getPointerType(DestRecordType); FromRecordType = FromType->getPointeeType(); PointerConversions = true; } else { DestType = DestRecordType; FromRecordType = FromType; } } else if (CXXMethodDecl *Method = dyn_cast(Member)) { if (Method->isStatic()) return From; DestType = Method->getThisType(Context); DestRecordType = DestType->getPointeeType(); if (FromType->getAs()) { FromRecordType = FromType->getPointeeType(); PointerConversions = true; } else { FromRecordType = FromType; DestType = DestRecordType; } } else { // No conversion necessary. return From; } if (DestType->isDependentType() || FromType->isDependentType()) return From; // If the unqualified types are the same, no conversion is necessary. if (Context.hasSameUnqualifiedType(FromRecordType, DestRecordType)) return From; SourceRange FromRange = From->getSourceRange(); SourceLocation FromLoc = FromRange.getBegin(); ExprValueKind VK = From->getValueKind(); // C++ [class.member.lookup]p8: // [...] Ambiguities can often be resolved by qualifying a name with its // class name. 
// // If the member was a qualified name and the qualified referred to a // specific base subobject type, we'll cast to that intermediate type // first and then to the object in which the member is declared. That allows // one to resolve ambiguities in, e.g., a diamond-shaped hierarchy such as: // // class Base { public: int x; }; // class Derived1 : public Base { }; // class Derived2 : public Base { }; // class VeryDerived : public Derived1, public Derived2 { void f(); }; // // void VeryDerived::f() { // x = 17; // error: ambiguous base subobjects // Derived1::x = 17; // okay, pick the Base subobject of Derived1 // } if (Qualifier && Qualifier->getAsType()) { QualType QType = QualType(Qualifier->getAsType(), 0); assert(QType->isRecordType() && "lookup done with non-record type"); QualType QRecordType = QualType(QType->getAs(), 0); // In C++98, the qualifier type doesn't actually have to be a base // type of the object type, in which case we just ignore it. // Otherwise build the appropriate casts. if (IsDerivedFrom(FromLoc, FromRecordType, QRecordType)) { CXXCastPath BasePath; if (CheckDerivedToBaseConversion(FromRecordType, QRecordType, FromLoc, FromRange, &BasePath)) return ExprError(); if (PointerConversions) QType = Context.getPointerType(QType); From = ImpCastExprToType(From, QType, CK_UncheckedDerivedToBase, VK, &BasePath).get(); FromType = QType; FromRecordType = QRecordType; // If the qualifier type was the same as the destination type, // we're done. if (Context.hasSameUnqualifiedType(FromRecordType, DestRecordType)) return From; } } bool IgnoreAccess = false; // If we actually found the member through a using declaration, cast // down to the using declaration's type. // // Pointer equality is fine here because only one declaration of a // class ever has member declarations. if (FoundDecl->getDeclContext() != Member->getDeclContext()) { assert(isa(FoundDecl)); QualType URecordType = Context.getTypeDeclType( cast(FoundDecl->getDeclContext())); // We only need to do this if the naming-class to declaring-class // conversion is non-trivial. if (!Context.hasSameUnqualifiedType(FromRecordType, URecordType)) { assert(IsDerivedFrom(FromLoc, FromRecordType, URecordType)); CXXCastPath BasePath; if (CheckDerivedToBaseConversion(FromRecordType, URecordType, FromLoc, FromRange, &BasePath)) return ExprError(); QualType UType = URecordType; if (PointerConversions) UType = Context.getPointerType(UType); From = ImpCastExprToType(From, UType, CK_UncheckedDerivedToBase, VK, &BasePath).get(); FromType = UType; FromRecordType = URecordType; } // We don't do access control for the conversion from the // declaring class to the true declaring class. IgnoreAccess = true; } CXXCastPath BasePath; if (CheckDerivedToBaseConversion(FromRecordType, DestRecordType, FromLoc, FromRange, &BasePath, IgnoreAccess)) return ExprError(); return ImpCastExprToType(From, DestType, CK_UncheckedDerivedToBase, VK, &BasePath); } bool Sema::UseArgumentDependentLookup(const CXXScopeSpec &SS, const LookupResult &R, bool HasTrailingLParen) { // Only when used directly as the postfix-expression of a call. if (!HasTrailingLParen) return false; // Never if a scope specifier was provided. if (SS.isSet()) return false; // Only in C++ or ObjC++. 
if (!getLangOpts().CPlusPlus) return false; // Turn off ADL when we find certain kinds of declarations during // normal lookup: for (NamedDecl *D : R) { // C++0x [basic.lookup.argdep]p3: // -- a declaration of a class member // Since using decls preserve this property, we check this on the // original decl. if (D->isCXXClassMember()) return false; // C++0x [basic.lookup.argdep]p3: // -- a block-scope function declaration that is not a // using-declaration // NOTE: we also trigger this for function templates (in fact, we // don't check the decl type at all, since all other decl types // turn off ADL anyway). if (isa(D)) D = cast(D)->getTargetDecl(); else if (D->getLexicalDeclContext()->isFunctionOrMethod()) return false; // C++0x [basic.lookup.argdep]p3: // -- a declaration that is neither a function or a function // template // And also for builtin functions. if (isa(D)) { FunctionDecl *FDecl = cast(D); // But also builtin functions. if (FDecl->getBuiltinID() && FDecl->isImplicit()) return false; } else if (!isa(D)) return false; } return true; } /// Diagnoses obvious problems with the use of the given declaration /// as an expression. This is only actually called for lookups that /// were not overloaded, and it doesn't promise that the declaration /// will in fact be used. static bool CheckDeclInExpr(Sema &S, SourceLocation Loc, NamedDecl *D) { if (D->isInvalidDecl()) return true; if (isa(D)) { S.Diag(Loc, diag::err_unexpected_typedef) << D->getDeclName(); return true; } if (isa(D)) { S.Diag(Loc, diag::err_unexpected_interface) << D->getDeclName(); return true; } if (isa(D)) { S.Diag(Loc, diag::err_unexpected_namespace) << D->getDeclName(); return true; } return false; } ExprResult Sema::BuildDeclarationNameExpr(const CXXScopeSpec &SS, LookupResult &R, bool NeedsADL, bool AcceptInvalidDecl) { // If this is a single, fully-resolved result and we don't need ADL, // just build an ordinary singleton decl ref. if (!NeedsADL && R.isSingleResult() && !R.getAsSingle()) return BuildDeclarationNameExpr(SS, R.getLookupNameInfo(), R.getFoundDecl(), R.getRepresentativeDecl(), nullptr, AcceptInvalidDecl); // We only need to check the declaration if there's exactly one // result, because in the overloaded case the results can only be // functions and function templates. if (R.isSingleResult() && CheckDeclInExpr(*this, R.getNameLoc(), R.getFoundDecl())) return ExprError(); // Otherwise, just build an unresolved lookup expression. Suppress // any lookup-related diagnostics; we'll hash these out later, when // we've picked a target. R.suppressDiagnostics(); UnresolvedLookupExpr *ULE = UnresolvedLookupExpr::Create(Context, R.getNamingClass(), SS.getWithLocInContext(Context), R.getLookupNameInfo(), NeedsADL, R.isOverloadedResult(), R.begin(), R.end()); return ULE; } static void diagnoseUncapturableValueReference(Sema &S, SourceLocation loc, ValueDecl *var, DeclContext *DC); /// \brief Complete semantic analysis for a reference to the given declaration. 
ExprResult Sema::BuildDeclarationNameExpr( const CXXScopeSpec &SS, const DeclarationNameInfo &NameInfo, NamedDecl *D, NamedDecl *FoundD, const TemplateArgumentListInfo *TemplateArgs, bool AcceptInvalidDecl) { assert(D && "Cannot refer to a NULL declaration"); assert(!isa(D) && "Cannot refer unambiguously to a function template"); SourceLocation Loc = NameInfo.getLoc(); if (CheckDeclInExpr(*this, Loc, D)) return ExprError(); if (TemplateDecl *Template = dyn_cast(D)) { // Specifically diagnose references to class templates that are missing // a template argument list. Diag(Loc, diag::err_template_decl_ref) << (isa(D) ? 1 : 0) << Template << SS.getRange(); Diag(Template->getLocation(), diag::note_template_decl_here); return ExprError(); } // Make sure that we're referring to a value. ValueDecl *VD = dyn_cast(D); if (!VD) { Diag(Loc, diag::err_ref_non_value) << D << SS.getRange(); Diag(D->getLocation(), diag::note_declared_at); return ExprError(); } // Check whether this declaration can be used. Note that we suppress // this check when we're going to perform argument-dependent lookup // on this function name, because this might not be the function // that overload resolution actually selects. if (DiagnoseUseOfDecl(VD, Loc)) return ExprError(); // Only create DeclRefExpr's for valid Decl's. if (VD->isInvalidDecl() && !AcceptInvalidDecl) return ExprError(); // Handle members of anonymous structs and unions. If we got here, // and the reference is to a class member indirect field, then this // must be the subject of a pointer-to-member expression. if (IndirectFieldDecl *indirectField = dyn_cast(VD)) if (!indirectField->isCXXClassMember()) return BuildAnonymousStructUnionMemberReference(SS, NameInfo.getLoc(), indirectField); { QualType type = VD->getType(); if (type.isNull()) return ExprError(); if (auto *FPT = type->getAs()) { // C++ [except.spec]p17: // An exception-specification is considered to be needed when: // - in an expression, the function is the unique lookup result or // the selected member of a set of overloaded functions. ResolveExceptionSpec(Loc, FPT); type = VD->getType(); } ExprValueKind valueKind = VK_RValue; switch (D->getKind()) { // Ignore all the non-ValueDecl kinds. #define ABSTRACT_DECL(kind) #define VALUE(type, base) #define DECL(type, base) \ case Decl::type: #include "clang/AST/DeclNodes.inc" llvm_unreachable("invalid value decl kind"); // These shouldn't make it here. case Decl::ObjCAtDefsField: case Decl::ObjCIvar: llvm_unreachable("forming non-member reference to ivar?"); // Enum constants are always r-values and never references. // Unresolved using declarations are dependent. case Decl::EnumConstant: case Decl::UnresolvedUsingValue: case Decl::OMPDeclareReduction: valueKind = VK_RValue; break; // Fields and indirect fields that got here must be for // pointer-to-member expressions; we just call them l-values for // internal consistency, because this subexpression doesn't really // exist in the high-level semantics. case Decl::Field: case Decl::IndirectField: assert(getLangOpts().CPlusPlus && "building reference to field in C?"); // These can't have reference type in well-formed programs, but // for internal consistency we do this anyway. type = type.getNonReferenceType(); valueKind = VK_LValue; break; // Non-type template parameters are either l-values or r-values // depending on the type. 
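// For example:
//
//   template <int N, int &R> void f() {
//     // 'N' is a prvalue of type int (cv-qualifiers are stripped below);
//     // 'R' is an lvalue of type int.
//   }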
case Decl::NonTypeTemplateParm: { if (const ReferenceType *reftype = type->getAs()) { type = reftype->getPointeeType(); valueKind = VK_LValue; // even if the parameter is an r-value reference break; } // For non-references, we need to strip qualifiers just in case // the template parameter was declared as 'const int' or whatever. valueKind = VK_RValue; type = type.getUnqualifiedType(); break; } case Decl::Var: case Decl::VarTemplateSpecialization: case Decl::VarTemplatePartialSpecialization: case Decl::Decomposition: case Decl::OMPCapturedExpr: // In C, "extern void blah;" is valid and is an r-value. if (!getLangOpts().CPlusPlus && !type.hasQualifiers() && type->isVoidType()) { valueKind = VK_RValue; break; } LLVM_FALLTHROUGH; case Decl::ImplicitParam: case Decl::ParmVar: { // These are always l-values. valueKind = VK_LValue; type = type.getNonReferenceType(); // FIXME: Does the addition of const really only apply in // potentially-evaluated contexts? Since the variable isn't actually // captured in an unevaluated context, it seems that the answer is no. if (!isUnevaluatedContext()) { QualType CapturedType = getCapturedDeclRefType(cast(VD), Loc); if (!CapturedType.isNull()) type = CapturedType; } break; } case Decl::Binding: { // These are always lvalues. valueKind = VK_LValue; type = type.getNonReferenceType(); // FIXME: Support lambda-capture of BindingDecls, once CWG actually // decides how that's supposed to work. auto *BD = cast(VD); if (BD->getDeclContext()->isFunctionOrMethod() && BD->getDeclContext() != CurContext) diagnoseUncapturableValueReference(*this, Loc, BD, CurContext); break; } case Decl::Function: { if (unsigned BID = cast(VD)->getBuiltinID()) { if (!Context.BuiltinInfo.isPredefinedLibFunction(BID)) { type = Context.BuiltinFnTy; valueKind = VK_RValue; break; } } const FunctionType *fty = type->castAs(); // If we're referring to a function with an __unknown_anytype // result type, make the entire expression __unknown_anytype. if (fty->getReturnType() == Context.UnknownAnyTy) { type = Context.UnknownAnyTy; valueKind = VK_RValue; break; } // Functions are l-values in C++. if (getLangOpts().CPlusPlus) { valueKind = VK_LValue; break; } // C99 DR 316 says that, if a function type comes from a // function definition (without a prototype), that type is only // used for checking compatibility. Therefore, when referencing // the function, we pretend that we don't have the full function // type. if (!cast(VD)->hasPrototype() && isa(fty)) type = Context.getFunctionNoProtoType(fty->getReturnType(), fty->getExtInfo()); // Functions are r-values in C. valueKind = VK_RValue; break; } case Decl::CXXDeductionGuide: llvm_unreachable("building reference to deduction guide"); case Decl::MSProperty: valueKind = VK_LValue; break; case Decl::CXXMethod: // If we're referring to a method with an __unknown_anytype // result type, make the entire expression __unknown_anytype. // This should only be possible with a type written directly. if (const FunctionProtoType *proto = dyn_cast(VD->getType())) if (proto->getReturnType() == Context.UnknownAnyTy) { type = Context.UnknownAnyTy; valueKind = VK_RValue; break; } // C++ methods are l-values if static, r-values if non-static. 
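// For example, '&C::staticMethod' takes the address of an ordinary lvalue
// function designator, while naming a non-static 'C::method' yields an
// rvalue that is only useful to form a pointer to member or to call.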
if (cast(VD)->isStatic()) { valueKind = VK_LValue; break; } LLVM_FALLTHROUGH; case Decl::CXXConversion: case Decl::CXXDestructor: case Decl::CXXConstructor: valueKind = VK_RValue; break; } return BuildDeclRefExpr(VD, type, valueKind, NameInfo, &SS, FoundD, TemplateArgs); } } static void ConvertUTF8ToWideString(unsigned CharByteWidth, StringRef Source, SmallString<32> &Target) { Target.resize(CharByteWidth * (Source.size() + 1)); char *ResultPtr = &Target[0]; const llvm::UTF8 *ErrorPtr; bool success = llvm::ConvertUTF8toWide(CharByteWidth, Source, ResultPtr, ErrorPtr); (void)success; assert(success); Target.resize(ResultPtr - &Target[0]); } ExprResult Sema::BuildPredefinedExpr(SourceLocation Loc, PredefinedExpr::IdentType IT) { // Pick the current block, lambda, captured statement or function. Decl *currentDecl = nullptr; if (const BlockScopeInfo *BSI = getCurBlock()) currentDecl = BSI->TheDecl; else if (const LambdaScopeInfo *LSI = getCurLambda()) currentDecl = LSI->CallOperator; else if (const CapturedRegionScopeInfo *CSI = getCurCapturedRegion()) currentDecl = CSI->TheCapturedDecl; else currentDecl = getCurFunctionOrMethodDecl(); if (!currentDecl) { Diag(Loc, diag::ext_predef_outside_function); currentDecl = Context.getTranslationUnitDecl(); } QualType ResTy; StringLiteral *SL = nullptr; if (cast(currentDecl)->isDependentContext()) ResTy = Context.DependentTy; else { // Pre-defined identifiers are of type char[x], where x is the length of // the string. auto Str = PredefinedExpr::ComputeName(IT, currentDecl); unsigned Length = Str.length(); llvm::APInt LengthI(32, Length + 1); if (IT == PredefinedExpr::LFunction) { ResTy = Context.WideCharTy.withConst(); SmallString<32> RawChars; ConvertUTF8ToWideString(Context.getTypeSizeInChars(ResTy).getQuantity(), Str, RawChars); ResTy = Context.getConstantArrayType(ResTy, LengthI, ArrayType::Normal, /*IndexTypeQuals*/ 0); SL = StringLiteral::Create(Context, RawChars, StringLiteral::Wide, /*Pascal*/ false, ResTy, Loc); } else { ResTy = Context.CharTy.withConst(); ResTy = Context.getConstantArrayType(ResTy, LengthI, ArrayType::Normal, /*IndexTypeQuals*/ 0); SL = StringLiteral::Create(Context, Str, StringLiteral::Ascii, /*Pascal*/ false, ResTy, Loc); } } return new (Context) PredefinedExpr(Loc, ResTy, IT, SL); } ExprResult Sema::ActOnPredefinedExpr(SourceLocation Loc, tok::TokenKind Kind) { PredefinedExpr::IdentType IT; switch (Kind) { default: llvm_unreachable("Unknown simple primary expr!"); case tok::kw___func__: IT = PredefinedExpr::Func; break; // [C99 6.4.2.2] case tok::kw___FUNCTION__: IT = PredefinedExpr::Function; break; case tok::kw___FUNCDNAME__: IT = PredefinedExpr::FuncDName; break; // [MS] case tok::kw___FUNCSIG__: IT = PredefinedExpr::FuncSig; break; // [MS] case tok::kw_L__FUNCTION__: IT = PredefinedExpr::LFunction; break; case tok::kw___PRETTY_FUNCTION__: IT = PredefinedExpr::PrettyFunction; break; } return BuildPredefinedExpr(Loc, IT); } ExprResult Sema::ActOnCharacterConstant(const Token &Tok, Scope *UDLScope) { SmallString<16> CharBuffer; bool Invalid = false; StringRef ThisTok = PP.getSpelling(Tok, CharBuffer, &Invalid); if (Invalid) return ExprError(); CharLiteralParser Literal(ThisTok.begin(), ThisTok.end(), Tok.getLocation(), PP, Tok.getKind()); if (Literal.hadError()) return ExprError(); QualType Ty; if (Literal.isWide()) Ty = Context.WideCharTy; // L'x' -> wchar_t in C and C++. else if (Literal.isUTF16()) Ty = Context.Char16Ty; // u'x' -> char16_t in C11 and C++11. 
else if (Literal.isUTF32()) Ty = Context.Char32Ty; // U'x' -> char32_t in C11 and C++11. else if (!getLangOpts().CPlusPlus || Literal.isMultiChar()) Ty = Context.IntTy; // 'x' -> int in C, 'wxyz' -> int in C++. else Ty = Context.CharTy; // 'x' -> char in C++ CharacterLiteral::CharacterKind Kind = CharacterLiteral::Ascii; if (Literal.isWide()) Kind = CharacterLiteral::Wide; else if (Literal.isUTF16()) Kind = CharacterLiteral::UTF16; else if (Literal.isUTF32()) Kind = CharacterLiteral::UTF32; else if (Literal.isUTF8()) Kind = CharacterLiteral::UTF8; Expr *Lit = new (Context) CharacterLiteral(Literal.getValue(), Kind, Ty, Tok.getLocation()); if (Literal.getUDSuffix().empty()) return Lit; // We're building a user-defined literal. IdentifierInfo *UDSuffix = &Context.Idents.get(Literal.getUDSuffix()); SourceLocation UDSuffixLoc = getUDSuffixLoc(*this, Tok.getLocation(), Literal.getUDSuffixOffset()); // Make sure we're allowed user-defined literals here. if (!UDLScope) return ExprError(Diag(UDSuffixLoc, diag::err_invalid_character_udl)); // C++11 [lex.ext]p6: The literal L is treated as a call of the form // operator "" X (ch) return BuildCookedLiteralOperatorCall(*this, UDLScope, UDSuffix, UDSuffixLoc, Lit, Tok.getLocation()); } ExprResult Sema::ActOnIntegerConstant(SourceLocation Loc, uint64_t Val) { unsigned IntSize = Context.getTargetInfo().getIntWidth(); return IntegerLiteral::Create(Context, llvm::APInt(IntSize, Val), Context.IntTy, Loc); } static Expr *BuildFloatingLiteral(Sema &S, NumericLiteralParser &Literal, QualType Ty, SourceLocation Loc) { const llvm::fltSemantics &Format = S.Context.getFloatTypeSemantics(Ty); using llvm::APFloat; APFloat Val(Format); APFloat::opStatus result = Literal.GetFloatValue(Val); // Overflow is always an error, but underflow is only an error if // we underflowed to zero (APFloat reports denormals as underflow). if ((result & APFloat::opOverflow) || ((result & APFloat::opUnderflow) && Val.isZero())) { unsigned diagnostic; SmallString<20> buffer; if (result & APFloat::opOverflow) { diagnostic = diag::warn_float_overflow; APFloat::getLargest(Format).toString(buffer); } else { diagnostic = diag::warn_float_underflow; APFloat::getSmallest(Format).toString(buffer); } S.Diag(Loc, diagnostic) << Ty << StringRef(buffer.data(), buffer.size()); } bool isExact = (result == APFloat::opOK); return FloatingLiteral::Create(S.Context, Val, isExact, Ty, Loc); } bool Sema::CheckLoopHintExpr(Expr *E, SourceLocation Loc) { assert(E && "Invalid expression"); if (E->isValueDependent()) return false; QualType QT = E->getType(); if (!QT->isIntegerType() || QT->isBooleanType() || QT->isCharType()) { Diag(E->getExprLoc(), diag::err_pragma_loop_invalid_argument_type) << QT; return true; } llvm::APSInt ValueAPS; ExprResult R = VerifyIntegerConstantExpression(E, &ValueAPS); if (R.isInvalid()) return true; bool ValueIsPositive = ValueAPS.isStrictlyPositive(); if (!ValueIsPositive || ValueAPS.getActiveBits() > 31) { Diag(E->getExprLoc(), diag::err_pragma_loop_invalid_argument_value) << ValueAPS.toString(10) << ValueIsPositive; return true; } return false; } ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) { // Fast path for a single digit (which is quite common). A single digit // cannot have a trigraph, escaped newline, radix prefix, or suffix. 
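// For example, the token '7' takes the fast path below: its value is computed
// directly from the spelling as '7' - '0', with no NumericLiteralParser.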
if (Tok.getLength() == 1) { const char Val = PP.getSpellingOfSingleCharacterNumericConstant(Tok); return ActOnIntegerConstant(Tok.getLocation(), Val-'0'); } SmallString<128> SpellingBuffer; // NumericLiteralParser wants to overread by one character. Add padding to // the buffer in case the token is copied to the buffer. If getSpelling() // returns a StringRef to the memory buffer, it should have a null char at // the EOF, so it is also safe. SpellingBuffer.resize(Tok.getLength() + 1); // Get the spelling of the token, which eliminates trigraphs, etc. bool Invalid = false; StringRef TokSpelling = PP.getSpelling(Tok, SpellingBuffer, &Invalid); if (Invalid) return ExprError(); NumericLiteralParser Literal(TokSpelling, Tok.getLocation(), PP); if (Literal.hadError) return ExprError(); if (Literal.hasUDSuffix()) { // We're building a user-defined literal. IdentifierInfo *UDSuffix = &Context.Idents.get(Literal.getUDSuffix()); SourceLocation UDSuffixLoc = getUDSuffixLoc(*this, Tok.getLocation(), Literal.getUDSuffixOffset()); // Make sure we're allowed user-defined literals here. if (!UDLScope) return ExprError(Diag(UDSuffixLoc, diag::err_invalid_numeric_udl)); QualType CookedTy; if (Literal.isFloatingLiteral()) { // C++11 [lex.ext]p4: If S contains a literal operator with parameter type // long double, the literal is treated as a call of the form // operator "" X (f L) CookedTy = Context.LongDoubleTy; } else { // C++11 [lex.ext]p3: If S contains a literal operator with parameter type // unsigned long long, the literal is treated as a call of the form // operator "" X (n ULL) CookedTy = Context.UnsignedLongLongTy; } DeclarationName OpName = Context.DeclarationNames.getCXXLiteralOperatorName(UDSuffix); DeclarationNameInfo OpNameInfo(OpName, UDSuffixLoc); OpNameInfo.setCXXLiteralOperatorNameLoc(UDSuffixLoc); SourceLocation TokLoc = Tok.getLocation(); // Perform literal operator lookup to determine if we're building a raw // literal or a cooked one. LookupResult R(*this, OpName, UDSuffixLoc, LookupOrdinaryName); switch (LookupLiteralOperator(UDLScope, R, CookedTy, /*AllowRaw*/ true, /*AllowTemplate*/ true, /*AllowStringTemplate*/ false, /*DiagnoseMissing*/ !Literal.isImaginary)) { case LOLR_ErrorNoDiagnostic: // Lookup failure for imaginary constants isn't fatal, there's still the // GNU extension producing _Complex types. 
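      // For example, `1.0i` with no viable operator""i in scope falls
      // through here and is handled as a GNU imaginary constant, producing
      // an ImaginaryLiteral of type _Complex double further below.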
break; case LOLR_Error: return ExprError(); case LOLR_Cooked: { Expr *Lit; if (Literal.isFloatingLiteral()) { Lit = BuildFloatingLiteral(*this, Literal, CookedTy, Tok.getLocation()); } else { llvm::APInt ResultVal(Context.getTargetInfo().getLongLongWidth(), 0); if (Literal.GetIntegerValue(ResultVal)) Diag(Tok.getLocation(), diag::err_integer_literal_too_large) << /* Unsigned */ 1; Lit = IntegerLiteral::Create(Context, ResultVal, CookedTy, Tok.getLocation()); } return BuildLiteralOperatorCall(R, OpNameInfo, Lit, TokLoc); } case LOLR_Raw: { // C++11 [lit.ext]p3, p4: If S contains a raw literal operator, the // literal is treated as a call of the form // operator "" X ("n") unsigned Length = Literal.getUDSuffixOffset(); QualType StrTy = Context.getConstantArrayType( Context.CharTy.withConst(), llvm::APInt(32, Length + 1), ArrayType::Normal, 0); Expr *Lit = StringLiteral::Create( Context, StringRef(TokSpelling.data(), Length), StringLiteral::Ascii, /*Pascal*/false, StrTy, &TokLoc, 1); return BuildLiteralOperatorCall(R, OpNameInfo, Lit, TokLoc); } case LOLR_Template: { // C++11 [lit.ext]p3, p4: Otherwise (S contains a literal operator // template), L is treated as a call fo the form // operator "" X <'c1', 'c2', ... 'ck'>() // where n is the source character sequence c1 c2 ... ck. TemplateArgumentListInfo ExplicitArgs; unsigned CharBits = Context.getIntWidth(Context.CharTy); bool CharIsUnsigned = Context.CharTy->isUnsignedIntegerType(); llvm::APSInt Value(CharBits, CharIsUnsigned); for (unsigned I = 0, N = Literal.getUDSuffixOffset(); I != N; ++I) { Value = TokSpelling[I]; TemplateArgument Arg(Context, Value, Context.CharTy); TemplateArgumentLocInfo ArgInfo; ExplicitArgs.addArgument(TemplateArgumentLoc(Arg, ArgInfo)); } return BuildLiteralOperatorCall(R, OpNameInfo, None, TokLoc, &ExplicitArgs); } case LOLR_StringTemplate: llvm_unreachable("unexpected literal operator lookup result"); } } Expr *Res; if (Literal.isFloatingLiteral()) { QualType Ty; if (Literal.isHalf){ if (getOpenCLOptions().isEnabled("cl_khr_fp16")) Ty = Context.HalfTy; else { Diag(Tok.getLocation(), diag::err_half_const_requires_fp16); return ExprError(); } } else if (Literal.isFloat) Ty = Context.FloatTy; else if (Literal.isLong) Ty = Context.LongDoubleTy; else if (Literal.isFloat16) Ty = Context.Float16Ty; else if (Literal.isFloat128) Ty = Context.Float128Ty; else Ty = Context.DoubleTy; Res = BuildFloatingLiteral(*this, Literal, Ty, Tok.getLocation()); if (Ty == Context.DoubleTy) { if (getLangOpts().SinglePrecisionConstants) { const BuiltinType *BTy = Ty->getAs(); if (BTy->getKind() != BuiltinType::Float) { Res = ImpCastExprToType(Res, Context.FloatTy, CK_FloatingCast).get(); } } else if (getLangOpts().OpenCL && !getOpenCLOptions().isEnabled("cl_khr_fp64")) { // Impose single-precision float type when cl_khr_fp64 is not enabled. Diag(Tok.getLocation(), diag::warn_double_const_requires_fp64); Res = ImpCastExprToType(Res, Context.FloatTy, CK_FloatingCast).get(); } } } else if (!Literal.isIntegerLiteral()) { return ExprError(); } else { QualType Ty; // 'long long' is a C99 or C++11 feature. if (!getLangOpts().C99 && Literal.isLongLong) { if (getLangOpts().CPlusPlus) Diag(Tok.getLocation(), getLangOpts().CPlusPlus11 ? diag::warn_cxx98_compat_longlong : diag::ext_cxx11_longlong); else Diag(Tok.getLocation(), diag::ext_c99_longlong); } // Get the value in the widest-possible width. 
unsigned MaxWidth = Context.getTargetInfo().getIntMaxTWidth(); llvm::APInt ResultVal(MaxWidth, 0); if (Literal.GetIntegerValue(ResultVal)) { // If this value didn't fit into uintmax_t, error and force to ull. Diag(Tok.getLocation(), diag::err_integer_literal_too_large) << /* Unsigned */ 1; Ty = Context.UnsignedLongLongTy; assert(Context.getTypeSize(Ty) == ResultVal.getBitWidth() && "long long is not intmax_t?"); } else { // If this value fits into a ULL, try to figure out what else it fits into // according to the rules of C99 6.4.4.1p5. // Octal, Hexadecimal, and integers with a U suffix are allowed to // be an unsigned int. bool AllowUnsigned = Literal.isUnsigned || Literal.getRadix() != 10; // Check from smallest to largest, picking the smallest type we can. unsigned Width = 0; // Microsoft specific integer suffixes are explicitly sized. if (Literal.MicrosoftInteger) { if (Literal.MicrosoftInteger == 8 && !Literal.isUnsigned) { Width = 8; Ty = Context.CharTy; } else { Width = Literal.MicrosoftInteger; Ty = Context.getIntTypeForBitwidth(Width, /*Signed=*/!Literal.isUnsigned); } } if (Ty.isNull() && !Literal.isLong && !Literal.isLongLong) { // Are int/unsigned possibilities? unsigned IntSize = Context.getTargetInfo().getIntWidth(); // Does it fit in a unsigned int? if (ResultVal.isIntN(IntSize)) { // Does it fit in a signed int? if (!Literal.isUnsigned && ResultVal[IntSize-1] == 0) Ty = Context.IntTy; else if (AllowUnsigned) Ty = Context.UnsignedIntTy; Width = IntSize; } } // Are long/unsigned long possibilities? if (Ty.isNull() && !Literal.isLongLong) { unsigned LongSize = Context.getTargetInfo().getLongWidth(); // Does it fit in a unsigned long? if (ResultVal.isIntN(LongSize)) { // Does it fit in a signed long? if (!Literal.isUnsigned && ResultVal[LongSize-1] == 0) Ty = Context.LongTy; else if (AllowUnsigned) Ty = Context.UnsignedLongTy; // Check according to the rules of C90 6.1.3.2p5. C++03 [lex.icon]p2 // is compatible. else if (!getLangOpts().C99 && !getLangOpts().CPlusPlus11) { const unsigned LongLongSize = Context.getTargetInfo().getLongLongWidth(); Diag(Tok.getLocation(), getLangOpts().CPlusPlus ? Literal.isLong ? diag::warn_old_implicitly_unsigned_long_cxx : /*C++98 UB*/ diag:: ext_old_implicitly_unsigned_long_cxx : diag::warn_old_implicitly_unsigned_long) << (LongLongSize > LongSize ? /*will have type 'long long'*/ 0 : /*will be ill-formed*/ 1); Ty = Context.UnsignedLongTy; } Width = LongSize; } } // Check long long if needed. if (Ty.isNull()) { unsigned LongLongSize = Context.getTargetInfo().getLongLongWidth(); // Does it fit in a unsigned long long? if (ResultVal.isIntN(LongLongSize)) { // Does it fit in a signed long long? // To be compatible with MSVC, hex integer literals ending with the // LL or i64 suffix are always signed in Microsoft mode. if (!Literal.isUnsigned && (ResultVal[LongLongSize-1] == 0 || (getLangOpts().MSVCCompat && Literal.isLongLong))) Ty = Context.LongLongTy; else if (AllowUnsigned) Ty = Context.UnsignedLongLongTy; Width = LongLongSize; } } // If we still couldn't decide a type, we probably have something that // does not fit in a signed long long, but has no U suffix. 
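      // For example, assuming a 64-bit long long, the decimal literal
      // 18446744073709551615 has no U suffix and does not fit in any signed
      // type, so it is given type unsigned long long with a diagnostic below.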
if (Ty.isNull()) { Diag(Tok.getLocation(), diag::ext_integer_literal_too_large_for_signed); Ty = Context.UnsignedLongLongTy; Width = Context.getTargetInfo().getLongLongWidth(); } if (ResultVal.getBitWidth() != Width) ResultVal = ResultVal.trunc(Width); } Res = IntegerLiteral::Create(Context, ResultVal, Ty, Tok.getLocation()); } // If this is an imaginary literal, create the ImaginaryLiteral wrapper. if (Literal.isImaginary) { Res = new (Context) ImaginaryLiteral(Res, Context.getComplexType(Res->getType())); Diag(Tok.getLocation(), diag::ext_imaginary_constant); } return Res; } ExprResult Sema::ActOnParenExpr(SourceLocation L, SourceLocation R, Expr *E) { assert(E && "ActOnParenExpr() missing expr"); return new (Context) ParenExpr(L, R, E); } static bool CheckVecStepTraitOperandType(Sema &S, QualType T, SourceLocation Loc, SourceRange ArgRange) { // [OpenCL 1.1 6.11.12] "The vec_step built-in function takes a built-in // scalar or vector data type argument..." // Every built-in scalar type (OpenCL 1.1 6.1.1) is either an arithmetic // type (C99 6.2.5p18) or void. if (!(T->isArithmeticType() || T->isVoidType() || T->isVectorType())) { S.Diag(Loc, diag::err_vecstep_non_scalar_vector_type) << T << ArgRange; return true; } assert((T->isVoidType() || !T->isIncompleteType()) && "Scalar types should always be complete"); return false; } static bool CheckExtensionTraitOperandType(Sema &S, QualType T, SourceLocation Loc, SourceRange ArgRange, UnaryExprOrTypeTrait TraitKind) { // Invalid types must be hard errors for SFINAE in C++. if (S.LangOpts.CPlusPlus) return true; // C99 6.5.3.4p1: if (T->isFunctionType() && (TraitKind == UETT_SizeOf || TraitKind == UETT_AlignOf)) { // sizeof(function)/alignof(function) is allowed as an extension. S.Diag(Loc, diag::ext_sizeof_alignof_function_type) << TraitKind << ArgRange; return false; } // Allow sizeof(void)/alignof(void) as an extension, unless in OpenCL where // this is an error (OpenCL v1.1 s6.3.k) if (T->isVoidType()) { unsigned DiagID = S.LangOpts.OpenCL ? diag::err_opencl_sizeof_alignof_type : diag::ext_sizeof_alignof_void_type; S.Diag(Loc, DiagID) << TraitKind << ArgRange; return false; } return true; } static bool CheckObjCTraitOperandConstraints(Sema &S, QualType T, SourceLocation Loc, SourceRange ArgRange, UnaryExprOrTypeTrait TraitKind) { // Reject sizeof(interface) and sizeof(interface) if the // runtime doesn't allow it. if (!S.LangOpts.ObjCRuntime.allowsSizeofAlignof() && T->isObjCObjectType()) { S.Diag(Loc, diag::err_sizeof_nonfragile_interface) << T << (TraitKind == UETT_SizeOf) << ArgRange; return true; } return false; } /// \brief Check whether E is a pointer from a decayed array type (the decayed /// pointer type is equal to T) and emit a warning if it is. static void warnOnSizeofOnArrayDecay(Sema &S, SourceLocation Loc, QualType T, Expr *E) { // Don't warn if the operation changed the type. if (T != E->getType()) return; // Now look for array decays. ImplicitCastExpr *ICE = dyn_cast(E); if (!ICE || ICE->getCastKind() != CK_ArrayToPointerDecay) return; S.Diag(Loc, diag::warn_sizeof_array_decay) << ICE->getSourceRange() << ICE->getType() << ICE->getSubExpr()->getType(); } /// \brief Check the constraints on expression operands to unary type expression /// and type traits. /// /// Completes any types necessary and validates the constraints on the operand /// expression. The logic mostly mirrors the type-based overload, but may modify /// the expression as it completes the type for that expression through template /// instantiation, etc. 
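/// For example, `sizeof(a)` for `extern int a[];` attempts to complete the
/// array type here and is diagnosed if the bound is still unknown.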
bool Sema::CheckUnaryExprOrTypeTraitOperand(Expr *E, UnaryExprOrTypeTrait ExprKind) { QualType ExprTy = E->getType(); assert(!ExprTy->isReferenceType()); if (ExprKind == UETT_VecStep) return CheckVecStepTraitOperandType(*this, ExprTy, E->getExprLoc(), E->getSourceRange()); // Whitelist some types as extensions if (!CheckExtensionTraitOperandType(*this, ExprTy, E->getExprLoc(), E->getSourceRange(), ExprKind)) return false; // 'alignof' applied to an expression only requires the base element type of // the expression to be complete. 'sizeof' requires the expression's type to // be complete (and will attempt to complete it if it's an array of unknown // bound). if (ExprKind == UETT_AlignOf) { if (RequireCompleteType(E->getExprLoc(), Context.getBaseElementType(E->getType()), diag::err_sizeof_alignof_incomplete_type, ExprKind, E->getSourceRange())) return true; } else { if (RequireCompleteExprType(E, diag::err_sizeof_alignof_incomplete_type, ExprKind, E->getSourceRange())) return true; } // Completing the expression's type may have changed it. ExprTy = E->getType(); assert(!ExprTy->isReferenceType()); if (ExprTy->isFunctionType()) { Diag(E->getExprLoc(), diag::err_sizeof_alignof_function_type) << ExprKind << E->getSourceRange(); return true; } // The operand for sizeof and alignof is in an unevaluated expression context, // so side effects could result in unintended consequences. if ((ExprKind == UETT_SizeOf || ExprKind == UETT_AlignOf) && !inTemplateInstantiation() && E->HasSideEffects(Context, false)) Diag(E->getExprLoc(), diag::warn_side_effects_unevaluated_context); if (CheckObjCTraitOperandConstraints(*this, ExprTy, E->getExprLoc(), E->getSourceRange(), ExprKind)) return true; if (ExprKind == UETT_SizeOf) { if (DeclRefExpr *DeclRef = dyn_cast(E->IgnoreParens())) { if (ParmVarDecl *PVD = dyn_cast(DeclRef->getFoundDecl())) { QualType OType = PVD->getOriginalType(); QualType Type = PVD->getType(); if (Type->isPointerType() && OType->isArrayType()) { Diag(E->getExprLoc(), diag::warn_sizeof_array_param) << Type << OType; Diag(PVD->getLocation(), diag::note_declared_at); } } } // Warn on "sizeof(array op x)" and "sizeof(x op array)", where the array // decays into a pointer and returns an unintended result. This is most // likely a typo for "sizeof(array) op x". if (BinaryOperator *BO = dyn_cast(E->IgnoreParens())) { warnOnSizeofOnArrayDecay(*this, BO->getOperatorLoc(), BO->getType(), BO->getLHS()); warnOnSizeofOnArrayDecay(*this, BO->getOperatorLoc(), BO->getType(), BO->getRHS()); } } return false; } /// \brief Check the constraints on operands to unary expression and type /// traits. /// /// This will complete any types necessary, and validate the various constraints /// on those operands. /// /// The UsualUnaryConversions() function is *not* called by this routine. /// C99 6.3.2.1p[2-4] all state: /// Except when it is the operand of the sizeof operator ... /// /// C++ [expr.sizeof]p4 /// The lvalue-to-rvalue, array-to-pointer, and function-to-pointer /// standard conversions are not applied to the operand of sizeof. /// /// This policy is followed for all of the unary trait expressions. bool Sema::CheckUnaryExprOrTypeTraitOperand(QualType ExprType, SourceLocation OpLoc, SourceRange ExprRange, UnaryExprOrTypeTrait ExprKind) { if (ExprType->isDependentType()) return false; // C++ [expr.sizeof]p2: // When applied to a reference or a reference type, the result // is the size of the referenced type. 
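  //   For example, given `int n; int &r = n;`, sizeof(r) yields sizeof(int).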
// C++11 [expr.alignof]p3: // When alignof is applied to a reference type, the result // shall be the alignment of the referenced type. if (const ReferenceType *Ref = ExprType->getAs()) ExprType = Ref->getPointeeType(); // C11 6.5.3.4/3, C++11 [expr.alignof]p3: // When alignof or _Alignof is applied to an array type, the result // is the alignment of the element type. if (ExprKind == UETT_AlignOf || ExprKind == UETT_OpenMPRequiredSimdAlign) ExprType = Context.getBaseElementType(ExprType); if (ExprKind == UETT_VecStep) return CheckVecStepTraitOperandType(*this, ExprType, OpLoc, ExprRange); // Whitelist some types as extensions if (!CheckExtensionTraitOperandType(*this, ExprType, OpLoc, ExprRange, ExprKind)) return false; if (RequireCompleteType(OpLoc, ExprType, diag::err_sizeof_alignof_incomplete_type, ExprKind, ExprRange)) return true; if (ExprType->isFunctionType()) { Diag(OpLoc, diag::err_sizeof_alignof_function_type) << ExprKind << ExprRange; return true; } if (CheckObjCTraitOperandConstraints(*this, ExprType, OpLoc, ExprRange, ExprKind)) return true; return false; } static bool CheckAlignOfExpr(Sema &S, Expr *E) { E = E->IgnoreParens(); // Cannot know anything else if the expression is dependent. if (E->isTypeDependent()) return false; if (E->getObjectKind() == OK_BitField) { S.Diag(E->getExprLoc(), diag::err_sizeof_alignof_typeof_bitfield) << 1 << E->getSourceRange(); return true; } ValueDecl *D = nullptr; if (DeclRefExpr *DRE = dyn_cast(E)) { D = DRE->getDecl(); } else if (MemberExpr *ME = dyn_cast(E)) { D = ME->getMemberDecl(); } // If it's a field, require the containing struct to have a // complete definition so that we can compute the layout. // // This can happen in C++11 onwards, either by naming the member // in a way that is not transformed into a member access expression // (in an unevaluated operand, for instance), or by naming the member // in a trailing-return-type. // // For the record, since __alignof__ on expressions is a GCC // extension, GCC seems to permit this but always gives the // nonsensical answer 0. // // We don't really need the layout here --- we could instead just // directly check for all the appropriate alignment-lowing // attributes --- but that would require duplicating a lot of // logic that just isn't worth duplicating for such a marginal // use-case. if (FieldDecl *FD = dyn_cast_or_null(D)) { // Fast path this check, since we at least know the record has a // definition if we can find a member of it. if (!FD->getParent()->isCompleteDefinition()) { S.Diag(E->getExprLoc(), diag::err_alignof_member_of_incomplete_type) << E->getSourceRange(); return true; } // Otherwise, if it's a field, and the field doesn't have // reference type, then it must have a complete type (or be a // flexible array member, which we explicitly want to // white-list anyway), which makes the following checks trivial. if (!FD->getType()->isReferenceType()) return false; } return S.CheckUnaryExprOrTypeTraitOperand(E, UETT_AlignOf); } bool Sema::CheckVecStepExpr(Expr *E) { E = E->IgnoreParens(); // Cannot know anything else if the expression is dependent. if (E->isTypeDependent()) return false; return CheckUnaryExprOrTypeTraitOperand(E, UETT_VecStep); } static void captureVariablyModifiedType(ASTContext &Context, QualType T, CapturingScopeInfo *CSI) { assert(T->isVariablyModifiedType()); assert(CSI != nullptr); // We're going to walk down into the type and look for VLA expressions. 
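  // For example, for
  //   `void f(int n) { int a[n][4]; [&] { return sizeof(a); }(); }`
  // the walk finds the VariableArrayType for a's outer dimension so that its
  // size expression can be captured by the enclosing lambda's closure type.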
do { const Type *Ty = T.getTypePtr(); switch (Ty->getTypeClass()) { #define TYPE(Class, Base) #define ABSTRACT_TYPE(Class, Base) #define NON_CANONICAL_TYPE(Class, Base) #define DEPENDENT_TYPE(Class, Base) case Type::Class: #define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) #include "clang/AST/TypeNodes.def" T = QualType(); break; // These types are never variably-modified. case Type::Builtin: case Type::Complex: case Type::Vector: case Type::ExtVector: case Type::Record: case Type::Enum: case Type::Elaborated: case Type::TemplateSpecialization: case Type::ObjCObject: case Type::ObjCInterface: case Type::ObjCObjectPointer: case Type::ObjCTypeParam: case Type::Pipe: llvm_unreachable("type class is never variably-modified!"); case Type::Adjusted: T = cast(Ty)->getOriginalType(); break; case Type::Decayed: T = cast(Ty)->getPointeeType(); break; case Type::Pointer: T = cast(Ty)->getPointeeType(); break; case Type::BlockPointer: T = cast(Ty)->getPointeeType(); break; case Type::LValueReference: case Type::RValueReference: T = cast(Ty)->getPointeeType(); break; case Type::MemberPointer: T = cast(Ty)->getPointeeType(); break; case Type::ConstantArray: case Type::IncompleteArray: // Losing element qualification here is fine. T = cast(Ty)->getElementType(); break; case Type::VariableArray: { // Losing element qualification here is fine. const VariableArrayType *VAT = cast(Ty); // Unknown size indication requires no size computation. // Otherwise, evaluate and record it. if (auto Size = VAT->getSizeExpr()) { if (!CSI->isVLATypeCaptured(VAT)) { RecordDecl *CapRecord = nullptr; if (auto LSI = dyn_cast(CSI)) { CapRecord = LSI->Lambda; } else if (auto CRSI = dyn_cast(CSI)) { CapRecord = CRSI->TheRecordDecl; } if (CapRecord) { auto ExprLoc = Size->getExprLoc(); auto SizeType = Context.getSizeType(); // Build the non-static data member. auto Field = FieldDecl::Create(Context, CapRecord, ExprLoc, ExprLoc, /*Id*/ nullptr, SizeType, /*TInfo*/ nullptr, /*BW*/ nullptr, /*Mutable*/ false, /*InitStyle*/ ICIS_NoInit); Field->setImplicit(true); Field->setAccess(AS_private); Field->setCapturedVLAType(VAT); CapRecord->addDecl(Field); CSI->addVLATypeCapture(ExprLoc, SizeType); } } } T = VAT->getElementType(); break; } case Type::FunctionProto: case Type::FunctionNoProto: T = cast(Ty)->getReturnType(); break; case Type::Paren: case Type::TypeOf: case Type::UnaryTransform: case Type::Attributed: case Type::SubstTemplateTypeParm: case Type::PackExpansion: // Keep walking after single level desugaring. T = T.getSingleStepDesugaredType(Context); break; case Type::Typedef: T = cast(Ty)->desugar(); break; case Type::Decltype: T = cast(Ty)->desugar(); break; case Type::Auto: case Type::DeducedTemplateSpecialization: T = cast(Ty)->getDeducedType(); break; case Type::TypeOfExpr: T = cast(Ty)->getUnderlyingExpr()->getType(); break; case Type::Atomic: T = cast(Ty)->getValueType(); break; } } while (!T.isNull() && T->isVariablyModifiedType()); } /// \brief Build a sizeof or alignof expression given a type operand. 
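/// (For example, `sizeof(int[n])` reaches this overload with a type operand
/// describing the variably modified array type.)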
ExprResult Sema::CreateUnaryExprOrTypeTraitExpr(TypeSourceInfo *TInfo, SourceLocation OpLoc, UnaryExprOrTypeTrait ExprKind, SourceRange R) { if (!TInfo) return ExprError(); QualType T = TInfo->getType(); if (!T->isDependentType() && CheckUnaryExprOrTypeTraitOperand(T, OpLoc, R, ExprKind)) return ExprError(); if (T->isVariablyModifiedType() && FunctionScopes.size() > 1) { if (auto *TT = T->getAs()) { for (auto I = FunctionScopes.rbegin(), E = std::prev(FunctionScopes.rend()); I != E; ++I) { auto *CSI = dyn_cast(*I); if (CSI == nullptr) break; DeclContext *DC = nullptr; if (auto *LSI = dyn_cast(CSI)) DC = LSI->CallOperator; else if (auto *CRSI = dyn_cast(CSI)) DC = CRSI->TheCapturedDecl; else if (auto *BSI = dyn_cast(CSI)) DC = BSI->TheDecl; if (DC) { if (DC->containsDecl(TT->getDecl())) break; captureVariablyModifiedType(Context, T, CSI); } } } } // C99 6.5.3.4p4: the type (an unsigned integer type) is size_t. return new (Context) UnaryExprOrTypeTraitExpr( ExprKind, TInfo, Context.getSizeType(), OpLoc, R.getEnd()); } /// \brief Build a sizeof or alignof expression given an expression /// operand. ExprResult Sema::CreateUnaryExprOrTypeTraitExpr(Expr *E, SourceLocation OpLoc, UnaryExprOrTypeTrait ExprKind) { ExprResult PE = CheckPlaceholderExpr(E); if (PE.isInvalid()) return ExprError(); E = PE.get(); // Verify that the operand is valid. bool isInvalid = false; if (E->isTypeDependent()) { // Delay type-checking for type-dependent expressions. } else if (ExprKind == UETT_AlignOf) { isInvalid = CheckAlignOfExpr(*this, E); } else if (ExprKind == UETT_VecStep) { isInvalid = CheckVecStepExpr(E); } else if (ExprKind == UETT_OpenMPRequiredSimdAlign) { Diag(E->getExprLoc(), diag::err_openmp_default_simd_align_expr); isInvalid = true; } else if (E->refersToBitField()) { // C99 6.5.3.4p1. Diag(E->getExprLoc(), diag::err_sizeof_alignof_typeof_bitfield) << 0; isInvalid = true; } else { isInvalid = CheckUnaryExprOrTypeTraitOperand(E, UETT_SizeOf); } if (isInvalid) return ExprError(); if (ExprKind == UETT_SizeOf && E->getType()->isVariableArrayType()) { PE = TransformToPotentiallyEvaluated(E); if (PE.isInvalid()) return ExprError(); E = PE.get(); } // C99 6.5.3.4p4: the type (an unsigned integer type) is size_t. return new (Context) UnaryExprOrTypeTraitExpr( ExprKind, E, Context.getSizeType(), OpLoc, E->getSourceRange().getEnd()); } /// ActOnUnaryExprOrTypeTraitExpr - Handle @c sizeof(type) and @c sizeof @c /// expr and the same for @c alignof and @c __alignof /// Note that the ArgRange is invalid if isType is false. ExprResult Sema::ActOnUnaryExprOrTypeTraitExpr(SourceLocation OpLoc, UnaryExprOrTypeTrait ExprKind, bool IsType, void *TyOrEx, SourceRange ArgRange) { // If error parsing type, ignore. if (!TyOrEx) return ExprError(); if (IsType) { TypeSourceInfo *TInfo; (void) GetTypeFromParser(ParsedType::getFromOpaquePtr(TyOrEx), &TInfo); return CreateUnaryExprOrTypeTraitExpr(TInfo, OpLoc, ExprKind, ArgRange); } Expr *ArgEx = (Expr *)TyOrEx; ExprResult Result = CreateUnaryExprOrTypeTraitExpr(ArgEx, OpLoc, ExprKind); return Result; } static QualType CheckRealImagOperand(Sema &S, ExprResult &V, SourceLocation Loc, bool IsReal) { if (V.get()->isTypeDependent()) return S.Context.DependentTy; // _Real and _Imag are only l-values for normal l-values. if (V.get()->getObjectKind() != OK_Ordinary) { V = S.DefaultLvalueConversion(V.get()); if (V.isInvalid()) return QualType(); } // These operators return the element type of a complex type. 
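  // For example, for `_Complex double z;` both __real z and __imag z have
  // type double.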
  if (const ComplexType *CT = V.get()->getType()->getAs<ComplexType>())
    return CT->getElementType();

  // Otherwise they pass through real integer and floating point types here.
  if (V.get()->getType()->isArithmeticType())
    return V.get()->getType();

  // Test for placeholders.
  ExprResult PR = S.CheckPlaceholderExpr(V.get());
  if (PR.isInvalid()) return QualType();
  if (PR.get() != V.get()) {
    V = PR;
    return CheckRealImagOperand(S, V, Loc, IsReal);
  }

  // Reject anything else.
  S.Diag(Loc, diag::err_realimag_invalid_type) << V.get()->getType()
    << (IsReal ? "__real" : "__imag");
  return QualType();
}

ExprResult
Sema::ActOnPostfixUnaryOp(Scope *S, SourceLocation OpLoc,
                          tok::TokenKind Kind, Expr *Input) {
  UnaryOperatorKind Opc;
  switch (Kind) {
  default: llvm_unreachable("Unknown unary op!");
  case tok::plusplus:   Opc = UO_PostInc; break;
  case tok::minusminus: Opc = UO_PostDec; break;
  }

  // Since this might be a postfix expression, get rid of ParenListExprs.
  ExprResult Result = MaybeConvertParenListExprToParenExpr(S, Input);
  if (Result.isInvalid()) return ExprError();
  Input = Result.get();

  return BuildUnaryOp(S, OpLoc, Opc, Input);
}

/// \brief Diagnose if arithmetic on the given ObjC pointer is illegal.
///
/// \return true on error
static bool checkArithmeticOnObjCPointer(Sema &S,
                                         SourceLocation opLoc,
                                         Expr *op) {
  assert(op->getType()->isObjCObjectPointerType());
  if (S.LangOpts.ObjCRuntime.allowsPointerArithmetic() &&
      !S.LangOpts.ObjCSubscriptingLegacyRuntime)
    return false;

  S.Diag(opLoc, diag::err_arithmetic_nonfragile_interface)
    << op->getType()->castAs<ObjCObjectPointerType>()->getPointeeType()
    << op->getSourceRange();

  return true;
}

static bool isMSPropertySubscriptExpr(Sema &S, Expr *Base) {
  auto *BaseNoParens = Base->IgnoreParens();
  if (auto *MSProp = dyn_cast<MSPropertyRefExpr>(BaseNoParens))
    return MSProp->getPropertyDecl()->getType()->isArrayType();
  return isa<MSPropertySubscriptExpr>(BaseNoParens);
}

ExprResult
Sema::ActOnArraySubscriptExpr(Scope *S, Expr *base, SourceLocation lbLoc,
                              Expr *idx, SourceLocation rbLoc) {
  if (base && !base->getType().isNull() &&
      base->getType()->isSpecificPlaceholderType(BuiltinType::OMPArraySection))
    return ActOnOMPArraySectionExpr(base, lbLoc, idx, SourceLocation(),
                                    /*Length=*/nullptr, rbLoc);

  // Since this might be a postfix expression, get rid of ParenListExprs.
  if (isa<ParenListExpr>(base)) {
    ExprResult result = MaybeConvertParenListExprToParenExpr(S, base);
    if (result.isInvalid()) return ExprError();
    base = result.get();
  }

  // Handle any non-overload placeholder types in the base and index
  // expressions.  We can't handle overloads here because the other
  // operand might be an overloadable type, in which case the overload
  // resolution for the operator overload should get the first crack
  // at the overload.
  bool IsMSPropertySubscript = false;
  if (base->getType()->isNonOverloadPlaceholderType()) {
    IsMSPropertySubscript = isMSPropertySubscriptExpr(*this, base);
    if (!IsMSPropertySubscript) {
      ExprResult result = CheckPlaceholderExpr(base);
      if (result.isInvalid()) return ExprError();
      base = result.get();
    }
  }
  if (idx->getType()->isNonOverloadPlaceholderType()) {
    ExprResult result = CheckPlaceholderExpr(idx);
    if (result.isInvalid()) return ExprError();
    idx = result.get();
  }

  // Build an unanalyzed expression if either operand is type-dependent.
if (getLangOpts().CPlusPlus && (base->isTypeDependent() || idx->isTypeDependent())) { return new (Context) ArraySubscriptExpr(base, idx, Context.DependentTy, VK_LValue, OK_Ordinary, rbLoc); } // MSDN, property (C++) // https://msdn.microsoft.com/en-us/library/yhfk0thd(v=vs.120).aspx // This attribute can also be used in the declaration of an empty array in a // class or structure definition. For example: // __declspec(property(get=GetX, put=PutX)) int x[]; // The above statement indicates that x[] can be used with one or more array // indices. In this case, i=p->x[a][b] will be turned into i=p->GetX(a, b), // and p->x[a][b] = i will be turned into p->PutX(a, b, i); if (IsMSPropertySubscript) { // Build MS property subscript expression if base is MS property reference // or MS property subscript. return new (Context) MSPropertySubscriptExpr( base, idx, Context.PseudoObjectTy, VK_LValue, OK_Ordinary, rbLoc); } // Use C++ overloaded-operator rules if either operand has record // type. The spec says to do this if either type is *overloadable*, // but enum types can't declare subscript operators or conversion // operators, so there's nothing interesting for overload resolution // to do if there aren't any record types involved. // // ObjC pointers have their own subscripting logic that is not tied // to overload resolution and so should not take this path. if (getLangOpts().CPlusPlus && (base->getType()->isRecordType() || (!base->getType()->isObjCObjectPointerType() && idx->getType()->isRecordType()))) { return CreateOverloadedArraySubscriptExpr(lbLoc, rbLoc, base, idx); } return CreateBuiltinArraySubscriptExpr(base, lbLoc, idx, rbLoc); } ExprResult Sema::ActOnOMPArraySectionExpr(Expr *Base, SourceLocation LBLoc, Expr *LowerBound, SourceLocation ColonLoc, Expr *Length, SourceLocation RBLoc) { if (Base->getType()->isPlaceholderType() && !Base->getType()->isSpecificPlaceholderType( BuiltinType::OMPArraySection)) { ExprResult Result = CheckPlaceholderExpr(Base); if (Result.isInvalid()) return ExprError(); Base = Result.get(); } if (LowerBound && LowerBound->getType()->isNonOverloadPlaceholderType()) { ExprResult Result = CheckPlaceholderExpr(LowerBound); if (Result.isInvalid()) return ExprError(); Result = DefaultLvalueConversion(Result.get()); if (Result.isInvalid()) return ExprError(); LowerBound = Result.get(); } if (Length && Length->getType()->isNonOverloadPlaceholderType()) { ExprResult Result = CheckPlaceholderExpr(Length); if (Result.isInvalid()) return ExprError(); Result = DefaultLvalueConversion(Result.get()); if (Result.isInvalid()) return ExprError(); Length = Result.get(); } // Build an unanalyzed expression if either operand is type-dependent. if (Base->isTypeDependent() || (LowerBound && (LowerBound->isTypeDependent() || LowerBound->isValueDependent())) || (Length && (Length->isTypeDependent() || Length->isValueDependent()))) { return new (Context) OMPArraySectionExpr(Base, LowerBound, Length, Context.DependentTy, VK_LValue, OK_Ordinary, ColonLoc, RBLoc); } // Perform default conversions. 
QualType OriginalTy = OMPArraySectionExpr::getBaseOriginalType(Base); QualType ResultTy; if (OriginalTy->isAnyPointerType()) { ResultTy = OriginalTy->getPointeeType(); } else if (OriginalTy->isArrayType()) { ResultTy = OriginalTy->getAsArrayTypeUnsafe()->getElementType(); } else { return ExprError( Diag(Base->getExprLoc(), diag::err_omp_typecheck_section_value) << Base->getSourceRange()); } // C99 6.5.2.1p1 if (LowerBound) { auto Res = PerformOpenMPImplicitIntegerConversion(LowerBound->getExprLoc(), LowerBound); if (Res.isInvalid()) return ExprError(Diag(LowerBound->getExprLoc(), diag::err_omp_typecheck_section_not_integer) << 0 << LowerBound->getSourceRange()); LowerBound = Res.get(); if (LowerBound->getType()->isSpecificBuiltinType(BuiltinType::Char_S) || LowerBound->getType()->isSpecificBuiltinType(BuiltinType::Char_U)) Diag(LowerBound->getExprLoc(), diag::warn_omp_section_is_char) << 0 << LowerBound->getSourceRange(); } if (Length) { auto Res = PerformOpenMPImplicitIntegerConversion(Length->getExprLoc(), Length); if (Res.isInvalid()) return ExprError(Diag(Length->getExprLoc(), diag::err_omp_typecheck_section_not_integer) << 1 << Length->getSourceRange()); Length = Res.get(); if (Length->getType()->isSpecificBuiltinType(BuiltinType::Char_S) || Length->getType()->isSpecificBuiltinType(BuiltinType::Char_U)) Diag(Length->getExprLoc(), diag::warn_omp_section_is_char) << 1 << Length->getSourceRange(); } // C99 6.5.2.1p1: "shall have type "pointer to *object* type". Similarly, // C++ [expr.sub]p1: The type "T" shall be a completely-defined object // type. Note that functions are not objects, and that (in C99 parlance) // incomplete types are not object types. if (ResultTy->isFunctionType()) { Diag(Base->getExprLoc(), diag::err_omp_section_function_type) << ResultTy << Base->getSourceRange(); return ExprError(); } if (RequireCompleteType(Base->getExprLoc(), ResultTy, diag::err_omp_section_incomplete_type, Base)) return ExprError(); if (LowerBound && !OriginalTy->isAnyPointerType()) { llvm::APSInt LowerBoundValue; if (LowerBound->EvaluateAsInt(LowerBoundValue, Context)) { // OpenMP 4.5, [2.4 Array Sections] // The array section must be a subset of the original array. if (LowerBoundValue.isNegative()) { Diag(LowerBound->getExprLoc(), diag::err_omp_section_not_subset_of_array) << LowerBound->getSourceRange(); return ExprError(); } } } if (Length) { llvm::APSInt LengthValue; if (Length->EvaluateAsInt(LengthValue, Context)) { // OpenMP 4.5, [2.4 Array Sections] // The length must evaluate to non-negative integers. if (LengthValue.isNegative()) { Diag(Length->getExprLoc(), diag::err_omp_section_length_negative) << LengthValue.toString(/*Radix=*/10, /*Signed=*/true) << Length->getSourceRange(); return ExprError(); } } } else if (ColonLoc.isValid() && (OriginalTy.isNull() || (!OriginalTy->isConstantArrayType() && !OriginalTy->isVariableArrayType()))) { // OpenMP 4.5, [2.4 Array Sections] // When the size of the array dimension is not known, the length must be // specified explicitly. 
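    // For example, `p[0:n]` is valid for `int *p`, but `p[0:]` is not,
    // since a pointer provides no dimension from which to infer the length.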
Diag(ColonLoc, diag::err_omp_section_length_undefined) << (!OriginalTy.isNull() && OriginalTy->isArrayType()); return ExprError(); } if (!Base->getType()->isSpecificPlaceholderType( BuiltinType::OMPArraySection)) { ExprResult Result = DefaultFunctionArrayLvalueConversion(Base); if (Result.isInvalid()) return ExprError(); Base = Result.get(); } return new (Context) OMPArraySectionExpr(Base, LowerBound, Length, Context.OMPArraySectionTy, VK_LValue, OK_Ordinary, ColonLoc, RBLoc); } ExprResult Sema::CreateBuiltinArraySubscriptExpr(Expr *Base, SourceLocation LLoc, Expr *Idx, SourceLocation RLoc) { Expr *LHSExp = Base; Expr *RHSExp = Idx; ExprValueKind VK = VK_LValue; ExprObjectKind OK = OK_Ordinary; // Per C++ core issue 1213, the result is an xvalue if either operand is // a non-lvalue array, and an lvalue otherwise. if (getLangOpts().CPlusPlus11 && ((LHSExp->getType()->isArrayType() && !LHSExp->isLValue()) || (RHSExp->getType()->isArrayType() && !RHSExp->isLValue()))) VK = VK_XValue; // Perform default conversions. if (!LHSExp->getType()->getAs()) { ExprResult Result = DefaultFunctionArrayLvalueConversion(LHSExp); if (Result.isInvalid()) return ExprError(); LHSExp = Result.get(); } ExprResult Result = DefaultFunctionArrayLvalueConversion(RHSExp); if (Result.isInvalid()) return ExprError(); RHSExp = Result.get(); QualType LHSTy = LHSExp->getType(), RHSTy = RHSExp->getType(); // C99 6.5.2.1p2: the expression e1[e2] is by definition precisely equivalent // to the expression *((e1)+(e2)). This means the array "Base" may actually be // in the subscript position. As a result, we need to derive the array base // and index from the expression types. Expr *BaseExpr, *IndexExpr; QualType ResultType; if (LHSTy->isDependentType() || RHSTy->isDependentType()) { BaseExpr = LHSExp; IndexExpr = RHSExp; ResultType = Context.DependentTy; } else if (const PointerType *PTy = LHSTy->getAs()) { BaseExpr = LHSExp; IndexExpr = RHSExp; ResultType = PTy->getPointeeType(); } else if (const ObjCObjectPointerType *PTy = LHSTy->getAs()) { BaseExpr = LHSExp; IndexExpr = RHSExp; // Use custom logic if this should be the pseudo-object subscript // expression. if (!LangOpts.isSubscriptPointerArithmetic()) return BuildObjCSubscriptExpression(RLoc, BaseExpr, IndexExpr, nullptr, nullptr); ResultType = PTy->getPointeeType(); } else if (const PointerType *PTy = RHSTy->getAs()) { // Handle the uncommon case of "123[Ptr]". BaseExpr = RHSExp; IndexExpr = LHSExp; ResultType = PTy->getPointeeType(); } else if (const ObjCObjectPointerType *PTy = RHSTy->getAs()) { // Handle the uncommon case of "123[Ptr]". BaseExpr = RHSExp; IndexExpr = LHSExp; ResultType = PTy->getPointeeType(); if (!LangOpts.isSubscriptPointerArithmetic()) { Diag(LLoc, diag::err_subscript_nonfragile_interface) << ResultType << BaseExpr->getSourceRange(); return ExprError(); } } else if (const VectorType *VTy = LHSTy->getAs()) { BaseExpr = LHSExp; // vectors: V[123] IndexExpr = RHSExp; VK = LHSExp->getValueKind(); if (VK != VK_RValue) OK = OK_VectorComponent; // FIXME: need to deal with const... ResultType = VTy->getElementType(); } else if (LHSTy->isArrayType()) { // If we see an array that wasn't promoted by // DefaultFunctionArrayLvalueConversion, it must be an array that // wasn't promoted because of the C90 rule that doesn't // allow promoting non-lvalue arrays. Warn, then // force the promotion here. 
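    // For example, in C90, given `struct S { int a[4]; } f();`, the
    // expression `f().a[0]` subscripts an array that is not an lvalue and
    // so was not promoted above.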
Diag(LHSExp->getLocStart(), diag::ext_subscript_non_lvalue) << LHSExp->getSourceRange(); LHSExp = ImpCastExprToType(LHSExp, Context.getArrayDecayedType(LHSTy), CK_ArrayToPointerDecay).get(); LHSTy = LHSExp->getType(); BaseExpr = LHSExp; IndexExpr = RHSExp; ResultType = LHSTy->getAs()->getPointeeType(); } else if (RHSTy->isArrayType()) { // Same as previous, except for 123[f().a] case Diag(RHSExp->getLocStart(), diag::ext_subscript_non_lvalue) << RHSExp->getSourceRange(); RHSExp = ImpCastExprToType(RHSExp, Context.getArrayDecayedType(RHSTy), CK_ArrayToPointerDecay).get(); RHSTy = RHSExp->getType(); BaseExpr = RHSExp; IndexExpr = LHSExp; ResultType = RHSTy->getAs()->getPointeeType(); } else { return ExprError(Diag(LLoc, diag::err_typecheck_subscript_value) << LHSExp->getSourceRange() << RHSExp->getSourceRange()); } // C99 6.5.2.1p1 if (!IndexExpr->getType()->isIntegerType() && !IndexExpr->isTypeDependent()) return ExprError(Diag(LLoc, diag::err_typecheck_subscript_not_integer) << IndexExpr->getSourceRange()); if ((IndexExpr->getType()->isSpecificBuiltinType(BuiltinType::Char_S) || IndexExpr->getType()->isSpecificBuiltinType(BuiltinType::Char_U)) && !IndexExpr->isTypeDependent()) Diag(LLoc, diag::warn_subscript_is_char) << IndexExpr->getSourceRange(); // C99 6.5.2.1p1: "shall have type "pointer to *object* type". Similarly, // C++ [expr.sub]p1: The type "T" shall be a completely-defined object // type. Note that Functions are not objects, and that (in C99 parlance) // incomplete types are not object types. if (ResultType->isFunctionType()) { Diag(BaseExpr->getLocStart(), diag::err_subscript_function_type) << ResultType << BaseExpr->getSourceRange(); return ExprError(); } if (ResultType->isVoidType() && !getLangOpts().CPlusPlus) { // GNU extension: subscripting on pointer to void Diag(LLoc, diag::ext_gnu_subscript_void_type) << BaseExpr->getSourceRange(); // C forbids expressions of unqualified void type from being l-values. // See IsCForbiddenLValueType. if (!ResultType.hasQualifiers()) VK = VK_RValue; } else if (!ResultType->isDependentType() && RequireCompleteType(LLoc, ResultType, diag::err_subscript_incomplete_type, BaseExpr)) return ExprError(); assert(VK == VK_RValue || LangOpts.CPlusPlus || !ResultType.isCForbiddenLValueType()); return new (Context) ArraySubscriptExpr(LHSExp, RHSExp, ResultType, VK, OK, RLoc); } bool Sema::CheckCXXDefaultArgExpr(SourceLocation CallLoc, FunctionDecl *FD, ParmVarDecl *Param) { if (Param->hasUnparsedDefaultArg()) { Diag(CallLoc, diag::err_use_of_default_argument_to_function_declared_later) << FD << cast(FD->getDeclContext())->getDeclName(); Diag(UnparsedDefaultArgLocs[Param], diag::note_default_argument_declared_here); return true; } if (Param->hasUninstantiatedDefaultArg()) { Expr *UninstExpr = Param->getUninstantiatedDefaultArg(); EnterExpressionEvaluationContext EvalContext( *this, ExpressionEvaluationContext::PotentiallyEvaluated, Param); // Instantiate the expression. // // FIXME: Pass in a correct Pattern argument, otherwise // getTemplateInstantiationArgs uses the lexical context of FD, e.g. // // template // struct A { // static int FooImpl(); // // template // // bug: default argument A::FooImpl() is evaluated with 2-level // // template argument list [[T], [Tp]], should be [[Tp]]. 
// friend A Foo(int a); // }; // // template // A Foo(int a = A::FooImpl()); MultiLevelTemplateArgumentList MutiLevelArgList = getTemplateInstantiationArgs(FD, nullptr, /*RelativeToPrimary=*/true); InstantiatingTemplate Inst(*this, CallLoc, Param, MutiLevelArgList.getInnermost()); if (Inst.isInvalid()) return true; if (Inst.isAlreadyInstantiating()) { Diag(Param->getLocStart(), diag::err_recursive_default_argument) << FD; Param->setInvalidDecl(); return true; } ExprResult Result; { // C++ [dcl.fct.default]p5: // The names in the [default argument] expression are bound, and // the semantic constraints are checked, at the point where the // default argument expression appears. ContextRAII SavedContext(*this, FD); LocalInstantiationScope Local(*this); Result = SubstInitializer(UninstExpr, MutiLevelArgList, /*DirectInit*/false); } if (Result.isInvalid()) return true; // Check the expression as an initializer for the parameter. InitializedEntity Entity = InitializedEntity::InitializeParameter(Context, Param); InitializationKind Kind = InitializationKind::CreateCopy(Param->getLocation(), /*FIXME:EqualLoc*/UninstExpr->getLocStart()); Expr *ResultE = Result.getAs(); InitializationSequence InitSeq(*this, Entity, Kind, ResultE); Result = InitSeq.Perform(*this, Entity, Kind, ResultE); if (Result.isInvalid()) return true; Result = ActOnFinishFullExpr(Result.getAs(), Param->getOuterLocStart()); if (Result.isInvalid()) return true; // Remember the instantiated default argument. Param->setDefaultArg(Result.getAs()); if (ASTMutationListener *L = getASTMutationListener()) { L->DefaultArgumentInstantiated(Param); } } // If the default argument expression is not set yet, we are building it now. if (!Param->hasInit()) { Diag(Param->getLocStart(), diag::err_recursive_default_argument) << FD; Param->setInvalidDecl(); return true; } // If the default expression creates temporaries, we need to // push them to the current stack of expression temporaries so they'll // be properly destroyed. // FIXME: We should really be rebuilding the default argument with new // bound temporaries; see the comment in PR5810. // We don't need to do that with block decls, though, because // blocks in default argument expression can never capture anything. if (auto Init = dyn_cast(Param->getInit())) { // Set the "needs cleanups" bit regardless of whether there are // any explicit objects. Cleanup.setExprNeedsCleanups(Init->cleanupsHaveSideEffects()); // Append all the objects to the cleanup list. Right now, this // should always be a no-op, because blocks in default argument // expressions should never be able to capture anything. assert(!Init->getNumObjects() && "default argument expression has capturing blocks?"); } // We already type-checked the argument, so we know it works. // Just mark all of the declarations in this potentially-evaluated expression // as being "referenced". 
MarkDeclarationsReferencedInExpr(Param->getDefaultArg(), /*SkipLocalVariables=*/true); return false; } ExprResult Sema::BuildCXXDefaultArgExpr(SourceLocation CallLoc, FunctionDecl *FD, ParmVarDecl *Param) { if (CheckCXXDefaultArgExpr(CallLoc, FD, Param)) return ExprError(); return CXXDefaultArgExpr::Create(Context, CallLoc, Param); } Sema::VariadicCallType Sema::getVariadicCallType(FunctionDecl *FDecl, const FunctionProtoType *Proto, Expr *Fn) { if (Proto && Proto->isVariadic()) { if (dyn_cast_or_null(FDecl)) return VariadicConstructor; else if (Fn && Fn->getType()->isBlockPointerType()) return VariadicBlock; else if (FDecl) { if (CXXMethodDecl *Method = dyn_cast_or_null(FDecl)) if (Method->isInstance()) return VariadicMethod; } else if (Fn && Fn->getType() == Context.BoundMemberTy) return VariadicMethod; return VariadicFunction; } return VariadicDoesNotApply; } namespace { class FunctionCallCCC : public FunctionCallFilterCCC { public: FunctionCallCCC(Sema &SemaRef, const IdentifierInfo *FuncName, unsigned NumArgs, MemberExpr *ME) : FunctionCallFilterCCC(SemaRef, NumArgs, false, ME), FunctionName(FuncName) {} bool ValidateCandidate(const TypoCorrection &candidate) override { if (!candidate.getCorrectionSpecifier() || candidate.getCorrectionAsIdentifierInfo() != FunctionName) { return false; } return FunctionCallFilterCCC::ValidateCandidate(candidate); } private: const IdentifierInfo *const FunctionName; }; } static TypoCorrection TryTypoCorrectionForCall(Sema &S, Expr *Fn, FunctionDecl *FDecl, ArrayRef Args) { MemberExpr *ME = dyn_cast(Fn); DeclarationName FuncName = FDecl->getDeclName(); SourceLocation NameLoc = ME ? ME->getMemberLoc() : Fn->getLocStart(); if (TypoCorrection Corrected = S.CorrectTypo( DeclarationNameInfo(FuncName, NameLoc), Sema::LookupOrdinaryName, S.getScopeForContext(S.CurContext), nullptr, llvm::make_unique(S, FuncName.getAsIdentifierInfo(), Args.size(), ME), Sema::CTK_ErrorRecovery)) { if (NamedDecl *ND = Corrected.getFoundDecl()) { if (Corrected.isOverloaded()) { OverloadCandidateSet OCS(NameLoc, OverloadCandidateSet::CSK_Normal); OverloadCandidateSet::iterator Best; for (NamedDecl *CD : Corrected) { if (FunctionDecl *FD = dyn_cast(CD)) S.AddOverloadCandidate(FD, DeclAccessPair::make(FD, AS_none), Args, OCS); } switch (OCS.BestViableFunction(S, NameLoc, Best)) { case OR_Success: ND = Best->FoundDecl; Corrected.setCorrectionDecl(ND); break; default: break; } } ND = ND->getUnderlyingDecl(); if (isa(ND) || isa(ND)) return Corrected; } } return TypoCorrection(); } /// ConvertArgumentsForCall - Converts the arguments specified in /// Args/NumArgs to the parameter types of the function FDecl with /// function prototype Proto. Call is the call expression itself, and /// Fn is the function expression. For a C++ member function, this /// routine does not attempt to convert the object argument. Returns /// true if the call is ill-formed. bool Sema::ConvertArgumentsForCall(CallExpr *Call, Expr *Fn, FunctionDecl *FDecl, const FunctionProtoType *Proto, ArrayRef Args, SourceLocation RParenLoc, bool IsExecConfig) { // Bail out early if calling a builtin with custom typechecking. if (FDecl) if (unsigned ID = FDecl->getBuiltinID()) if (Context.BuiltinInfo.hasCustomTypechecking(ID)) return false; // C99 6.5.2.2p7 - the arguments are implicitly converted, as if by // assignment, to the types of the corresponding parameter, ... unsigned NumParams = Proto->getNumParams(); bool Invalid = false; unsigned MinArgs = FDecl ? 
FDecl->getMinRequiredArguments() : NumParams; unsigned FnKind = Fn->getType()->isBlockPointerType() ? 1 /* block */ : (IsExecConfig ? 3 /* kernel function (exec config) */ : 0 /* function */); // If too few arguments are available (and we don't have default // arguments for the remaining parameters), don't make the call. if (Args.size() < NumParams) { if (Args.size() < MinArgs) { TypoCorrection TC; if (FDecl && (TC = TryTypoCorrectionForCall(*this, Fn, FDecl, Args))) { unsigned diag_id = MinArgs == NumParams && !Proto->isVariadic() ? diag::err_typecheck_call_too_few_args_suggest : diag::err_typecheck_call_too_few_args_at_least_suggest; diagnoseTypo(TC, PDiag(diag_id) << FnKind << MinArgs << static_cast(Args.size()) << TC.getCorrectionRange()); } else if (MinArgs == 1 && FDecl && FDecl->getParamDecl(0)->getDeclName()) Diag(RParenLoc, MinArgs == NumParams && !Proto->isVariadic() ? diag::err_typecheck_call_too_few_args_one : diag::err_typecheck_call_too_few_args_at_least_one) << FnKind << FDecl->getParamDecl(0) << Fn->getSourceRange(); else Diag(RParenLoc, MinArgs == NumParams && !Proto->isVariadic() ? diag::err_typecheck_call_too_few_args : diag::err_typecheck_call_too_few_args_at_least) << FnKind << MinArgs << static_cast(Args.size()) << Fn->getSourceRange(); // Emit the location of the prototype. if (!TC && FDecl && !FDecl->getBuiltinID() && !IsExecConfig) Diag(FDecl->getLocStart(), diag::note_callee_decl) << FDecl; return true; } Call->setNumArgs(Context, NumParams); } // If too many are passed and not variadic, error on the extras and drop // them. if (Args.size() > NumParams) { if (!Proto->isVariadic()) { TypoCorrection TC; if (FDecl && (TC = TryTypoCorrectionForCall(*this, Fn, FDecl, Args))) { unsigned diag_id = MinArgs == NumParams && !Proto->isVariadic() ? diag::err_typecheck_call_too_many_args_suggest : diag::err_typecheck_call_too_many_args_at_most_suggest; diagnoseTypo(TC, PDiag(diag_id) << FnKind << NumParams << static_cast(Args.size()) << TC.getCorrectionRange()); } else if (NumParams == 1 && FDecl && FDecl->getParamDecl(0)->getDeclName()) Diag(Args[NumParams]->getLocStart(), MinArgs == NumParams ? diag::err_typecheck_call_too_many_args_one : diag::err_typecheck_call_too_many_args_at_most_one) << FnKind << FDecl->getParamDecl(0) << static_cast(Args.size()) << Fn->getSourceRange() << SourceRange(Args[NumParams]->getLocStart(), Args.back()->getLocEnd()); else Diag(Args[NumParams]->getLocStart(), MinArgs == NumParams ? diag::err_typecheck_call_too_many_args : diag::err_typecheck_call_too_many_args_at_most) << FnKind << NumParams << static_cast(Args.size()) << Fn->getSourceRange() << SourceRange(Args[NumParams]->getLocStart(), Args.back()->getLocEnd()); // Emit the location of the prototype. if (!TC && FDecl && !FDecl->getBuiltinID() && !IsExecConfig) Diag(FDecl->getLocStart(), diag::note_callee_decl) << FDecl; // This deletes the extra arguments. 
Call->setNumArgs(Context, NumParams); return true; } } SmallVector AllArgs; VariadicCallType CallType = getVariadicCallType(FDecl, Proto, Fn); Invalid = GatherArgumentsForCall(Call->getLocStart(), FDecl, Proto, 0, Args, AllArgs, CallType); if (Invalid) return true; unsigned TotalNumArgs = AllArgs.size(); for (unsigned i = 0; i < TotalNumArgs; ++i) Call->setArg(i, AllArgs[i]); return false; } bool Sema::GatherArgumentsForCall(SourceLocation CallLoc, FunctionDecl *FDecl, const FunctionProtoType *Proto, unsigned FirstParam, ArrayRef Args, SmallVectorImpl &AllArgs, VariadicCallType CallType, bool AllowExplicit, bool IsListInitialization) { unsigned NumParams = Proto->getNumParams(); bool Invalid = false; size_t ArgIx = 0; // Continue to check argument types (even if we have too few/many args). for (unsigned i = FirstParam; i < NumParams; i++) { QualType ProtoArgType = Proto->getParamType(i); Expr *Arg; ParmVarDecl *Param = FDecl ? FDecl->getParamDecl(i) : nullptr; if (ArgIx < Args.size()) { Arg = Args[ArgIx++]; if (RequireCompleteType(Arg->getLocStart(), ProtoArgType, diag::err_call_incomplete_argument, Arg)) return true; // Strip the unbridged-cast placeholder expression off, if applicable. bool CFAudited = false; if (Arg->getType() == Context.ARCUnbridgedCastTy && FDecl && FDecl->hasAttr() && (!Param || !Param->hasAttr())) Arg = stripARCUnbridgedCast(Arg); else if (getLangOpts().ObjCAutoRefCount && FDecl && FDecl->hasAttr() && (!Param || !Param->hasAttr())) CFAudited = true; InitializedEntity Entity = Param ? InitializedEntity::InitializeParameter(Context, Param, ProtoArgType) : InitializedEntity::InitializeParameter( Context, ProtoArgType, Proto->isParamConsumed(i)); // Remember that parameter belongs to a CF audited API. if (CFAudited) Entity.setParameterCFAudited(); ExprResult ArgE = PerformCopyInitialization( Entity, SourceLocation(), Arg, IsListInitialization, AllowExplicit); if (ArgE.isInvalid()) return true; Arg = ArgE.getAs(); } else { assert(Param && "can't use default arguments without a known callee"); ExprResult ArgExpr = BuildCXXDefaultArgExpr(CallLoc, FDecl, Param); if (ArgExpr.isInvalid()) return true; Arg = ArgExpr.getAs(); } // Check for array bounds violations for each argument to the call. This // check only triggers warnings when the argument isn't a more complex Expr // with its own checking, such as a BinaryOperator. CheckArrayAccess(Arg); // Check for violations of C99 static array rules (C99 6.7.5.3p7). CheckStaticArrayArgument(CallLoc, Param, Arg); AllArgs.push_back(Arg); } // If this is a variadic call, handle args passed through "...". if (CallType != VariadicDoesNotApply) { // Assume that extern "C" functions with variadic arguments that // return __unknown_anytype aren't *really* variadic. if (Proto->getReturnType() == Context.UnknownAnyTy && FDecl && FDecl->isExternC()) { for (Expr *A : Args.slice(ArgIx)) { QualType paramType; // ignored ExprResult arg = checkUnknownAnyArg(CallLoc, A, paramType); Invalid |= arg.isInvalid(); AllArgs.push_back(arg.get()); } // Otherwise do argument promotion, (C99 6.5.2.2p7). } else { for (Expr *A : Args.slice(ArgIx)) { ExprResult Arg = DefaultVariadicArgumentPromotion(A, CallType, FDecl); Invalid |= Arg.isInvalid(); AllArgs.push_back(Arg.get()); } } // Check for array bounds violations. 
for (Expr *A : Args.slice(ArgIx)) CheckArrayAccess(A); } return Invalid; } static void DiagnoseCalleeStaticArrayParam(Sema &S, ParmVarDecl *PVD) { TypeLoc TL = PVD->getTypeSourceInfo()->getTypeLoc(); if (DecayedTypeLoc DTL = TL.getAs()) TL = DTL.getOriginalLoc(); if (ArrayTypeLoc ATL = TL.getAs()) S.Diag(PVD->getLocation(), diag::note_callee_static_array) << ATL.getLocalSourceRange(); } /// CheckStaticArrayArgument - If the given argument corresponds to a static /// array parameter, check that it is non-null, and that if it is formed by /// array-to-pointer decay, the underlying array is sufficiently large. /// /// C99 6.7.5.3p7: If the keyword static also appears within the [ and ] of the /// array type derivation, then for each call to the function, the value of the /// corresponding actual argument shall provide access to the first element of /// an array with at least as many elements as specified by the size expression. void Sema::CheckStaticArrayArgument(SourceLocation CallLoc, ParmVarDecl *Param, const Expr *ArgExpr) { // Static array parameters are not supported in C++. if (!Param || getLangOpts().CPlusPlus) return; QualType OrigTy = Param->getOriginalType(); const ArrayType *AT = Context.getAsArrayType(OrigTy); if (!AT || AT->getSizeModifier() != ArrayType::Static) return; if (ArgExpr->isNullPointerConstant(Context, Expr::NPC_NeverValueDependent)) { Diag(CallLoc, diag::warn_null_arg) << ArgExpr->getSourceRange(); DiagnoseCalleeStaticArrayParam(*this, Param); return; } const ConstantArrayType *CAT = dyn_cast(AT); if (!CAT) return; const ConstantArrayType *ArgCAT = Context.getAsConstantArrayType(ArgExpr->IgnoreParenImpCasts()->getType()); if (!ArgCAT) return; if (ArgCAT->getSize().ult(CAT->getSize())) { Diag(CallLoc, diag::warn_static_array_too_small) << ArgExpr->getSourceRange() << (unsigned) ArgCAT->getSize().getZExtValue() << (unsigned) CAT->getSize().getZExtValue(); DiagnoseCalleeStaticArrayParam(*this, Param); } } /// Given a function expression of unknown-any type, try to rebuild it /// to have a function type. static ExprResult rebuildUnknownAnyFunction(Sema &S, Expr *fn); /// Is the given type a placeholder that we need to lower out /// immediately during argument processing? static bool isPlaceholderToRemoveAsArg(QualType type) { // Placeholders are never sugared. const BuiltinType *placeholder = dyn_cast(type); if (!placeholder) return false; switch (placeholder->getKind()) { // Ignore all the non-placeholder types. #define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \ case BuiltinType::Id: #include "clang/Basic/OpenCLImageTypes.def" #define PLACEHOLDER_TYPE(ID, SINGLETON_ID) #define BUILTIN_TYPE(ID, SINGLETON_ID) case BuiltinType::ID: #include "clang/AST/BuiltinTypes.def" return false; // We cannot lower out overload sets; they might validly be resolved // by the call machinery. case BuiltinType::Overload: return false; // Unbridged casts in ARC can be handled in some call positions and // should be left in place. case BuiltinType::ARCUnbridgedCast: return false; // Pseudo-objects should be converted as soon as possible. case BuiltinType::PseudoObject: return true; // The debugger mode could theoretically but currently does not try // to resolve unknown-typed arguments based on known parameter types. case BuiltinType::UnknownAny: return true; // These are always invalid as call arguments and should be reported. 
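  // For example, passing a bound member function reference such as
  // `obj.method` as a call argument yields a BoundMember placeholder here.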
case BuiltinType::BoundMember: case BuiltinType::BuiltinFn: case BuiltinType::OMPArraySection: return true; } llvm_unreachable("bad builtin type kind"); } /// Check an argument list for placeholders that we won't try to /// handle later. static bool checkArgsForPlaceholders(Sema &S, MultiExprArg args) { // Apply this processing to all the arguments at once instead of // dying at the first failure. bool hasInvalid = false; for (size_t i = 0, e = args.size(); i != e; i++) { if (isPlaceholderToRemoveAsArg(args[i]->getType())) { ExprResult result = S.CheckPlaceholderExpr(args[i]); if (result.isInvalid()) hasInvalid = true; else args[i] = result.get(); } else if (hasInvalid) { (void)S.CorrectDelayedTyposInExpr(args[i]); } } return hasInvalid; } /// If a builtin function has a pointer argument with no explicit address /// space, then it should be able to accept a pointer to any address /// space as input. In order to do this, we need to replace the /// standard builtin declaration with one that uses the same address space /// as the call. /// /// \returns nullptr If this builtin is not a candidate for a rewrite i.e. /// it does not contain any pointer arguments without /// an address space qualifer. Otherwise the rewritten /// FunctionDecl is returned. /// TODO: Handle pointer return types. static FunctionDecl *rewriteBuiltinFunctionDecl(Sema *Sema, ASTContext &Context, const FunctionDecl *FDecl, MultiExprArg ArgExprs) { QualType DeclType = FDecl->getType(); const FunctionProtoType *FT = dyn_cast(DeclType); if (!Context.BuiltinInfo.hasPtrArgsOrResult(FDecl->getBuiltinID()) || !FT || FT->isVariadic() || ArgExprs.size() != FT->getNumParams()) return nullptr; bool NeedsNewDecl = false; unsigned i = 0; SmallVector OverloadParams; for (QualType ParamType : FT->param_types()) { // Convert array arguments to pointer to simplify type lookup. ExprResult ArgRes = Sema->DefaultFunctionArrayLvalueConversion(ArgExprs[i++]); if (ArgRes.isInvalid()) return nullptr; Expr *Arg = ArgRes.get(); QualType ArgType = Arg->getType(); if (!ParamType->isPointerType() || ParamType.getQualifiers().hasAddressSpace() || !ArgType->isPointerType() || !ArgType->getPointeeType().getQualifiers().hasAddressSpace()) { OverloadParams.push_back(ParamType); continue; } NeedsNewDecl = true; LangAS AS = ArgType->getPointeeType().getAddressSpace(); QualType PointeeType = ParamType->getPointeeType(); PointeeType = Context.getAddrSpaceQualType(PointeeType, AS); OverloadParams.push_back(Context.getPointerType(PointeeType)); } if (!NeedsNewDecl) return nullptr; FunctionProtoType::ExtProtoInfo EPI; QualType OverloadTy = Context.getFunctionType(FT->getReturnType(), OverloadParams, EPI); DeclContext *Parent = Context.getTranslationUnitDecl(); FunctionDecl *OverloadDecl = FunctionDecl::Create(Context, Parent, FDecl->getLocation(), FDecl->getLocation(), FDecl->getIdentifier(), OverloadTy, /*TInfo=*/nullptr, SC_Extern, false, /*hasPrototype=*/true); SmallVector Params; FT = cast(OverloadTy); for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) { QualType ParamType = FT->getParamType(i); ParmVarDecl *Parm = ParmVarDecl::Create(Context, OverloadDecl, SourceLocation(), SourceLocation(), nullptr, ParamType, /*TInfo=*/nullptr, SC_None, nullptr); Parm->setScopeInfo(0, i); Params.push_back(Parm); } OverloadDecl->setParams(Params); return OverloadDecl; } static void checkDirectCallValidity(Sema &S, const Expr *Fn, FunctionDecl *Callee, MultiExprArg ArgExprs) { // `Callee` (when called with ArgExprs) may be ill-formed. 
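  // A hypothetical sketch of such a call:
  //
  //   int pick(int n) __attribute__((enable_if(n > 0, "n must be positive")));
  //   int x = pick(-1);   // sole candidate is disabled; diagnosed below with
  //                       // a note pointing at the enable_if condition
  //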
  // enable_if (and similar attributes) really don't like it when functions
  // are called with an invalid number of args.
  if (S.TooManyArguments(Callee->getNumParams(), ArgExprs.size(),
                         /*PartialOverloading=*/false) &&
      !Callee->isVariadic())
    return;
  if (Callee->getMinRequiredArguments() > ArgExprs.size())
    return;

  if (const EnableIfAttr *Attr = S.CheckEnableIf(Callee, ArgExprs, true)) {
    S.Diag(Fn->getLocStart(),
           isa<CXXMethodDecl>(Callee)
               ? diag::err_ovl_no_viable_member_function_in_call
               : diag::err_ovl_no_viable_function_in_call)
        << Callee << Callee->getSourceRange();
    S.Diag(Callee->getLocation(),
           diag::note_ovl_candidate_disabled_by_function_cond_attr)
        << Attr->getCond()->getSourceRange() << Attr->getMessage();
    return;
  }
}

static bool enclosingClassIsRelatedToClassInWhichMembersWereFound(
    const UnresolvedMemberExpr *const UME, Sema &S) {

  const auto GetFunctionLevelDCIfCXXClass =
      [](Sema &S) -> const CXXRecordDecl * {
    const DeclContext *const DC = S.getFunctionLevelDeclContext();
    if (!DC || !DC->getParent())
      return nullptr;

    // If the call to some member function was made from within a member
    // function body 'M', return 'M's parent.
    if (const auto *MD = dyn_cast<CXXMethodDecl>(DC))
      return MD->getParent()->getCanonicalDecl();

    // else the call was made from within a default member initializer of a
    // class, so return the class.
    if (const auto *RD = dyn_cast<CXXRecordDecl>(DC))
      return RD->getCanonicalDecl();

    return nullptr;
  };

  // If our DeclContext is neither a member function nor a class (in the
  // case of a lambda in a default member initializer), we can't have an
  // enclosing 'this'.
  const CXXRecordDecl *const CurParentClass = GetFunctionLevelDCIfCXXClass(S);
  if (!CurParentClass)
    return false;

  // The naming class for implicit member functions call is the class in which
  // name lookup starts.
  const CXXRecordDecl *const NamingClass =
      UME->getNamingClass()->getCanonicalDecl();
  assert(NamingClass && "Must have naming class even for implicit access");

  // If the unresolved member functions were found in a 'naming class' that is
  // related (either the same or derived from) to the class that contains the
  // member function that itself contained the implicit member access.
  return CurParentClass == NamingClass ||
         CurParentClass->isDerivedFrom(NamingClass);
}

static void
tryImplicitlyCaptureThisIfImplicitMemberFunctionAccessWithDependentArgs(
    Sema &S, const UnresolvedMemberExpr *const UME, SourceLocation CallLoc) {

  if (!UME)
    return;

  LambdaScopeInfo *const CurLSI = S.getCurLambda();
  // Only try and implicitly capture 'this' within a C++ Lambda if it hasn't
  // already been captured, or if this is an implicit member function call (if
  // it isn't, an attempt to capture 'this' should already have been made).
  if (!CurLSI || CurLSI->ImpCaptureStyle == CurLSI->ImpCap_None ||
      !UME->isImplicitAccess() || CurLSI->isCXXThisCaptured())
    return;

  // Check if the naming class in which the unresolved members were found is
  // related (same as or is a base of) to the enclosing class.
  if (!enclosingClassIsRelatedToClassInWhichMembersWereFound(UME, S))
    return;

  DeclContext *EnclosingFunctionCtx = S.CurContext->getParent()->getParent();
  // If the enclosing function is not dependent, then this lambda is
  // capture ready, so if we can capture this, do so.
  if (!EnclosingFunctionCtx->isDependentContext()) {
    // If the current lambda and all enclosing lambdas can capture 'this' -
    // then go ahead and capture 'this' (since our unresolved overload set
    // contains at least one non-static member function).
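    //
    // A hypothetical sketch of the situation this function exists for:
    //
    //   struct S {
    //     void member(int);
    //     template <class T> auto f(T t) {
    //       return [=] { member(t); };   // implicit member call with a
    //     }                              // dependent argument; 'this' must
    //   };                               // still be captured
    //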
    if (!S.CheckCXXThisCapture(CallLoc, /*Explicit*/ false, /*Diagnose*/ false))
      S.CheckCXXThisCapture(CallLoc);
  } else if (S.CurContext->isDependentContext()) {
    // ... since this is an implicit member reference, that might potentially
    // involve a 'this' capture, mark 'this' for potential capture in
    // enclosing lambdas.
    if (CurLSI->ImpCaptureStyle != CurLSI->ImpCap_None)
      CurLSI->addPotentialThisCapture(CallLoc);
  }
}

/// ActOnCallExpr - Handle a call to Fn with the specified array of arguments.
/// This provides the location of the left/right parens and a list of comma
/// locations.
ExprResult Sema::ActOnCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc,
                               MultiExprArg ArgExprs, SourceLocation RParenLoc,
                               Expr *ExecConfig, bool IsExecConfig) {
  // Since this might be a postfix expression, get rid of ParenListExprs.
  ExprResult Result = MaybeConvertParenListExprToParenExpr(Scope, Fn);
  if (Result.isInvalid()) return ExprError();
  Fn = Result.get();

  if (checkArgsForPlaceholders(*this, ArgExprs))
    return ExprError();

  if (getLangOpts().CPlusPlus) {
    // If this is a pseudo-destructor expression, build the call immediately.
    if (isa<CXXPseudoDestructorExpr>(Fn)) {
      if (!ArgExprs.empty()) {
        // Pseudo-destructor calls should not have any arguments.
        Diag(Fn->getLocStart(), diag::err_pseudo_dtor_call_with_args)
            << FixItHint::CreateRemoval(
                   SourceRange(ArgExprs.front()->getLocStart(),
                               ArgExprs.back()->getLocEnd()));
      }

      return new (Context)
          CallExpr(Context, Fn, None, Context.VoidTy, VK_RValue, RParenLoc);
    }
    if (Fn->getType() == Context.PseudoObjectTy) {
      ExprResult result = CheckPlaceholderExpr(Fn);
      if (result.isInvalid()) return ExprError();
      Fn = result.get();
    }

    // Determine whether this is a dependent call inside a C++ template,
    // in which case we won't do any semantic analysis now.
    bool Dependent = false;
    if (Fn->isTypeDependent())
      Dependent = true;
    else if (Expr::hasAnyTypeDependentArguments(ArgExprs))
      Dependent = true;

    if (Dependent) {
      if (ExecConfig) {
        return new (Context) CUDAKernelCallExpr(
            Context, Fn, cast<CallExpr>(ExecConfig), ArgExprs,
            Context.DependentTy, VK_RValue, RParenLoc);
      } else {
        tryImplicitlyCaptureThisIfImplicitMemberFunctionAccessWithDependentArgs(
            *this, dyn_cast<UnresolvedMemberExpr>(Fn->IgnoreParens()),
            Fn->getLocStart());
        return new (Context) CallExpr(
            Context, Fn, ArgExprs, Context.DependentTy, VK_RValue, RParenLoc);
      }
    }

    // Determine whether this is a call to an object (C++ [over.call.object]).
    if (Fn->getType()->isRecordType())
      return BuildCallToObjectOfClassType(Scope, Fn, LParenLoc, ArgExprs,
                                          RParenLoc);

    if (Fn->getType() == Context.UnknownAnyTy) {
      ExprResult result = rebuildUnknownAnyFunction(*this, Fn);
      if (result.isInvalid()) return ExprError();
      Fn = result.get();
    }

    if (Fn->getType() == Context.BoundMemberTy) {
      return BuildCallToMemberFunction(Scope, Fn, LParenLoc, ArgExprs,
                                       RParenLoc);
    }
  }

  // Check for overloaded calls.  This can happen even in C due to extensions.
  if (Fn->getType() == Context.OverloadTy) {
    OverloadExpr::FindResult find = OverloadExpr::find(Fn);

    // We aren't supposed to apply this logic if there's an '&' involved.
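    //
    // I.e. (hypothetical) when the overload set is written through '&', as
    // in '&S::f', it keeps its member-pointer form and is not resolved as a
    // plain overloaded call here.
    //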
if (!find.HasFormOfMemberPointer) { if (Expr::hasAnyTypeDependentArguments(ArgExprs)) return new (Context) CallExpr( Context, Fn, ArgExprs, Context.DependentTy, VK_RValue, RParenLoc); OverloadExpr *ovl = find.Expression; if (UnresolvedLookupExpr *ULE = dyn_cast(ovl)) return BuildOverloadedCallExpr( Scope, Fn, ULE, LParenLoc, ArgExprs, RParenLoc, ExecConfig, /*AllowTypoCorrection=*/true, find.IsAddressOfOperand); return BuildCallToMemberFunction(Scope, Fn, LParenLoc, ArgExprs, RParenLoc); } } // If we're directly calling a function, get the appropriate declaration. if (Fn->getType() == Context.UnknownAnyTy) { ExprResult result = rebuildUnknownAnyFunction(*this, Fn); if (result.isInvalid()) return ExprError(); Fn = result.get(); } Expr *NakedFn = Fn->IgnoreParens(); bool CallingNDeclIndirectly = false; NamedDecl *NDecl = nullptr; if (UnaryOperator *UnOp = dyn_cast(NakedFn)) { if (UnOp->getOpcode() == UO_AddrOf) { CallingNDeclIndirectly = true; NakedFn = UnOp->getSubExpr()->IgnoreParens(); } } if (isa(NakedFn)) { NDecl = cast(NakedFn)->getDecl(); FunctionDecl *FDecl = dyn_cast(NDecl); if (FDecl && FDecl->getBuiltinID()) { // Rewrite the function decl for this builtin by replacing parameters // with no explicit address space with the address space of the arguments // in ArgExprs. if ((FDecl = rewriteBuiltinFunctionDecl(this, Context, FDecl, ArgExprs))) { NDecl = FDecl; Fn = DeclRefExpr::Create( Context, FDecl->getQualifierLoc(), SourceLocation(), FDecl, false, SourceLocation(), FDecl->getType(), Fn->getValueKind(), FDecl); } } } else if (isa(NakedFn)) NDecl = cast(NakedFn)->getMemberDecl(); if (FunctionDecl *FD = dyn_cast_or_null(NDecl)) { if (CallingNDeclIndirectly && !checkAddressOfFunctionIsAvailable(FD, /*Complain=*/true, Fn->getLocStart())) return ExprError(); if (getLangOpts().OpenCL && checkOpenCLDisabledDecl(*FD, *Fn)) return ExprError(); checkDirectCallValidity(*this, Fn, FD, ArgExprs); } return BuildResolvedCallExpr(Fn, NDecl, LParenLoc, ArgExprs, RParenLoc, ExecConfig, IsExecConfig); } /// ActOnAsTypeExpr - create a new asType (bitcast) from the arguments. /// /// __builtin_astype( value, dst type ) /// ExprResult Sema::ActOnAsTypeExpr(Expr *E, ParsedType ParsedDestTy, SourceLocation BuiltinLoc, SourceLocation RParenLoc) { ExprValueKind VK = VK_RValue; ExprObjectKind OK = OK_Ordinary; QualType DstTy = GetTypeFromParser(ParsedDestTy); QualType SrcTy = E->getType(); if (Context.getTypeSize(DstTy) != Context.getTypeSize(SrcTy)) return ExprError(Diag(BuiltinLoc, diag::err_invalid_astype_of_different_size) << DstTy << SrcTy << E->getSourceRange()); return new (Context) AsTypeExpr(E, DstTy, VK, OK, BuiltinLoc, RParenLoc); } /// ActOnConvertVectorExpr - create a new convert-vector expression from the /// provided arguments. /// /// __builtin_convertvector( value, dst type ) /// ExprResult Sema::ActOnConvertVectorExpr(Expr *E, ParsedType ParsedDestTy, SourceLocation BuiltinLoc, SourceLocation RParenLoc) { TypeSourceInfo *TInfo; GetTypeFromParser(ParsedDestTy, &TInfo); return SemaConvertVectorExpr(E, TInfo, BuiltinLoc, RParenLoc); } /// BuildResolvedCallExpr - Build a call to a resolved expression, /// i.e. an expression not of \p OverloadTy. The expression should /// unary-convert to an expression of function-pointer or /// block-pointer type. 
/// /// \param NDecl the declaration being called, if available ExprResult Sema::BuildResolvedCallExpr(Expr *Fn, NamedDecl *NDecl, SourceLocation LParenLoc, ArrayRef Args, SourceLocation RParenLoc, Expr *Config, bool IsExecConfig) { FunctionDecl *FDecl = dyn_cast_or_null(NDecl); unsigned BuiltinID = (FDecl ? FDecl->getBuiltinID() : 0); // Functions with 'interrupt' attribute cannot be called directly. if (FDecl && FDecl->hasAttr()) { Diag(Fn->getExprLoc(), diag::err_anyx86_interrupt_called); return ExprError(); } // Interrupt handlers don't save off the VFP regs automatically on ARM, // so there's some risk when calling out to non-interrupt handler functions // that the callee might not preserve them. This is easy to diagnose here, // but can be very challenging to debug. if (auto *Caller = getCurFunctionDecl()) if (Caller->hasAttr()) { bool VFP = Context.getTargetInfo().hasFeature("vfp"); if (VFP && (!FDecl || !FDecl->hasAttr())) Diag(Fn->getExprLoc(), diag::warn_arm_interrupt_calling_convention); } // Promote the function operand. // We special-case function promotion here because we only allow promoting // builtin functions to function pointers in the callee of a call. ExprResult Result; if (BuiltinID && Fn->getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn)) { Result = ImpCastExprToType(Fn, Context.getPointerType(FDecl->getType()), CK_BuiltinFnToFnPtr).get(); } else { Result = CallExprUnaryConversions(Fn); } if (Result.isInvalid()) return ExprError(); Fn = Result.get(); // Make the call expr early, before semantic checks. This guarantees cleanup // of arguments and function on error. CallExpr *TheCall; if (Config) TheCall = new (Context) CUDAKernelCallExpr(Context, Fn, cast(Config), Args, Context.BoolTy, VK_RValue, RParenLoc); else TheCall = new (Context) CallExpr(Context, Fn, Args, Context.BoolTy, VK_RValue, RParenLoc); if (!getLangOpts().CPlusPlus) { // C cannot always handle TypoExpr nodes in builtin calls and direct // function calls as their argument checking don't necessarily handle // dependent types properly, so make sure any TypoExprs have been // dealt with. ExprResult Result = CorrectDelayedTyposInExpr(TheCall); if (!Result.isUsable()) return ExprError(); TheCall = dyn_cast(Result.get()); if (!TheCall) return Result; Args = llvm::makeArrayRef(TheCall->getArgs(), TheCall->getNumArgs()); } // Bail out early if calling a builtin with custom typechecking. if (BuiltinID && Context.BuiltinInfo.hasCustomTypechecking(BuiltinID)) return CheckBuiltinFunctionCall(FDecl, BuiltinID, TheCall); retry: const FunctionType *FuncT; if (const PointerType *PT = Fn->getType()->getAs()) { // C99 6.5.2.2p1 - "The expression that denotes the called function shall // have type pointer to function". FuncT = PT->getPointeeType()->getAs(); if (!FuncT) return ExprError(Diag(LParenLoc, diag::err_typecheck_call_not_function) << Fn->getType() << Fn->getSourceRange()); } else if (const BlockPointerType *BPT = Fn->getType()->getAs()) { FuncT = BPT->getPointeeType()->castAs(); } else { // Handle calls to expressions of unknown-any type. 
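    //
    // __unknown_anytype arises from the debugger's expression evaluation,
    // e.g. (hypothetical):
    //
    //   extern __unknown_anytype opaque();
    //   int x = (int)opaque();   // the callee is rebuilt with a concrete
    //                            // function type before the call completes
    //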
    if (Fn->getType() == Context.UnknownAnyTy) {
      ExprResult rewrite = rebuildUnknownAnyFunction(*this, Fn);
      if (rewrite.isInvalid()) return ExprError();
      Fn = rewrite.get();
      TheCall->setCallee(Fn);
      goto retry;
    }

    return ExprError(Diag(LParenLoc, diag::err_typecheck_call_not_function)
                         << Fn->getType() << Fn->getSourceRange());
  }

  if (getLangOpts().CUDA) {
    if (Config) {
      // CUDA: Kernel calls must be to global functions
      if (FDecl && !FDecl->hasAttr<CUDAGlobalAttr>())
        return ExprError(Diag(LParenLoc, diag::err_kern_call_not_global_function)
                             << FDecl->getName() << Fn->getSourceRange());

      // CUDA: Kernel function must have 'void' return type
      if (!FuncT->getReturnType()->isVoidType())
        return ExprError(Diag(LParenLoc, diag::err_kern_type_not_void_return)
                             << Fn->getType() << Fn->getSourceRange());
    } else {
      // CUDA: Calls to global functions must be configured
      if (FDecl && FDecl->hasAttr<CUDAGlobalAttr>())
        return ExprError(Diag(LParenLoc, diag::err_global_call_not_config)
                             << FDecl->getName() << Fn->getSourceRange());
    }
  }

  // Check for a valid return type
  if (CheckCallReturnType(FuncT->getReturnType(), Fn->getLocStart(), TheCall,
                          FDecl))
    return ExprError();

  // We know the result type of the call, set it.
  TheCall->setType(FuncT->getCallResultType(Context));
  TheCall->setValueKind(Expr::getValueKindForType(FuncT->getReturnType()));

  const FunctionProtoType *Proto = dyn_cast<FunctionProtoType>(FuncT);
  if (Proto) {
    if (ConvertArgumentsForCall(TheCall, Fn, FDecl, Proto, Args, RParenLoc,
                                IsExecConfig))
      return ExprError();
  } else {
    assert(isa<FunctionNoProtoType>(FuncT) && "Unknown FunctionType!");

    if (FDecl) {
      // Check if we have too few/too many template arguments, based
      // on our knowledge of the function definition.
      const FunctionDecl *Def = nullptr;
      if (FDecl->hasBody(Def) && Args.size() != Def->param_size()) {
        Proto = Def->getType()->getAs<FunctionProtoType>();
        if (!Proto || !(Proto->isVariadic() && Args.size() >= Def->param_size()))
          Diag(RParenLoc, diag::warn_call_wrong_number_of_arguments)
              << (Args.size() > Def->param_size()) << FDecl
              << Fn->getSourceRange();
      }

      // If the function we're calling isn't a function prototype, but we have
      // a function prototype from a prior declaration, use that prototype.
      if (!FDecl->hasPrototype())
        Proto = FDecl->getType()->getAs<FunctionProtoType>();
    }

    // Promote the arguments (C99 6.5.2.2p6).
    for (unsigned i = 0, e = Args.size(); i != e; i++) {
      Expr *Arg = Args[i];

      if (Proto && i < Proto->getNumParams()) {
        InitializedEntity Entity = InitializedEntity::InitializeParameter(
            Context, Proto->getParamType(i), Proto->isParamConsumed(i));
        ExprResult ArgE =
            PerformCopyInitialization(Entity, SourceLocation(), Arg);
        if (ArgE.isInvalid())
          return true;

        Arg = ArgE.getAs<Expr>();
      } else {
        ExprResult ArgE = DefaultArgumentPromotion(Arg);

        if (ArgE.isInvalid())
          return true;

        Arg = ArgE.getAs<Expr>();
      }

      if (RequireCompleteType(Arg->getLocStart(), Arg->getType(),
                              diag::err_call_incomplete_argument, Arg))
        return ExprError();

      TheCall->setArg(i, Arg);
    }
  }

  if (CXXMethodDecl *Method = dyn_cast_or_null<CXXMethodDecl>(FDecl))
    if (!Method->isStatic())
      return ExprError(Diag(LParenLoc, diag::err_member_call_without_object)
                           << Fn->getSourceRange());

  // Check for sentinels
  if (NDecl)
    DiagnoseSentinelCalls(NDecl, LParenLoc, Args);

  // Do special checking on direct calls to functions.
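  //
  // CheckFunctionCall is where, for example, format-string and nonnull
  // attribute checks run for a direct callee (hypothetical):
  //
  //   void log(const char *fmt, ...) __attribute__((format(printf, 1, 2)));
  //   void g(void) { log("%s", 42); }   // -Wformat: '%s' expects 'char *'
  //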
if (FDecl) { if (CheckFunctionCall(FDecl, TheCall, Proto)) return ExprError(); if (BuiltinID) return CheckBuiltinFunctionCall(FDecl, BuiltinID, TheCall); } else if (NDecl) { if (CheckPointerCall(NDecl, TheCall, Proto)) return ExprError(); } else { if (CheckOtherCall(TheCall, Proto)) return ExprError(); } return MaybeBindToTemporary(TheCall); } ExprResult Sema::ActOnCompoundLiteral(SourceLocation LParenLoc, ParsedType Ty, SourceLocation RParenLoc, Expr *InitExpr) { assert(Ty && "ActOnCompoundLiteral(): missing type"); assert(InitExpr && "ActOnCompoundLiteral(): missing expression"); TypeSourceInfo *TInfo; QualType literalType = GetTypeFromParser(Ty, &TInfo); if (!TInfo) TInfo = Context.getTrivialTypeSourceInfo(literalType); return BuildCompoundLiteralExpr(LParenLoc, TInfo, RParenLoc, InitExpr); } ExprResult Sema::BuildCompoundLiteralExpr(SourceLocation LParenLoc, TypeSourceInfo *TInfo, SourceLocation RParenLoc, Expr *LiteralExpr) { QualType literalType = TInfo->getType(); if (literalType->isArrayType()) { if (RequireCompleteType(LParenLoc, Context.getBaseElementType(literalType), diag::err_illegal_decl_array_incomplete_type, SourceRange(LParenLoc, LiteralExpr->getSourceRange().getEnd()))) return ExprError(); if (literalType->isVariableArrayType()) return ExprError(Diag(LParenLoc, diag::err_variable_object_no_init) << SourceRange(LParenLoc, LiteralExpr->getSourceRange().getEnd())); } else if (!literalType->isDependentType() && RequireCompleteType(LParenLoc, literalType, diag::err_typecheck_decl_incomplete_type, SourceRange(LParenLoc, LiteralExpr->getSourceRange().getEnd()))) return ExprError(); InitializedEntity Entity = InitializedEntity::InitializeCompoundLiteralInit(TInfo); InitializationKind Kind = InitializationKind::CreateCStyleCast(LParenLoc, SourceRange(LParenLoc, RParenLoc), /*InitList=*/true); InitializationSequence InitSeq(*this, Entity, Kind, LiteralExpr); ExprResult Result = InitSeq.Perform(*this, Entity, Kind, LiteralExpr, &literalType); if (Result.isInvalid()) return ExprError(); LiteralExpr = Result.get(); bool isFileScope = !CurContext->isFunctionOrMethod(); if (isFileScope && !LiteralExpr->isTypeDependent() && !LiteralExpr->isValueDependent() && !literalType->isDependentType()) { // 6.5.2.5p3 if (CheckForConstantInitializer(LiteralExpr, literalType)) return ExprError(); } // In C, compound literals are l-values for some reason. // For GCC compatibility, in C++, file-scope array compound literals with // constant initializers are also l-values, and compound literals are // otherwise prvalues. // // (GCC also treats C++ list-initialized file-scope array prvalues with // constant initializers as l-values, but that's non-conforming, so we don't // follow it there.) // // FIXME: It would be better to handle the lvalue cases as materializing and // lifetime-extending a temporary object, but our materialized temporaries // representation only supports lifetime extension from a variable, not "out // of thin air". // FIXME: For C++, we might want to instead lifetime-extend only if a pointer // is bound to the result of applying array-to-pointer decay to the compound // literal. // FIXME: GCC supports compound literals of reference type, which should // obviously have a value kind derived from the kind of reference involved. ExprValueKind VK = (getLangOpts().CPlusPlus && !(isFileScope && literalType->isArrayType())) ? 
      VK_RValue : VK_LValue;

  return MaybeBindToTemporary(new (Context) CompoundLiteralExpr(
      LParenLoc, TInfo, literalType, VK, LiteralExpr, isFileScope));
}

ExprResult
Sema::ActOnInitList(SourceLocation LBraceLoc, MultiExprArg InitArgList,
                    SourceLocation RBraceLoc) {
  // Immediately handle non-overload placeholders.  Overloads can be
  // resolved contextually, but everything else here can't.
  for (unsigned I = 0, E = InitArgList.size(); I != E; ++I) {
    if (InitArgList[I]->getType()->isNonOverloadPlaceholderType()) {
      ExprResult result = CheckPlaceholderExpr(InitArgList[I]);

      // Ignore failures; dropping the entire initializer list because
      // of one failure would be terrible for indexing/etc.
      if (result.isInvalid()) continue;

      InitArgList[I] = result.get();
    }
  }

  // Semantic analysis for initializers is done by ActOnDeclarator() and
  // CheckInitializer() - it requires knowledge of the object being
  // initialized.
  InitListExpr *E = new (Context) InitListExpr(Context, LBraceLoc, InitArgList,
                                               RBraceLoc);
  E->setType(Context.VoidTy); // FIXME: just a place holder for now.
  return E;
}

/// Do an explicit extend of the given block pointer if we're in ARC.
void Sema::maybeExtendBlockObject(ExprResult &E) {
  assert(E.get()->getType()->isBlockPointerType());
  assert(E.get()->isRValue());

  // Only do this in an r-value context.
  if (!getLangOpts().ObjCAutoRefCount) return;

  E = ImplicitCastExpr::Create(Context, E.get()->getType(),
                               CK_ARCExtendBlockObject, E.get(),
                               /*base path*/ nullptr, VK_RValue);
  Cleanup.setExprNeedsCleanups(true);
}

/// Prepare a conversion of the given expression to an ObjC object
/// pointer type.
CastKind Sema::PrepareCastToObjCObjectPointer(ExprResult &E) {
  QualType type = E.get()->getType();
  if (type->isObjCObjectPointerType()) {
    return CK_BitCast;
  } else if (type->isBlockPointerType()) {
    maybeExtendBlockObject(E);
    return CK_BlockPointerToObjCPointerCast;
  } else {
    assert(type->isPointerType());
    return CK_CPointerToObjCPointerCast;
  }
}

/// Prepares for a scalar cast, performing all the necessary stages
/// except the final cast and returning the kind required.
CastKind Sema::PrepareScalarCast(ExprResult &Src, QualType DestTy) {
  // Both Src and Dest are scalar types, i.e. arithmetic or pointer.
  // Also, callers should have filtered out the invalid cases with
  // pointers.  Everything else should be possible.

  QualType SrcTy = Src.get()->getType();
  if (Context.hasSameUnqualifiedType(SrcTy, DestTy))
    return CK_NoOp;

  switch (Type::ScalarTypeKind SrcKind = SrcTy->getScalarTypeKind()) {
  case Type::STK_MemberPointer:
    llvm_unreachable("member pointer type in C");

  case Type::STK_CPointer:
  case Type::STK_BlockPointer:
  case Type::STK_ObjCObjectPointer:
    switch (DestTy->getScalarTypeKind()) {
    case Type::STK_CPointer: {
      LangAS SrcAS = SrcTy->getPointeeType().getAddressSpace();
      LangAS DestAS = DestTy->getPointeeType().getAddressSpace();
      if (SrcAS != DestAS)
        return CK_AddressSpaceConversion;
      return CK_BitCast;
    }
    case Type::STK_BlockPointer:
      return (SrcKind == Type::STK_BlockPointer ?
CK_BitCast : CK_AnyPointerToBlockPointerCast); case Type::STK_ObjCObjectPointer: if (SrcKind == Type::STK_ObjCObjectPointer) return CK_BitCast; if (SrcKind == Type::STK_CPointer) return CK_CPointerToObjCPointerCast; maybeExtendBlockObject(Src); return CK_BlockPointerToObjCPointerCast; case Type::STK_Bool: return CK_PointerToBoolean; case Type::STK_Integral: return CK_PointerToIntegral; case Type::STK_Floating: case Type::STK_FloatingComplex: case Type::STK_IntegralComplex: case Type::STK_MemberPointer: llvm_unreachable("illegal cast from pointer"); } llvm_unreachable("Should have returned before this"); case Type::STK_Bool: // casting from bool is like casting from an integer case Type::STK_Integral: switch (DestTy->getScalarTypeKind()) { case Type::STK_CPointer: case Type::STK_ObjCObjectPointer: case Type::STK_BlockPointer: if (Src.get()->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull)) return CK_NullToPointer; return CK_IntegralToPointer; case Type::STK_Bool: return CK_IntegralToBoolean; case Type::STK_Integral: return CK_IntegralCast; case Type::STK_Floating: return CK_IntegralToFloating; case Type::STK_IntegralComplex: Src = ImpCastExprToType(Src.get(), DestTy->castAs()->getElementType(), CK_IntegralCast); return CK_IntegralRealToComplex; case Type::STK_FloatingComplex: Src = ImpCastExprToType(Src.get(), DestTy->castAs()->getElementType(), CK_IntegralToFloating); return CK_FloatingRealToComplex; case Type::STK_MemberPointer: llvm_unreachable("member pointer type in C"); } llvm_unreachable("Should have returned before this"); case Type::STK_Floating: switch (DestTy->getScalarTypeKind()) { case Type::STK_Floating: return CK_FloatingCast; case Type::STK_Bool: return CK_FloatingToBoolean; case Type::STK_Integral: return CK_FloatingToIntegral; case Type::STK_FloatingComplex: Src = ImpCastExprToType(Src.get(), DestTy->castAs()->getElementType(), CK_FloatingCast); return CK_FloatingRealToComplex; case Type::STK_IntegralComplex: Src = ImpCastExprToType(Src.get(), DestTy->castAs()->getElementType(), CK_FloatingToIntegral); return CK_IntegralRealToComplex; case Type::STK_CPointer: case Type::STK_ObjCObjectPointer: case Type::STK_BlockPointer: llvm_unreachable("valid float->pointer cast?"); case Type::STK_MemberPointer: llvm_unreachable("member pointer type in C"); } llvm_unreachable("Should have returned before this"); case Type::STK_FloatingComplex: switch (DestTy->getScalarTypeKind()) { case Type::STK_FloatingComplex: return CK_FloatingComplexCast; case Type::STK_IntegralComplex: return CK_FloatingComplexToIntegralComplex; case Type::STK_Floating: { QualType ET = SrcTy->castAs()->getElementType(); if (Context.hasSameType(ET, DestTy)) return CK_FloatingComplexToReal; Src = ImpCastExprToType(Src.get(), ET, CK_FloatingComplexToReal); return CK_FloatingCast; } case Type::STK_Bool: return CK_FloatingComplexToBoolean; case Type::STK_Integral: Src = ImpCastExprToType(Src.get(), SrcTy->castAs()->getElementType(), CK_FloatingComplexToReal); return CK_FloatingToIntegral; case Type::STK_CPointer: case Type::STK_ObjCObjectPointer: case Type::STK_BlockPointer: llvm_unreachable("valid complex float->pointer cast?"); case Type::STK_MemberPointer: llvm_unreachable("member pointer type in C"); } llvm_unreachable("Should have returned before this"); case Type::STK_IntegralComplex: switch (DestTy->getScalarTypeKind()) { case Type::STK_FloatingComplex: return CK_IntegralComplexToFloatingComplex; case Type::STK_IntegralComplex: return CK_IntegralComplexCast; case Type::STK_Integral: { QualType ET = 
SrcTy->castAs()->getElementType(); if (Context.hasSameType(ET, DestTy)) return CK_IntegralComplexToReal; Src = ImpCastExprToType(Src.get(), ET, CK_IntegralComplexToReal); return CK_IntegralCast; } case Type::STK_Bool: return CK_IntegralComplexToBoolean; case Type::STK_Floating: Src = ImpCastExprToType(Src.get(), SrcTy->castAs()->getElementType(), CK_IntegralComplexToReal); return CK_IntegralToFloating; case Type::STK_CPointer: case Type::STK_ObjCObjectPointer: case Type::STK_BlockPointer: llvm_unreachable("valid complex int->pointer cast?"); case Type::STK_MemberPointer: llvm_unreachable("member pointer type in C"); } llvm_unreachable("Should have returned before this"); } llvm_unreachable("Unhandled scalar cast"); } static bool breakDownVectorType(QualType type, uint64_t &len, QualType &eltType) { // Vectors are simple. if (const VectorType *vecType = type->getAs()) { len = vecType->getNumElements(); eltType = vecType->getElementType(); assert(eltType->isScalarType()); return true; } // We allow lax conversion to and from non-vector types, but only if // they're real types (i.e. non-complex, non-pointer scalar types). if (!type->isRealType()) return false; len = 1; eltType = type; return true; } /// Are the two types lax-compatible vector types? That is, given /// that one of them is a vector, do they have equal storage sizes, /// where the storage size is the number of elements times the element /// size? /// /// This will also return false if either of the types is neither a /// vector nor a real type. bool Sema::areLaxCompatibleVectorTypes(QualType srcTy, QualType destTy) { assert(destTy->isVectorType() || srcTy->isVectorType()); // Disallow lax conversions between scalars and ExtVectors (these // conversions are allowed for other vector types because common headers // depend on them). Most scalar OP ExtVector cases are handled by the // splat path anyway, which does what we want (convert, not bitcast). // What this rules out for ExtVectors is crazy things like char4*float. if (srcTy->isScalarType() && destTy->isExtVectorType()) return false; if (destTy->isScalarType() && srcTy->isExtVectorType()) return false; uint64_t srcLen, destLen; QualType srcEltTy, destEltTy; if (!breakDownVectorType(srcTy, srcLen, srcEltTy)) return false; if (!breakDownVectorType(destTy, destLen, destEltTy)) return false; // ASTContext::getTypeSize will return the size rounded up to a // power of 2, so instead of using that, we need to use the raw // element size multiplied by the element count. uint64_t srcEltSize = Context.getTypeSize(srcEltTy); uint64_t destEltSize = Context.getTypeSize(destEltTy); return (srcLen * srcEltSize == destLen * destEltSize); } /// Is this a legal conversion between two types, one of which is /// known to be a vector type? bool Sema::isLaxVectorConversion(QualType srcTy, QualType destTy) { assert(destTy->isVectorType() || srcTy->isVectorType()); if (!Context.getLangOpts().LaxVectorConversions) return false; return areLaxCompatibleVectorTypes(srcTy, destTy); } bool Sema::CheckVectorCast(SourceRange R, QualType VectorTy, QualType Ty, CastKind &Kind) { assert(VectorTy->isVectorType() && "Not a vector type!"); if (Ty->isVectorType() || Ty->isIntegralType(Context)) { if (!areLaxCompatibleVectorTypes(Ty, VectorTy)) return Diag(R.getBegin(), Ty->isVectorType() ? 
diag::err_invalid_conversion_between_vectors : diag::err_invalid_conversion_between_vector_and_integer) << VectorTy << Ty << R; } else return Diag(R.getBegin(), diag::err_invalid_conversion_between_vector_and_scalar) << VectorTy << Ty << R; Kind = CK_BitCast; return false; } ExprResult Sema::prepareVectorSplat(QualType VectorTy, Expr *SplattedExpr) { QualType DestElemTy = VectorTy->castAs()->getElementType(); if (DestElemTy == SplattedExpr->getType()) return SplattedExpr; assert(DestElemTy->isFloatingType() || DestElemTy->isIntegralOrEnumerationType()); CastKind CK; if (VectorTy->isExtVectorType() && SplattedExpr->getType()->isBooleanType()) { // OpenCL requires that we convert `true` boolean expressions to -1, but // only when splatting vectors. if (DestElemTy->isFloatingType()) { // To avoid having to have a CK_BooleanToSignedFloating cast kind, we cast // in two steps: boolean to signed integral, then to floating. ExprResult CastExprRes = ImpCastExprToType(SplattedExpr, Context.IntTy, CK_BooleanToSignedIntegral); SplattedExpr = CastExprRes.get(); CK = CK_IntegralToFloating; } else { CK = CK_BooleanToSignedIntegral; } } else { ExprResult CastExprRes = SplattedExpr; CK = PrepareScalarCast(CastExprRes, DestElemTy); if (CastExprRes.isInvalid()) return ExprError(); SplattedExpr = CastExprRes.get(); } return ImpCastExprToType(SplattedExpr, DestElemTy, CK); } ExprResult Sema::CheckExtVectorCast(SourceRange R, QualType DestTy, Expr *CastExpr, CastKind &Kind) { assert(DestTy->isExtVectorType() && "Not an extended vector type!"); QualType SrcTy = CastExpr->getType(); // If SrcTy is a VectorType, the total size must match to explicitly cast to // an ExtVectorType. // In OpenCL, casts between vectors of different types are not allowed. // (See OpenCL 6.2). if (SrcTy->isVectorType()) { if (!areLaxCompatibleVectorTypes(SrcTy, DestTy) || (getLangOpts().OpenCL && !Context.hasSameUnqualifiedType(DestTy, SrcTy))) { Diag(R.getBegin(),diag::err_invalid_conversion_between_ext_vectors) << DestTy << SrcTy << R; return ExprError(); } Kind = CK_BitCast; return CastExpr; } // All non-pointer scalars can be cast to ExtVector type. The appropriate // conversion will take place first from scalar to elt type, and then // splat from elt type to vector. if (SrcTy->isPointerType()) return Diag(R.getBegin(), diag::err_invalid_conversion_between_vector_and_scalar) << DestTy << SrcTy << R; Kind = CK_VectorSplat; return prepareVectorSplat(DestTy, CastExpr); } ExprResult Sema::ActOnCastExpr(Scope *S, SourceLocation LParenLoc, Declarator &D, ParsedType &Ty, SourceLocation RParenLoc, Expr *CastExpr) { assert(!D.isInvalidType() && (CastExpr != nullptr) && "ActOnCastExpr(): missing type or expr"); TypeSourceInfo *castTInfo = GetTypeForDeclaratorCast(D, CastExpr->getType()); if (D.isInvalidType()) return ExprError(); if (getLangOpts().CPlusPlus) { // Check that there are no default arguments (C++ only). CheckExtraCXXDefaultArguments(D); } else { // Make sure any TypoExprs have been dealt with. ExprResult Res = CorrectDelayedTyposInExpr(CastExpr); if (!Res.isUsable()) return ExprError(); CastExpr = Res.get(); } checkUnusedDeclAttributes(D); QualType castType = castTInfo->getType(); Ty = CreateParsedType(castType, castTInfo); bool isVectorLiteral = false; // Check for an altivec or OpenCL literal, // i.e. all the elements are integer constants. 
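  //
  // E.g. (hypothetical OpenCL-style literals):
  //
  //   typedef __attribute__((ext_vector_type(4))) float float4;
  //   float4 v = (float4)(1.0f);            // single init: splatted
  //   float4 w = (float4)(1.0f, 2, 3, 4);   // one initializer per element
  //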
ParenExpr *PE = dyn_cast(CastExpr); ParenListExpr *PLE = dyn_cast(CastExpr); if ((getLangOpts().AltiVec || getLangOpts().ZVector || getLangOpts().OpenCL) && castType->isVectorType() && (PE || PLE)) { if (PLE && PLE->getNumExprs() == 0) { Diag(PLE->getExprLoc(), diag::err_altivec_empty_initializer); return ExprError(); } if (PE || PLE->getNumExprs() == 1) { Expr *E = (PE ? PE->getSubExpr() : PLE->getExpr(0)); if (!E->getType()->isVectorType()) isVectorLiteral = true; } else isVectorLiteral = true; } // If this is a vector initializer, '(' type ')' '(' init, ..., init ')' // then handle it as such. if (isVectorLiteral) return BuildVectorLiteral(LParenLoc, RParenLoc, CastExpr, castTInfo); // If the Expr being casted is a ParenListExpr, handle it specially. // This is not an AltiVec-style cast, so turn the ParenListExpr into a // sequence of BinOp comma operators. if (isa(CastExpr)) { ExprResult Result = MaybeConvertParenListExprToParenExpr(S, CastExpr); if (Result.isInvalid()) return ExprError(); CastExpr = Result.get(); } if (getLangOpts().CPlusPlus && !castType->isVoidType() && !getSourceManager().isInSystemMacro(LParenLoc)) Diag(LParenLoc, diag::warn_old_style_cast) << CastExpr->getSourceRange(); CheckTollFreeBridgeCast(castType, CastExpr); CheckObjCBridgeRelatedCast(castType, CastExpr); DiscardMisalignedMemberAddress(castType.getTypePtr(), CastExpr); return BuildCStyleCastExpr(LParenLoc, castTInfo, RParenLoc, CastExpr); } ExprResult Sema::BuildVectorLiteral(SourceLocation LParenLoc, SourceLocation RParenLoc, Expr *E, TypeSourceInfo *TInfo) { assert((isa(E) || isa(E)) && "Expected paren or paren list expression"); Expr **exprs; unsigned numExprs; Expr *subExpr; SourceLocation LiteralLParenLoc, LiteralRParenLoc; if (ParenListExpr *PE = dyn_cast(E)) { LiteralLParenLoc = PE->getLParenLoc(); LiteralRParenLoc = PE->getRParenLoc(); exprs = PE->getExprs(); numExprs = PE->getNumExprs(); } else { // isa by assertion at function entrance LiteralLParenLoc = cast(E)->getLParen(); LiteralRParenLoc = cast(E)->getRParen(); subExpr = cast(E)->getSubExpr(); exprs = &subExpr; numExprs = 1; } QualType Ty = TInfo->getType(); assert(Ty->isVectorType() && "Expected vector type"); SmallVector initExprs; const VectorType *VTy = Ty->getAs(); unsigned numElems = Ty->getAs()->getNumElements(); // '(...)' form of vector initialization in AltiVec: the number of // initializers must be one or must match the size of the vector. // If a single value is specified in the initializer then it will be // replicated to all the components of the vector if (VTy->getVectorKind() == VectorType::AltiVecVector) { // The number of initializers must be one or must match the size of the // vector. If a single value is specified in the initializer then it will // be replicated to all the components of the vector if (numExprs == 1) { QualType ElemTy = Ty->getAs()->getElementType(); ExprResult Literal = DefaultLvalueConversion(exprs[0]); if (Literal.isInvalid()) return ExprError(); Literal = ImpCastExprToType(Literal.get(), ElemTy, PrepareScalarCast(Literal, ElemTy)); return BuildCStyleCastExpr(LParenLoc, TInfo, RParenLoc, Literal.get()); } else if (numExprs < numElems) { Diag(E->getExprLoc(), diag::err_incorrect_number_of_vector_initializers); return ExprError(); } else initExprs.append(exprs, exprs + numExprs); } else { // For OpenCL, when the number of initializers is a single value, // it will be replicated to all components of the vector. 
    if (getLangOpts().OpenCL &&
        VTy->getVectorKind() == VectorType::GenericVector && numExprs == 1) {
      QualType ElemTy = Ty->getAs<VectorType>()->getElementType();
      ExprResult Literal = DefaultLvalueConversion(exprs[0]);
      if (Literal.isInvalid())
        return ExprError();
      Literal = ImpCastExprToType(Literal.get(), ElemTy,
                                  PrepareScalarCast(Literal, ElemTy));
      return BuildCStyleCastExpr(LParenLoc, TInfo, RParenLoc, Literal.get());
    }

    initExprs.append(exprs, exprs + numExprs);
  }
  // FIXME: This means that pretty-printing the final AST will produce curly
  // braces instead of the original commas.
  InitListExpr *initE = new (Context) InitListExpr(Context, LiteralLParenLoc,
                                                   initExprs, LiteralRParenLoc);
  initE->setType(Ty);
  return BuildCompoundLiteralExpr(LParenLoc, TInfo, RParenLoc, initE);
}

/// This is not an AltiVec-style cast or C++ direct-initialization, so turn
/// the ParenListExpr into a sequence of comma binary operators.
ExprResult
Sema::MaybeConvertParenListExprToParenExpr(Scope *S, Expr *OrigExpr) {
  ParenListExpr *E = dyn_cast<ParenListExpr>(OrigExpr);
  if (!E)
    return OrigExpr;

  ExprResult Result(E->getExpr(0));

  for (unsigned i = 1, e = E->getNumExprs(); i != e && !Result.isInvalid(); ++i)
    Result = ActOnBinOp(S, E->getExprLoc(), tok::comma, Result.get(),
                        E->getExpr(i));

  if (Result.isInvalid()) return ExprError();

  return ActOnParenExpr(E->getLParenLoc(), E->getRParenLoc(), Result.get());
}

ExprResult Sema::ActOnParenListExpr(SourceLocation L, SourceLocation R,
                                    MultiExprArg Val) {
  Expr *expr = new (Context) ParenListExpr(Context, L, Val, R);
  return expr;
}

/// \brief Emit a specialized diagnostic when one expression is a null pointer
/// constant and the other is not a pointer.  Returns true if a diagnostic is
/// emitted.
bool Sema::DiagnoseConditionalForNull(Expr *LHSExpr, Expr *RHSExpr,
                                      SourceLocation QuestionLoc) {
  Expr *NullExpr = LHSExpr;
  Expr *NonPointerExpr = RHSExpr;
  Expr::NullPointerConstantKind NullKind =
      NullExpr->isNullPointerConstant(Context,
                                      Expr::NPC_ValueDependentIsNotNull);

  if (NullKind == Expr::NPCK_NotNull) {
    NullExpr = RHSExpr;
    NonPointerExpr = LHSExpr;
    NullKind =
        NullExpr->isNullPointerConstant(Context,
                                        Expr::NPC_ValueDependentIsNotNull);
  }

  if (NullKind == Expr::NPCK_NotNull)
    return false;
  if (NullKind == Expr::NPCK_ZeroExpression)
    return false;

  if (NullKind == Expr::NPCK_ZeroLiteral) {
    // In this case, check to make sure that we got here from a "NULL"
    // string in the source code.
    NullExpr = NullExpr->IgnoreParenImpCasts();
    SourceLocation loc = NullExpr->getExprLoc();
    if (!findMacroSpelling(loc, "NULL"))
      return false;
  }

  int DiagType = (NullKind == Expr::NPCK_CXX11_nullptr);
  Diag(QuestionLoc, diag::err_typecheck_cond_incompatible_operands_null)
      << NonPointerExpr->getType() << DiagType
      << NonPointerExpr->getSourceRange();
  return true;
}

/// \brief Return false if the condition expression is valid, true otherwise.
static bool checkCondition(Sema &S, Expr *Cond, SourceLocation QuestionLoc) {
  QualType CondTy = Cond->getType();

  // OpenCL v1.1 s6.3.i says the condition cannot be a floating point type.
  if (S.getLangOpts().OpenCL && CondTy->isFloatingType()) {
    S.Diag(QuestionLoc, diag::err_typecheck_cond_expect_nonfloat)
        << CondTy << Cond->getSourceRange();
    return true;
  }

  // C99 6.5.15p2
  if (CondTy->isScalarType()) return false;

  S.Diag(QuestionLoc, diag::err_typecheck_cond_expect_scalar)
      << CondTy << Cond->getSourceRange();
  return true;
}

/// \brief Handle when one or both operands are void type.
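///
/// For example (hypothetical), only one operand being void is accepted,
/// with a warning, as a GNU extension:
///
/// \code
///   cond ? (void)0 : puts("hi");   // ext_typecheck_cond_one_void
/// \endcode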
static QualType checkConditionalVoidType(Sema &S, ExprResult &LHS, ExprResult &RHS) { Expr *LHSExpr = LHS.get(); Expr *RHSExpr = RHS.get(); if (!LHSExpr->getType()->isVoidType()) S.Diag(RHSExpr->getLocStart(), diag::ext_typecheck_cond_one_void) << RHSExpr->getSourceRange(); if (!RHSExpr->getType()->isVoidType()) S.Diag(LHSExpr->getLocStart(), diag::ext_typecheck_cond_one_void) << LHSExpr->getSourceRange(); LHS = S.ImpCastExprToType(LHS.get(), S.Context.VoidTy, CK_ToVoid); RHS = S.ImpCastExprToType(RHS.get(), S.Context.VoidTy, CK_ToVoid); return S.Context.VoidTy; } /// \brief Return false if the NullExpr can be promoted to PointerTy, /// true otherwise. static bool checkConditionalNullPointer(Sema &S, ExprResult &NullExpr, QualType PointerTy) { if ((!PointerTy->isAnyPointerType() && !PointerTy->isBlockPointerType()) || !NullExpr.get()->isNullPointerConstant(S.Context, Expr::NPC_ValueDependentIsNull)) return true; NullExpr = S.ImpCastExprToType(NullExpr.get(), PointerTy, CK_NullToPointer); return false; } /// \brief Checks compatibility between two pointers and return the resulting /// type. static QualType checkConditionalPointerCompatibility(Sema &S, ExprResult &LHS, ExprResult &RHS, SourceLocation Loc) { QualType LHSTy = LHS.get()->getType(); QualType RHSTy = RHS.get()->getType(); if (S.Context.hasSameType(LHSTy, RHSTy)) { // Two identical pointers types are always compatible. return LHSTy; } QualType lhptee, rhptee; // Get the pointee types. bool IsBlockPointer = false; if (const BlockPointerType *LHSBTy = LHSTy->getAs()) { lhptee = LHSBTy->getPointeeType(); rhptee = RHSTy->castAs()->getPointeeType(); IsBlockPointer = true; } else { lhptee = LHSTy->castAs()->getPointeeType(); rhptee = RHSTy->castAs()->getPointeeType(); } // C99 6.5.15p6: If both operands are pointers to compatible types or to // differently qualified versions of compatible types, the result type is // a pointer to an appropriately qualified version of the composite // type. // Only CVR-qualifiers exist in the standard, and the differently-qualified // clause doesn't make sense for our extensions. E.g. address space 2 should // be incompatible with address space 3: they may live on different devices or // anything. Qualifiers lhQual = lhptee.getQualifiers(); Qualifiers rhQual = rhptee.getQualifiers(); LangAS ResultAddrSpace = LangAS::Default; LangAS LAddrSpace = lhQual.getAddressSpace(); LangAS RAddrSpace = rhQual.getAddressSpace(); if (S.getLangOpts().OpenCL) { // OpenCL v1.1 s6.5 - Conversion between pointers to distinct address // spaces is disallowed. if (lhQual.isAddressSpaceSupersetOf(rhQual)) ResultAddrSpace = LAddrSpace; else if (rhQual.isAddressSpaceSupersetOf(lhQual)) ResultAddrSpace = RAddrSpace; else { S.Diag(Loc, diag::err_typecheck_op_on_nonoverlapping_address_space_pointers) << LHSTy << RHSTy << 2 << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } } unsigned MergedCVRQual = lhQual.getCVRQualifiers() | rhQual.getCVRQualifiers(); auto LHSCastKind = CK_BitCast, RHSCastKind = CK_BitCast; lhQual.removeCVRQualifiers(); rhQual.removeCVRQualifiers(); // OpenCL v2.0 specification doesn't extend compatibility of type qualifiers // (C99 6.7.3) for address spaces. 
We assume that the check should behave in // the same manner as it's defined for CVR qualifiers, so for OpenCL two // qual types are compatible iff // * corresponded types are compatible // * CVR qualifiers are equal // * address spaces are equal // Thus for conditional operator we merge CVR and address space unqualified // pointees and if there is a composite type we return a pointer to it with // merged qualifiers. if (S.getLangOpts().OpenCL) { LHSCastKind = LAddrSpace == ResultAddrSpace ? CK_BitCast : CK_AddressSpaceConversion; RHSCastKind = RAddrSpace == ResultAddrSpace ? CK_BitCast : CK_AddressSpaceConversion; lhQual.removeAddressSpace(); rhQual.removeAddressSpace(); } lhptee = S.Context.getQualifiedType(lhptee.getUnqualifiedType(), lhQual); rhptee = S.Context.getQualifiedType(rhptee.getUnqualifiedType(), rhQual); QualType CompositeTy = S.Context.mergeTypes(lhptee, rhptee); if (CompositeTy.isNull()) { // In this situation, we assume void* type. No especially good // reason, but this is what gcc does, and we do have to pick // to get a consistent AST. QualType incompatTy; incompatTy = S.Context.getPointerType( S.Context.getAddrSpaceQualType(S.Context.VoidTy, ResultAddrSpace)); LHS = S.ImpCastExprToType(LHS.get(), incompatTy, LHSCastKind); RHS = S.ImpCastExprToType(RHS.get(), incompatTy, RHSCastKind); // FIXME: For OpenCL the warning emission and cast to void* leaves a room // for casts between types with incompatible address space qualifiers. // For the following code the compiler produces casts between global and // local address spaces of the corresponded innermost pointees: // local int *global *a; // global int *global *b; // a = (0 ? a : b); // see C99 6.5.16.1.p1. S.Diag(Loc, diag::ext_typecheck_cond_incompatible_pointers) << LHSTy << RHSTy << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return incompatTy; } // The pointer types are compatible. // In case of OpenCL ResultTy should have the address space qualifier // which is a superset of address spaces of both the 2nd and the 3rd // operands of the conditional operator. QualType ResultTy = [&, ResultAddrSpace]() { if (S.getLangOpts().OpenCL) { Qualifiers CompositeQuals = CompositeTy.getQualifiers(); CompositeQuals.setAddressSpace(ResultAddrSpace); return S.Context .getQualifiedType(CompositeTy.getUnqualifiedType(), CompositeQuals) .withCVRQualifiers(MergedCVRQual); } return CompositeTy.withCVRQualifiers(MergedCVRQual); }(); if (IsBlockPointer) ResultTy = S.Context.getBlockPointerType(ResultTy); else ResultTy = S.Context.getPointerType(ResultTy); LHS = S.ImpCastExprToType(LHS.get(), ResultTy, LHSCastKind); RHS = S.ImpCastExprToType(RHS.get(), ResultTy, RHSCastKind); return ResultTy; } /// \brief Return the resulting type when the operands are both block pointers. static QualType checkConditionalBlockPointerCompatibility(Sema &S, ExprResult &LHS, ExprResult &RHS, SourceLocation Loc) { QualType LHSTy = LHS.get()->getType(); QualType RHSTy = RHS.get()->getType(); if (!LHSTy->isBlockPointerType() || !RHSTy->isBlockPointerType()) { if (LHSTy->isVoidPointerType() || RHSTy->isVoidPointerType()) { QualType destType = S.Context.getPointerType(S.Context.VoidTy); LHS = S.ImpCastExprToType(LHS.get(), destType, CK_BitCast); RHS = S.ImpCastExprToType(RHS.get(), destType, CK_BitCast); return destType; } S.Diag(Loc, diag::err_typecheck_cond_incompatible_operands) << LHSTy << RHSTy << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } // We have 2 block pointer types. 
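  //
  // E.g. (hypothetical): with 'void (^a)(void), (^b)(void);', the type of
  // 'cond ? a : b' is computed by the shared pointer-compatibility logic
  // below.
  //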
return checkConditionalPointerCompatibility(S, LHS, RHS, Loc); } /// \brief Return the resulting type when the operands are both pointers. static QualType checkConditionalObjectPointersCompatibility(Sema &S, ExprResult &LHS, ExprResult &RHS, SourceLocation Loc) { // get the pointer types QualType LHSTy = LHS.get()->getType(); QualType RHSTy = RHS.get()->getType(); // get the "pointed to" types QualType lhptee = LHSTy->getAs()->getPointeeType(); QualType rhptee = RHSTy->getAs()->getPointeeType(); // ignore qualifiers on void (C99 6.5.15p3, clause 6) if (lhptee->isVoidType() && rhptee->isIncompleteOrObjectType()) { // Figure out necessary qualifiers (C99 6.5.15p6) QualType destPointee = S.Context.getQualifiedType(lhptee, rhptee.getQualifiers()); QualType destType = S.Context.getPointerType(destPointee); // Add qualifiers if necessary. LHS = S.ImpCastExprToType(LHS.get(), destType, CK_NoOp); // Promote to void*. RHS = S.ImpCastExprToType(RHS.get(), destType, CK_BitCast); return destType; } if (rhptee->isVoidType() && lhptee->isIncompleteOrObjectType()) { QualType destPointee = S.Context.getQualifiedType(rhptee, lhptee.getQualifiers()); QualType destType = S.Context.getPointerType(destPointee); // Add qualifiers if necessary. RHS = S.ImpCastExprToType(RHS.get(), destType, CK_NoOp); // Promote to void*. LHS = S.ImpCastExprToType(LHS.get(), destType, CK_BitCast); return destType; } return checkConditionalPointerCompatibility(S, LHS, RHS, Loc); } /// \brief Return false if the first expression is not an integer and the second /// expression is not a pointer, true otherwise. static bool checkPointerIntegerMismatch(Sema &S, ExprResult &Int, Expr* PointerExpr, SourceLocation Loc, bool IsIntFirstExpr) { if (!PointerExpr->getType()->isPointerType() || !Int.get()->getType()->isIntegerType()) return false; Expr *Expr1 = IsIntFirstExpr ? Int.get() : PointerExpr; Expr *Expr2 = IsIntFirstExpr ? PointerExpr : Int.get(); S.Diag(Loc, diag::ext_typecheck_cond_pointer_integer_mismatch) << Expr1->getType() << Expr2->getType() << Expr1->getSourceRange() << Expr2->getSourceRange(); Int = S.ImpCastExprToType(Int.get(), PointerExpr->getType(), CK_IntegralToPointer); return true; } /// \brief Simple conversion between integer and floating point types. /// /// Used when handling the OpenCL conditional operator where the /// condition is a vector while the other operands are scalar. /// /// OpenCL v1.1 s6.3.i and s6.11.6 together require that the scalar /// types are either integer or floating type. Between the two /// operands, the type with the higher rank is defined as the "result /// type". The other operand needs to be promoted to the same type. No /// other type promotion is allowed. We cannot use /// UsualArithmeticConversions() for this purpose, since it always /// promotes promotable types. static QualType OpenCLArithmeticConversions(Sema &S, ExprResult &LHS, ExprResult &RHS, SourceLocation QuestionLoc) { LHS = S.DefaultFunctionArrayLvalueConversion(LHS.get()); if (LHS.isInvalid()) return QualType(); RHS = S.DefaultFunctionArrayLvalueConversion(RHS.get()); if (RHS.isInvalid()) return QualType(); // For conversion purposes, we ignore any qualifiers. // For example, "const float" and "float" are equivalent. 
QualType LHSType = S.Context.getCanonicalType(LHS.get()->getType()).getUnqualifiedType(); QualType RHSType = S.Context.getCanonicalType(RHS.get()->getType()).getUnqualifiedType(); if (!LHSType->isIntegerType() && !LHSType->isRealFloatingType()) { S.Diag(QuestionLoc, diag::err_typecheck_cond_expect_int_float) << LHSType << LHS.get()->getSourceRange(); return QualType(); } if (!RHSType->isIntegerType() && !RHSType->isRealFloatingType()) { S.Diag(QuestionLoc, diag::err_typecheck_cond_expect_int_float) << RHSType << RHS.get()->getSourceRange(); return QualType(); } // If both types are identical, no conversion is needed. if (LHSType == RHSType) return LHSType; // Now handle "real" floating types (i.e. float, double, long double). if (LHSType->isRealFloatingType() || RHSType->isRealFloatingType()) return handleFloatConversion(S, LHS, RHS, LHSType, RHSType, /*IsCompAssign = */ false); // Finally, we have two differing integer types. return handleIntegerConversion (S, LHS, RHS, LHSType, RHSType, /*IsCompAssign = */ false); } /// \brief Convert scalar operands to a vector that matches the /// condition in length. /// /// Used when handling the OpenCL conditional operator where the /// condition is a vector while the other operands are scalar. /// /// We first compute the "result type" for the scalar operands /// according to OpenCL v1.1 s6.3.i. Both operands are then converted /// into a vector of that type where the length matches the condition /// vector type. s6.11.6 requires that the element types of the result /// and the condition must have the same number of bits. static QualType OpenCLConvertScalarsToVectors(Sema &S, ExprResult &LHS, ExprResult &RHS, QualType CondTy, SourceLocation QuestionLoc) { QualType ResTy = OpenCLArithmeticConversions(S, LHS, RHS, QuestionLoc); if (ResTy.isNull()) return QualType(); const VectorType *CV = CondTy->getAs(); assert(CV); // Determine the vector result type unsigned NumElements = CV->getNumElements(); QualType VectorTy = S.Context.getExtVectorType(ResTy, NumElements); // Ensure that all types have the same number of bits if (S.Context.getTypeSize(CV->getElementType()) != S.Context.getTypeSize(ResTy)) { // Since VectorTy is created internally, it does not pretty print // with an OpenCL name. Instead, we just print a description. std::string EleTyName = ResTy.getUnqualifiedType().getAsString(); SmallString<64> Str; llvm::raw_svector_ostream OS(Str); OS << "(vector of " << NumElements << " '" << EleTyName << "' values)"; S.Diag(QuestionLoc, diag::err_conditional_vector_element_size) << CondTy << OS.str(); return QualType(); } // Convert operands to the vector result type LHS = S.ImpCastExprToType(LHS.get(), VectorTy, CK_VectorSplat); RHS = S.ImpCastExprToType(RHS.get(), VectorTy, CK_VectorSplat); return VectorTy; } /// \brief Return false if this is a valid OpenCL condition vector static bool checkOpenCLConditionVector(Sema &S, Expr *Cond, SourceLocation QuestionLoc) { // OpenCL v1.1 s6.11.6 says the elements of the vector must be of // integral type. const VectorType *CondTy = Cond->getType()->getAs(); assert(CondTy); QualType EleTy = CondTy->getElementType(); if (EleTy->isIntegerType()) return false; S.Diag(QuestionLoc, diag::err_typecheck_cond_expect_nonfloat) << Cond->getType() << Cond->getSourceRange(); return true; } /// \brief Return false if the vector condition type and the vector /// result type are compatible. 
/// /// OpenCL v1.1 s6.11.6 requires that both vector types have the same /// number of elements, and their element types have the same number /// of bits. static bool checkVectorResult(Sema &S, QualType CondTy, QualType VecResTy, SourceLocation QuestionLoc) { const VectorType *CV = CondTy->getAs(); const VectorType *RV = VecResTy->getAs(); assert(CV && RV); if (CV->getNumElements() != RV->getNumElements()) { S.Diag(QuestionLoc, diag::err_conditional_vector_size) << CondTy << VecResTy; return true; } QualType CVE = CV->getElementType(); QualType RVE = RV->getElementType(); if (S.Context.getTypeSize(CVE) != S.Context.getTypeSize(RVE)) { S.Diag(QuestionLoc, diag::err_conditional_vector_element_size) << CondTy << VecResTy; return true; } return false; } /// \brief Return the resulting type for the conditional operator in /// OpenCL (aka "ternary selection operator", OpenCL v1.1 /// s6.3.i) when the condition is a vector type. static QualType OpenCLCheckVectorConditional(Sema &S, ExprResult &Cond, ExprResult &LHS, ExprResult &RHS, SourceLocation QuestionLoc) { Cond = S.DefaultFunctionArrayLvalueConversion(Cond.get()); if (Cond.isInvalid()) return QualType(); QualType CondTy = Cond.get()->getType(); if (checkOpenCLConditionVector(S, Cond.get(), QuestionLoc)) return QualType(); // If either operand is a vector then find the vector type of the // result as specified in OpenCL v1.1 s6.3.i. if (LHS.get()->getType()->isVectorType() || RHS.get()->getType()->isVectorType()) { QualType VecResTy = S.CheckVectorOperands(LHS, RHS, QuestionLoc, /*isCompAssign*/false, /*AllowBothBool*/true, /*AllowBoolConversions*/false); if (VecResTy.isNull()) return QualType(); // The result type must match the condition type as specified in // OpenCL v1.1 s6.11.6. if (checkVectorResult(S, CondTy, VecResTy, QuestionLoc)) return QualType(); return VecResTy; } // Both operands are scalar. return OpenCLConvertScalarsToVectors(S, LHS, RHS, CondTy, QuestionLoc); } /// \brief Return true if the Expr is block type static bool checkBlockType(Sema &S, const Expr *E) { if (const CallExpr *CE = dyn_cast(E)) { QualType Ty = CE->getCallee()->getType(); if (Ty->isBlockPointerType()) { S.Diag(E->getExprLoc(), diag::err_opencl_ternary_with_block); return true; } } return false; } /// Note that LHS is not null here, even if this is the gnu "x ?: y" extension. /// In that case, LHS = cond. /// C99 6.5.15 QualType Sema::CheckConditionalOperands(ExprResult &Cond, ExprResult &LHS, ExprResult &RHS, ExprValueKind &VK, ExprObjectKind &OK, SourceLocation QuestionLoc) { ExprResult LHSResult = CheckPlaceholderExpr(LHS.get()); if (!LHSResult.isUsable()) return QualType(); LHS = LHSResult; ExprResult RHSResult = CheckPlaceholderExpr(RHS.get()); if (!RHSResult.isUsable()) return QualType(); RHS = RHSResult; // C++ is sufficiently different to merit its own checker. if (getLangOpts().CPlusPlus) return CXXCheckConditionalOperands(Cond, LHS, RHS, VK, OK, QuestionLoc); VK = VK_RValue; OK = OK_Ordinary; // The OpenCL operator with a vector condition is sufficiently // different to merit its own checker. if (getLangOpts().OpenCL && Cond.get()->getType()->isVectorType()) return OpenCLCheckVectorConditional(*this, Cond, LHS, RHS, QuestionLoc); // First, check the condition. Cond = UsualUnaryConversions(Cond.get()); if (Cond.isInvalid()) return QualType(); if (checkCondition(*this, Cond.get(), QuestionLoc)) return QualType(); // Now check the two expressions. 
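  //
  // E.g. (hypothetical, using the Clang vector extension): when either arm
  // is a vector, the vector-operand checking below picks the result type:
  //
  //   typedef int v4 __attribute__((vector_size(16)));
  //   v4 a, b; int c;
  //   v4 r = c ? a : b;
  //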
if (LHS.get()->getType()->isVectorType() || RHS.get()->getType()->isVectorType()) return CheckVectorOperands(LHS, RHS, QuestionLoc, /*isCompAssign*/false, /*AllowBothBool*/true, /*AllowBoolConversions*/false); QualType ResTy = UsualArithmeticConversions(LHS, RHS); if (LHS.isInvalid() || RHS.isInvalid()) return QualType(); QualType LHSTy = LHS.get()->getType(); QualType RHSTy = RHS.get()->getType(); // Diagnose attempts to convert between __float128 and long double where // such conversions currently can't be handled. if (unsupportedTypeConversion(*this, LHSTy, RHSTy)) { Diag(QuestionLoc, diag::err_typecheck_cond_incompatible_operands) << LHSTy << RHSTy << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } // OpenCL v2.0 s6.12.5 - Blocks cannot be used as expressions of the ternary // selection operator (?:). if (getLangOpts().OpenCL && (checkBlockType(*this, LHS.get()) | checkBlockType(*this, RHS.get()))) { return QualType(); } // If both operands have arithmetic type, do the usual arithmetic conversions // to find a common type: C99 6.5.15p3,5. if (LHSTy->isArithmeticType() && RHSTy->isArithmeticType()) { LHS = ImpCastExprToType(LHS.get(), ResTy, PrepareScalarCast(LHS, ResTy)); RHS = ImpCastExprToType(RHS.get(), ResTy, PrepareScalarCast(RHS, ResTy)); return ResTy; } // If both operands are the same structure or union type, the result is that // type. if (const RecordType *LHSRT = LHSTy->getAs()) { // C99 6.5.15p3 if (const RecordType *RHSRT = RHSTy->getAs()) if (LHSRT->getDecl() == RHSRT->getDecl()) // "If both the operands have structure or union type, the result has // that type." This implies that CV qualifiers are dropped. return LHSTy.getUnqualifiedType(); // FIXME: Type of conditional expression must be complete in C mode. } // C99 6.5.15p5: "If both operands have void type, the result has void type." // The following || allows only one side to be void (a GCC-ism). if (LHSTy->isVoidType() || RHSTy->isVoidType()) { return checkConditionalVoidType(*this, LHS, RHS); } // C99 6.5.15p6 - "if one operand is a null pointer constant, the result has // the type of the other operand." if (!checkConditionalNullPointer(*this, RHS, LHSTy)) return LHSTy; if (!checkConditionalNullPointer(*this, LHS, RHSTy)) return RHSTy; // All objective-c pointer type analysis is done here. QualType compositeType = FindCompositeObjCPointerType(LHS, RHS, QuestionLoc); if (LHS.isInvalid() || RHS.isInvalid()) return QualType(); if (!compositeType.isNull()) return compositeType; // Handle block pointer types. if (LHSTy->isBlockPointerType() || RHSTy->isBlockPointerType()) return checkConditionalBlockPointerCompatibility(*this, LHS, RHS, QuestionLoc); // Check constraints for C object pointers types (C99 6.5.15p3,6). if (LHSTy->isPointerType() && RHSTy->isPointerType()) return checkConditionalObjectPointersCompatibility(*this, LHS, RHS, QuestionLoc); // GCC compatibility: soften pointer/integer mismatch. Note that // null pointers have been filtered out by this point. if (checkPointerIntegerMismatch(*this, LHS, RHS.get(), QuestionLoc, /*isIntFirstExpr=*/true)) return RHSTy; if (checkPointerIntegerMismatch(*this, RHS, LHS.get(), QuestionLoc, /*isIntFirstExpr=*/false)) return LHSTy; // Emit a better diagnostic if one of the expressions is a null pointer // constant and the other is not a pointer type. In this case, the user most // likely forgot to take the address of the other expression. 
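  // For instance (illustrative, not from the original source):
  //
  //   int x;
  //   c ? NULL : x;  // one arm is a null pointer constant and 'x' is not a
  //                  // pointer; the user probably meant 'c ? NULL : &x'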
if (DiagnoseConditionalForNull(LHS.get(), RHS.get(), QuestionLoc)) return QualType(); // Otherwise, the operands are not compatible. Diag(QuestionLoc, diag::err_typecheck_cond_incompatible_operands) << LHSTy << RHSTy << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } /// FindCompositeObjCPointerType - Helper method to find composite type of /// two objective-c pointer types of the two input expressions. QualType Sema::FindCompositeObjCPointerType(ExprResult &LHS, ExprResult &RHS, SourceLocation QuestionLoc) { QualType LHSTy = LHS.get()->getType(); QualType RHSTy = RHS.get()->getType(); // Handle things like Class and struct objc_class*. Here we case the result // to the pseudo-builtin, because that will be implicitly cast back to the // redefinition type if an attempt is made to access its fields. if (LHSTy->isObjCClassType() && (Context.hasSameType(RHSTy, Context.getObjCClassRedefinitionType()))) { RHS = ImpCastExprToType(RHS.get(), LHSTy, CK_CPointerToObjCPointerCast); return LHSTy; } if (RHSTy->isObjCClassType() && (Context.hasSameType(LHSTy, Context.getObjCClassRedefinitionType()))) { LHS = ImpCastExprToType(LHS.get(), RHSTy, CK_CPointerToObjCPointerCast); return RHSTy; } // And the same for struct objc_object* / id if (LHSTy->isObjCIdType() && (Context.hasSameType(RHSTy, Context.getObjCIdRedefinitionType()))) { RHS = ImpCastExprToType(RHS.get(), LHSTy, CK_CPointerToObjCPointerCast); return LHSTy; } if (RHSTy->isObjCIdType() && (Context.hasSameType(LHSTy, Context.getObjCIdRedefinitionType()))) { LHS = ImpCastExprToType(LHS.get(), RHSTy, CK_CPointerToObjCPointerCast); return RHSTy; } // And the same for struct objc_selector* / SEL if (Context.isObjCSelType(LHSTy) && (Context.hasSameType(RHSTy, Context.getObjCSelRedefinitionType()))) { RHS = ImpCastExprToType(RHS.get(), LHSTy, CK_BitCast); return LHSTy; } if (Context.isObjCSelType(RHSTy) && (Context.hasSameType(LHSTy, Context.getObjCSelRedefinitionType()))) { LHS = ImpCastExprToType(LHS.get(), RHSTy, CK_BitCast); return RHSTy; } // Check constraints for Objective-C object pointers types. if (LHSTy->isObjCObjectPointerType() && RHSTy->isObjCObjectPointerType()) { if (Context.getCanonicalType(LHSTy) == Context.getCanonicalType(RHSTy)) { // Two identical object pointer types are always compatible. return LHSTy; } const ObjCObjectPointerType *LHSOPT = LHSTy->castAs(); const ObjCObjectPointerType *RHSOPT = RHSTy->castAs(); QualType compositeType = LHSTy; // If both operands are interfaces and either operand can be // assigned to the other, use that type as the composite // type. This allows // xxx ? (A*) a : (B*) b // where B is a subclass of A. // // Additionally, as for assignment, if either type is 'id' // allow silent coercion. Finally, if the types are // incompatible then make sure to use 'id' as the composite // type so the result is acceptable for sending messages to. // FIXME: Consider unifying with 'areComparableObjCPointerTypes'. // It could return the composite type. if (!(compositeType = Context.areCommonBaseCompatible(LHSOPT, RHSOPT)).isNull()) { // Nothing more to do. } else if (Context.canAssignObjCInterfaces(LHSOPT, RHSOPT)) { compositeType = RHSOPT->isObjCBuiltinType() ? RHSTy : LHSTy; } else if (Context.canAssignObjCInterfaces(RHSOPT, LHSOPT)) { compositeType = LHSOPT->isObjCBuiltinType() ? LHSTy : RHSTy; } else if ((LHSTy->isObjCQualifiedIdType() || RHSTy->isObjCQualifiedIdType()) && Context.ObjCQualifiedIdTypesAreCompatible(LHSTy, RHSTy, true)) { // Need to handle "id" explicitly. 
// GCC allows qualified id and any Objective-C type to devolve to // id. Currently localizing to here until clear this should be // part of ObjCQualifiedIdTypesAreCompatible. compositeType = Context.getObjCIdType(); } else if (LHSTy->isObjCIdType() || RHSTy->isObjCIdType()) { compositeType = Context.getObjCIdType(); } else { Diag(QuestionLoc, diag::ext_typecheck_cond_incompatible_operands) << LHSTy << RHSTy << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); QualType incompatTy = Context.getObjCIdType(); LHS = ImpCastExprToType(LHS.get(), incompatTy, CK_BitCast); RHS = ImpCastExprToType(RHS.get(), incompatTy, CK_BitCast); return incompatTy; } // The object pointer types are compatible. LHS = ImpCastExprToType(LHS.get(), compositeType, CK_BitCast); RHS = ImpCastExprToType(RHS.get(), compositeType, CK_BitCast); return compositeType; } // Check Objective-C object pointer types and 'void *' if (LHSTy->isVoidPointerType() && RHSTy->isObjCObjectPointerType()) { if (getLangOpts().ObjCAutoRefCount) { // ARC forbids the implicit conversion of object pointers to 'void *', // so these types are not compatible. Diag(QuestionLoc, diag::err_cond_voidptr_arc) << LHSTy << RHSTy << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); LHS = RHS = true; return QualType(); } QualType lhptee = LHSTy->getAs()->getPointeeType(); QualType rhptee = RHSTy->getAs()->getPointeeType(); QualType destPointee = Context.getQualifiedType(lhptee, rhptee.getQualifiers()); QualType destType = Context.getPointerType(destPointee); // Add qualifiers if necessary. LHS = ImpCastExprToType(LHS.get(), destType, CK_NoOp); // Promote to void*. RHS = ImpCastExprToType(RHS.get(), destType, CK_BitCast); return destType; } if (LHSTy->isObjCObjectPointerType() && RHSTy->isVoidPointerType()) { if (getLangOpts().ObjCAutoRefCount) { // ARC forbids the implicit conversion of object pointers to 'void *', // so these types are not compatible. Diag(QuestionLoc, diag::err_cond_voidptr_arc) << LHSTy << RHSTy << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); LHS = RHS = true; return QualType(); } QualType lhptee = LHSTy->getAs()->getPointeeType(); QualType rhptee = RHSTy->getAs()->getPointeeType(); QualType destPointee = Context.getQualifiedType(rhptee, lhptee.getQualifiers()); QualType destType = Context.getPointerType(destPointee); // Add qualifiers if necessary. RHS = ImpCastExprToType(RHS.get(), destType, CK_NoOp); // Promote to void*. LHS = ImpCastExprToType(LHS.get(), destType, CK_BitCast); return destType; } return QualType(); } /// SuggestParentheses - Emit a note with a fixit hint that wraps /// ParenRange in parentheses. static void SuggestParentheses(Sema &Self, SourceLocation Loc, const PartialDiagnostic &Note, SourceRange ParenRange) { SourceLocation EndLoc = Self.getLocForEndOfToken(ParenRange.getEnd()); if (ParenRange.getBegin().isFileID() && ParenRange.getEnd().isFileID() && EndLoc.isValid()) { Self.Diag(Loc, Note) << FixItHint::CreateInsertion(ParenRange.getBegin(), "(") << FixItHint::CreateInsertion(EndLoc, ")"); } else { // We can't display the parentheses, so just show the bare note. 
Self.Diag(Loc, Note) << ParenRange; } } static bool IsArithmeticOp(BinaryOperatorKind Opc) { return BinaryOperator::isAdditiveOp(Opc) || BinaryOperator::isMultiplicativeOp(Opc) || BinaryOperator::isShiftOp(Opc); } /// IsArithmeticBinaryExpr - Returns true if E is an arithmetic binary /// expression, either using a built-in or overloaded operator, /// and sets *OpCode to the opcode and *RHSExprs to the right-hand side /// expression. static bool IsArithmeticBinaryExpr(Expr *E, BinaryOperatorKind *Opcode, Expr **RHSExprs) { // Don't strip parenthesis: we should not warn if E is in parenthesis. E = E->IgnoreImpCasts(); E = E->IgnoreConversionOperator(); E = E->IgnoreImpCasts(); // Built-in binary operator. if (BinaryOperator *OP = dyn_cast(E)) { if (IsArithmeticOp(OP->getOpcode())) { *Opcode = OP->getOpcode(); *RHSExprs = OP->getRHS(); return true; } } // Overloaded operator. if (CXXOperatorCallExpr *Call = dyn_cast(E)) { if (Call->getNumArgs() != 2) return false; // Make sure this is really a binary operator that is safe to pass into // BinaryOperator::getOverloadedOpcode(), e.g. it's not a subscript op. OverloadedOperatorKind OO = Call->getOperator(); if (OO < OO_Plus || OO > OO_Arrow || OO == OO_PlusPlus || OO == OO_MinusMinus) return false; BinaryOperatorKind OpKind = BinaryOperator::getOverloadedOpcode(OO); if (IsArithmeticOp(OpKind)) { *Opcode = OpKind; *RHSExprs = Call->getArg(1); return true; } } return false; } /// ExprLooksBoolean - Returns true if E looks boolean, i.e. it has boolean type /// or is a logical expression such as (x==y) which has int type, but is /// commonly interpreted as boolean. static bool ExprLooksBoolean(Expr *E) { E = E->IgnoreParenImpCasts(); if (E->getType()->isBooleanType()) return true; if (BinaryOperator *OP = dyn_cast(E)) return OP->isComparisonOp() || OP->isLogicalOp(); if (UnaryOperator *OP = dyn_cast(E)) return OP->getOpcode() == UO_LNot; if (E->getType()->isPointerType()) return true; return false; } /// DiagnoseConditionalPrecedence - Emit a warning when a conditional operator /// and binary operator are mixed in a way that suggests the programmer assumed /// the conditional operator has higher precedence, for example: /// "int x = a + someBinaryCondition ? 1 : 2". static void DiagnoseConditionalPrecedence(Sema &Self, SourceLocation OpLoc, Expr *Condition, Expr *LHSExpr, Expr *RHSExpr) { BinaryOperatorKind CondOpcode; Expr *CondRHS; if (!IsArithmeticBinaryExpr(Condition, &CondOpcode, &CondRHS)) return; if (!ExprLooksBoolean(CondRHS)) return; // The condition is an arithmetic binary expression, with a right- // hand side that looks boolean, so warn. Self.Diag(OpLoc, diag::warn_precedence_conditional) << Condition->getSourceRange() << BinaryOperator::getOpcodeStr(CondOpcode); SuggestParentheses(Self, OpLoc, Self.PDiag(diag::note_precedence_silence) << BinaryOperator::getOpcodeStr(CondOpcode), SourceRange(Condition->getLocStart(), Condition->getLocEnd())); SuggestParentheses(Self, OpLoc, Self.PDiag(diag::note_precedence_conditional_first), SourceRange(CondRHS->getLocStart(), RHSExpr->getLocEnd())); } /// Compute the nullability of a conditional expression. 
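///
/// Illustrative merges under the rules below (a sketch using the Clang
/// nullability qualifiers; not part of the original source):
///
///   int * _Nonnull a;  int * _Nullable b;  int *u;
///
///   c ? a : b;  // _Nullable: any nullable operand makes the result nullable
///   c ? a : u;  // unspecified: a nonnull operand defers to the other side
///   a ?: b;     // binary form: a _Nonnull LHS makes the result _Nonnull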
static QualType computeConditionalNullability(QualType ResTy, bool IsBin,
                                              QualType LHSTy, QualType RHSTy,
                                              ASTContext &Ctx) {
  if (!ResTy->isAnyPointerType())
    return ResTy;

  auto GetNullability = [&Ctx](QualType Ty) {
    Optional<NullabilityKind> Kind = Ty->getNullability(Ctx);
    if (Kind)
      return *Kind;
    return NullabilityKind::Unspecified;
  };
  auto LHSKind = GetNullability(LHSTy), RHSKind = GetNullability(RHSTy);
  NullabilityKind MergedKind;

  // Compute nullability of a binary conditional expression.
  if (IsBin) {
    if (LHSKind == NullabilityKind::NonNull)
      MergedKind = NullabilityKind::NonNull;
    else
      MergedKind = RHSKind;
    // Compute nullability of a normal conditional expression.
  } else {
    if (LHSKind == NullabilityKind::Nullable ||
        RHSKind == NullabilityKind::Nullable)
      MergedKind = NullabilityKind::Nullable;
    else if (LHSKind == NullabilityKind::NonNull)
      MergedKind = RHSKind;
    else if (RHSKind == NullabilityKind::NonNull)
      MergedKind = LHSKind;
    else
      MergedKind = NullabilityKind::Unspecified;
  }

  // Return if ResTy already has the correct nullability.
  if (GetNullability(ResTy) == MergedKind)
    return ResTy;

  // Strip all nullability from ResTy.
  while (ResTy->getNullability(Ctx))
    ResTy = ResTy.getSingleStepDesugaredType(Ctx);

  // Create a new AttributedType with the new nullability kind.
  auto NewAttr = AttributedType::getNullabilityAttrKind(MergedKind);
  return Ctx.getAttributedType(NewAttr, ResTy, ResTy);
}

/// ActOnConditionalOp - Parse a ?: operation. Note that 'LHS' may be null
/// in the case of the GNU conditional expr extension.
ExprResult Sema::ActOnConditionalOp(SourceLocation QuestionLoc,
                                    SourceLocation ColonLoc,
                                    Expr *CondExpr, Expr *LHSExpr,
                                    Expr *RHSExpr) {
  if (!getLangOpts().CPlusPlus) {
    // C cannot handle TypoExpr nodes in the condition because it
    // doesn't handle dependent types properly, so make sure any TypoExprs have
    // been dealt with before checking the operands.
    ExprResult CondResult = CorrectDelayedTyposInExpr(CondExpr);
    ExprResult LHSResult = CorrectDelayedTyposInExpr(LHSExpr);
    ExprResult RHSResult = CorrectDelayedTyposInExpr(RHSExpr);

    if (!CondResult.isUsable())
      return ExprError();

    if (LHSExpr) {
      if (!LHSResult.isUsable())
        return ExprError();
    }

    if (!RHSResult.isUsable())
      return ExprError();

    CondExpr = CondResult.get();
    LHSExpr = LHSResult.get();
    RHSExpr = RHSResult.get();
  }

  // If this is the gnu "x ?: y" extension, analyze the types as though the LHS
  // was the condition.
  OpaqueValueExpr *opaqueValue = nullptr;
  Expr *commonExpr = nullptr;
  if (!LHSExpr) {
    commonExpr = CondExpr;
    // Lower out placeholder types first.  This is important so that we don't
    // try to capture a placeholder. This happens in a few cases in C++, such
    // as Objective-C++'s dictionary subscripting syntax.
    if (commonExpr->hasPlaceholderType()) {
      ExprResult result = CheckPlaceholderExpr(commonExpr);
      if (!result.isUsable()) return ExprError();
      commonExpr = result.get();
    }
    // We usually want to apply unary conversions *before* saving, except
    // in the special case of a C++ l-value conditional.
    if (!(getLangOpts().CPlusPlus
          && !commonExpr->isTypeDependent()
          && commonExpr->getValueKind() == RHSExpr->getValueKind()
          && commonExpr->isGLValue()
          && commonExpr->isOrdinaryOrBitFieldObject()
          && RHSExpr->isOrdinaryOrBitFieldObject()
          && Context.hasSameType(commonExpr->getType(), RHSExpr->getType()))) {
      ExprResult commonRes = UsualUnaryConversions(commonExpr);
      if (commonRes.isInvalid())
        return ExprError();
      commonExpr = commonRes.get();
    }

    // If the common expression is a class or array prvalue, materialize it
    // so that we can safely refer to it multiple times.
    if (commonExpr->isRValue() &&
        (commonExpr->getType()->isRecordType() ||
         commonExpr->getType()->isArrayType())) {
      ExprResult MatExpr = TemporaryMaterializationConversion(commonExpr);
      if (MatExpr.isInvalid())
        return ExprError();
      commonExpr = MatExpr.get();
    }

    opaqueValue = new (Context) OpaqueValueExpr(commonExpr->getExprLoc(),
                                                commonExpr->getType(),
                                                commonExpr->getValueKind(),
                                                commonExpr->getObjectKind(),
                                                commonExpr);
    LHSExpr = CondExpr = opaqueValue;
  }

  QualType LHSTy = LHSExpr->getType(), RHSTy = RHSExpr->getType();
  ExprValueKind VK = VK_RValue;
  ExprObjectKind OK = OK_Ordinary;
  ExprResult Cond = CondExpr, LHS = LHSExpr, RHS = RHSExpr;
  QualType result = CheckConditionalOperands(Cond, LHS, RHS, VK, OK,
                                             QuestionLoc);
  if (result.isNull() || Cond.isInvalid() || LHS.isInvalid() ||
      RHS.isInvalid())
    return ExprError();

  DiagnoseConditionalPrecedence(*this, QuestionLoc, Cond.get(), LHS.get(),
                                RHS.get());

  CheckBoolLikeConversion(Cond.get(), QuestionLoc);

  result = computeConditionalNullability(result, commonExpr, LHSTy, RHSTy,
                                         Context);

  if (!commonExpr)
    return new (Context)
        ConditionalOperator(Cond.get(), QuestionLoc, LHS.get(), ColonLoc,
                            RHS.get(), result, VK, OK);

  return new (Context) BinaryConditionalOperator(
      commonExpr, opaqueValue, Cond.get(), LHS.get(), RHS.get(), QuestionLoc,
      ColonLoc, result, VK, OK);
}

// checkPointerTypesForAssignment - This is a very tricky routine (despite
// being closely modeled after the C99 spec:-). The odd characteristic of this
// routine is it effectively ignores the qualifiers on the top level pointee.
// This circumvents the usual type rules specified in 6.2.7p1 & 6.7.5.[1-3].
// FIXME: add a couple examples in this comment.
static Sema::AssignConvertType
checkPointerTypesForAssignment(Sema &S, QualType LHSType, QualType RHSType) {
  assert(LHSType.isCanonical() && "LHS not canonicalized!");
  assert(RHSType.isCanonical() && "RHS not canonicalized!");

  // get the "pointed to" type (ignoring qualifiers at the top level)
  const Type *lhptee, *rhptee;
  Qualifiers lhq, rhq;
  std::tie(lhptee, lhq) =
      cast<PointerType>(LHSType)->getPointeeType().split().asPair();
  std::tie(rhptee, rhq) =
      cast<PointerType>(RHSType)->getPointeeType().split().asPair();

  Sema::AssignConvertType ConvTy = Sema::Compatible;

  // C99 6.5.16.1p1: The following citation is common to constraints
  // 3 & 4 (below). ...and the type *pointed to* by the left has all the
  // qualifiers of the type *pointed to* by the right;

  // As a special case, 'non-__weak A *' -> 'non-__weak const *' is okay.
  if (lhq.getObjCLifetime() != rhq.getObjCLifetime() &&
      lhq.compatiblyIncludesObjCLifetime(rhq)) {
    // Ignore lifetime for further calculation.
    lhq.removeObjCLifetime();
    rhq.removeObjCLifetime();
  }

  if (!lhq.compatiblyIncludes(rhq)) {
    // Treat address-space mismatches as fatal.  TODO: address subspaces
    if (!lhq.isAddressSpaceSupersetOf(rhq))
      ConvTy = Sema::IncompatiblePointerDiscardsQualifiers;

    // It's okay to add or remove GC or lifetime qualifiers when converting to
    // and from void*.
    else if (lhq.withoutObjCGCAttr().withoutObjCLifetime()
                 .compatiblyIncludes(
                     rhq.withoutObjCGCAttr().withoutObjCLifetime()) &&
             (lhptee->isVoidType() || rhptee->isVoidType()))
      ; // keep old

    // Treat lifetime mismatches as fatal.
    else if (lhq.getObjCLifetime() != rhq.getObjCLifetime())
      ConvTy = Sema::IncompatiblePointerDiscardsQualifiers;

    // For GCC/MS compatibility, other qualifier mismatches are treated
    // as still compatible in C.
    else ConvTy = Sema::CompatiblePointerDiscardsQualifiers;
  }

  // C99 6.5.16.1p1 (constraint 4): If one operand is a pointer to an object or
  // incomplete type and the other is a pointer to a qualified or unqualified
  // version of void...
  if (lhptee->isVoidType()) {
    if (rhptee->isIncompleteOrObjectType())
      return ConvTy;

    // As an extension, we allow cast to/from void* to function pointer.
    assert(rhptee->isFunctionType());
    return Sema::FunctionVoidPointer;
  }

  if (rhptee->isVoidType()) {
    if (lhptee->isIncompleteOrObjectType())
      return ConvTy;

    // As an extension, we allow cast to/from void* to function pointer.
    assert(lhptee->isFunctionType());
    return Sema::FunctionVoidPointer;
  }

  // C99 6.5.16.1p1 (constraint 3): both operands are pointers to qualified or
  // unqualified versions of compatible types, ...
  QualType ltrans = QualType(lhptee, 0), rtrans = QualType(rhptee, 0);
  if (!S.Context.typesAreCompatible(ltrans, rtrans)) {
    // Check if the pointee types are compatible ignoring the sign.
    // We explicitly check for char so that we catch "char" vs
    // "unsigned char" on systems where "char" is unsigned.
    if (lhptee->isCharType())
      ltrans = S.Context.UnsignedCharTy;
    else if (lhptee->hasSignedIntegerRepresentation())
      ltrans = S.Context.getCorrespondingUnsignedType(ltrans);

    if (rhptee->isCharType())
      rtrans = S.Context.UnsignedCharTy;
    else if (rhptee->hasSignedIntegerRepresentation())
      rtrans = S.Context.getCorrespondingUnsignedType(rtrans);

    if (ltrans == rtrans) {
      // Types are compatible ignoring the sign. Qualifier incompatibility
      // takes priority over sign incompatibility because the sign
      // warning can be disabled.
      if (ConvTy != Sema::Compatible)
        return ConvTy;

      return Sema::IncompatiblePointerSign;
    }

    // If we are a multi-level pointer, it's possible that our issue is simply
    // one of qualification - e.g. char ** -> const char ** is not allowed. If
    // the eventual target type is the same and the pointers have the same
    // level of indirection, this must be the issue.
    if (isa<PointerType>(lhptee) && isa<PointerType>(rhptee)) {
      do {
        lhptee = cast<PointerType>(lhptee)->getPointeeType().getTypePtr();
        rhptee = cast<PointerType>(rhptee)->getPointeeType().getTypePtr();
      } while (isa<PointerType>(lhptee) && isa<PointerType>(rhptee));

      if (lhptee == rhptee)
        return Sema::IncompatibleNestedPointerQualifiers;
    }

    // General pointer incompatibility takes priority over qualifiers.
    return Sema::IncompatiblePointer;
  }
  if (!S.getLangOpts().CPlusPlus &&
      S.IsFunctionConversion(ltrans, rtrans, ltrans))
    return Sema::IncompatiblePointer;
  return ConvTy;
}

/// checkBlockPointerTypesForAssignment - This routine determines whether two
/// block pointer types are compatible or whether a block and normal pointer
/// are compatible. It is more restrictive than comparing two function pointer
/// types.
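///
/// For illustration (a sketch, not from the original source):
///
///   void (^b1)(void);  int (^b2)(int);
///
///   b1 = b2;  // Sema::IncompatibleBlockPointer: signatures differ
///   b1 = b1;  // matching block pointer types classify as Sema::Compatible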
static Sema::AssignConvertType checkBlockPointerTypesForAssignment(Sema &S, QualType LHSType, QualType RHSType) { assert(LHSType.isCanonical() && "LHS not canonicalized!"); assert(RHSType.isCanonical() && "RHS not canonicalized!"); QualType lhptee, rhptee; // get the "pointed to" type (ignoring qualifiers at the top level) lhptee = cast(LHSType)->getPointeeType(); rhptee = cast(RHSType)->getPointeeType(); // In C++, the types have to match exactly. if (S.getLangOpts().CPlusPlus) return Sema::IncompatibleBlockPointer; Sema::AssignConvertType ConvTy = Sema::Compatible; // For blocks we enforce that qualifiers are identical. Qualifiers LQuals = lhptee.getLocalQualifiers(); Qualifiers RQuals = rhptee.getLocalQualifiers(); if (S.getLangOpts().OpenCL) { LQuals.removeAddressSpace(); RQuals.removeAddressSpace(); } if (LQuals != RQuals) ConvTy = Sema::CompatiblePointerDiscardsQualifiers; // FIXME: OpenCL doesn't define the exact compile time semantics for a block // assignment. // The current behavior is similar to C++ lambdas. A block might be // assigned to a variable iff its return type and parameters are compatible // (C99 6.2.7) with the corresponding return type and parameters of the LHS of // an assignment. Presumably it should behave in way that a function pointer // assignment does in C, so for each parameter and return type: // * CVR and address space of LHS should be a superset of CVR and address // space of RHS. // * unqualified types should be compatible. if (S.getLangOpts().OpenCL) { if (!S.Context.typesAreBlockPointerCompatible( S.Context.getQualifiedType(LHSType.getUnqualifiedType(), LQuals), S.Context.getQualifiedType(RHSType.getUnqualifiedType(), RQuals))) return Sema::IncompatibleBlockPointer; } else if (!S.Context.typesAreBlockPointerCompatible(LHSType, RHSType)) return Sema::IncompatibleBlockPointer; return ConvTy; } /// checkObjCPointerTypesForAssignment - Compares two objective-c pointer types /// for assignment compatibility. static Sema::AssignConvertType checkObjCPointerTypesForAssignment(Sema &S, QualType LHSType, QualType RHSType) { assert(LHSType.isCanonical() && "LHS was not canonicalized!"); assert(RHSType.isCanonical() && "RHS was not canonicalized!"); if (LHSType->isObjCBuiltinType()) { // Class is not compatible with ObjC object pointers. if (LHSType->isObjCClassType() && !RHSType->isObjCBuiltinType() && !RHSType->isObjCQualifiedClassType()) return Sema::IncompatiblePointer; return Sema::Compatible; } if (RHSType->isObjCBuiltinType()) { if (RHSType->isObjCClassType() && !LHSType->isObjCBuiltinType() && !LHSType->isObjCQualifiedClassType()) return Sema::IncompatiblePointer; return Sema::Compatible; } QualType lhptee = LHSType->getAs()->getPointeeType(); QualType rhptee = RHSType->getAs()->getPointeeType(); if (!lhptee.isAtLeastAsQualifiedAs(rhptee) && // make an exception for id
!LHSType->isObjCQualifiedIdType()) return Sema::CompatiblePointerDiscardsQualifiers; if (S.Context.typesAreCompatible(LHSType, RHSType)) return Sema::Compatible; if (LHSType->isObjCQualifiedIdType() || RHSType->isObjCQualifiedIdType()) return Sema::IncompatibleObjCQualifiedId; return Sema::IncompatiblePointer; } Sema::AssignConvertType Sema::CheckAssignmentConstraints(SourceLocation Loc, QualType LHSType, QualType RHSType) { // Fake up an opaque expression. We don't actually care about what // cast operations are required, so if CheckAssignmentConstraints // adds casts to this they'll be wasted, but fortunately that doesn't // usually happen on valid code. OpaqueValueExpr RHSExpr(Loc, RHSType, VK_RValue); ExprResult RHSPtr = &RHSExpr; CastKind K; return CheckAssignmentConstraints(LHSType, RHSPtr, K, /*ConvertRHS=*/false); } /// This helper function returns true if QT is a vector type that has element /// type ElementType. static bool isVector(QualType QT, QualType ElementType) { if (const VectorType *VT = QT->getAs()) return VT->getElementType() == ElementType; return false; } /// CheckAssignmentConstraints (C99 6.5.16) - This routine currently /// has code to accommodate several GCC extensions when type checking /// pointers. Here are some objectionable examples that GCC considers warnings: /// /// int a, *pint; /// short *pshort; /// struct foo *pfoo; /// /// pint = pshort; // warning: assignment from incompatible pointer type /// a = pint; // warning: assignment makes integer from pointer without a cast /// pint = a; // warning: assignment makes pointer from integer without a cast /// pint = pfoo; // warning: assignment from incompatible pointer type /// /// As a result, the code for dealing with pointers is more complex than the /// C99 spec dictates. /// /// Sets 'Kind' for any result kind except Incompatible. Sema::AssignConvertType Sema::CheckAssignmentConstraints(QualType LHSType, ExprResult &RHS, CastKind &Kind, bool ConvertRHS) { QualType RHSType = RHS.get()->getType(); QualType OrigLHSType = LHSType; // Get canonical types. We're not formatting these types, just comparing // them. LHSType = Context.getCanonicalType(LHSType).getUnqualifiedType(); RHSType = Context.getCanonicalType(RHSType).getUnqualifiedType(); // Common case: no conversion required. if (LHSType == RHSType) { Kind = CK_NoOp; return Compatible; } // If we have an atomic type, try a non-atomic assignment, then just add an // atomic qualification step. if (const AtomicType *AtomicTy = dyn_cast(LHSType)) { Sema::AssignConvertType result = CheckAssignmentConstraints(AtomicTy->getValueType(), RHS, Kind); if (result != Compatible) return result; if (Kind != CK_NoOp && ConvertRHS) RHS = ImpCastExprToType(RHS.get(), AtomicTy->getValueType(), Kind); Kind = CK_NonAtomicToAtomic; return Compatible; } // If the left-hand side is a reference type, then we are in a // (rare!) case where we've allowed the use of references in C, // e.g., as a parameter type in a built-in function. In this case, // just make sure that the type referenced is compatible with the // right-hand side type. The caller is responsible for adjusting // LHSType so that the resulting expression does not have reference // type. if (const ReferenceType *LHSTypeRef = LHSType->getAs()) { if (Context.typesAreCompatible(LHSTypeRef->getPointeeType(), RHSType)) { Kind = CK_LValueBitCast; return Compatible; } return Incompatible; } // Allow scalar to ExtVector assignments, and assignments of an ExtVector type // to the same ExtVector type. 
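  // For illustration (OpenCL C, where 'float4' is an ext_vector_type; a
  // sketch, not from the original source):
  //
  //   float4 v;
  //   v = 1.0f;  // accepted: the scalar is splatted via CK_VectorSplat,
  //              // yielding (float4)(1.0f, 1.0f, 1.0f, 1.0f)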
if (LHSType->isExtVectorType()) { if (RHSType->isExtVectorType()) return Incompatible; if (RHSType->isArithmeticType()) { // CK_VectorSplat does T -> vector T, so first cast to the element type. if (ConvertRHS) RHS = prepareVectorSplat(LHSType, RHS.get()); Kind = CK_VectorSplat; return Compatible; } } // Conversions to or from vector type. if (LHSType->isVectorType() || RHSType->isVectorType()) { if (LHSType->isVectorType() && RHSType->isVectorType()) { // Allow assignments of an AltiVec vector type to an equivalent GCC // vector type and vice versa if (Context.areCompatibleVectorTypes(LHSType, RHSType)) { Kind = CK_BitCast; return Compatible; } // If we are allowing lax vector conversions, and LHS and RHS are both // vectors, the total size only needs to be the same. This is a bitcast; // no bits are changed but the result type is different. if (isLaxVectorConversion(RHSType, LHSType)) { Kind = CK_BitCast; return IncompatibleVectors; } } // When the RHS comes from another lax conversion (e.g. binops between // scalars and vectors) the result is canonicalized as a vector. When the // LHS is also a vector, the lax is allowed by the condition above. Handle // the case where LHS is a scalar. if (LHSType->isScalarType()) { const VectorType *VecType = RHSType->getAs(); if (VecType && VecType->getNumElements() == 1 && isLaxVectorConversion(RHSType, LHSType)) { ExprResult *VecExpr = &RHS; *VecExpr = ImpCastExprToType(VecExpr->get(), LHSType, CK_BitCast); Kind = CK_BitCast; return Compatible; } } return Incompatible; } // Diagnose attempts to convert between __float128 and long double where // such conversions currently can't be handled. if (unsupportedTypeConversion(*this, LHSType, RHSType)) return Incompatible; // Disallow assigning a _Complex to a real type in C++ mode since it simply // discards the imaginary part. if (getLangOpts().CPlusPlus && RHSType->getAs() && !LHSType->getAs()) return Incompatible; // Arithmetic conversions. if (LHSType->isArithmeticType() && RHSType->isArithmeticType() && !(getLangOpts().CPlusPlus && LHSType->isEnumeralType())) { if (ConvertRHS) Kind = PrepareScalarCast(RHS, LHSType); return Compatible; } // Conversions to normal pointers. if (const PointerType *LHSPointer = dyn_cast(LHSType)) { // U* -> T* if (isa(RHSType)) { LangAS AddrSpaceL = LHSPointer->getPointeeType().getAddressSpace(); LangAS AddrSpaceR = RHSType->getPointeeType().getAddressSpace(); Kind = AddrSpaceL != AddrSpaceR ? CK_AddressSpaceConversion : CK_BitCast; return checkPointerTypesForAssignment(*this, LHSType, RHSType); } // int -> T* if (RHSType->isIntegerType()) { Kind = CK_IntegralToPointer; // FIXME: null? return IntToPointer; } // C pointers are not compatible with ObjC object pointers, // with two exceptions: if (isa(RHSType)) { // - conversions to void* if (LHSPointer->getPointeeType()->isVoidType()) { Kind = CK_BitCast; return Compatible; } // - conversions from 'Class' to the redefinition type if (RHSType->isObjCClassType() && Context.hasSameType(LHSType, Context.getObjCClassRedefinitionType())) { Kind = CK_BitCast; return Compatible; } Kind = CK_BitCast; return IncompatiblePointer; } // U^ -> void* if (RHSType->getAs()) { if (LHSPointer->getPointeeType()->isVoidType()) { LangAS AddrSpaceL = LHSPointer->getPointeeType().getAddressSpace(); LangAS AddrSpaceR = RHSType->getAs() ->getPointeeType() .getAddressSpace(); Kind = AddrSpaceL != AddrSpaceR ? CK_AddressSpaceConversion : CK_BitCast; return Compatible; } } return Incompatible; } // Conversions to block pointers. 
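  // Illustrative cases for this section (a sketch, not from the original
  // source; 'id' requires Objective-C):
  //
  //   void (^blk)(void);  void *vp;  id obj;
  //
  //   blk = vp;   // void* -> T^ : CK_AnyPointerToBlockPointerCast, Compatible
  //   blk = obj;  // id -> T^   : CK_AnyPointerToBlockPointerCast, Compatible
  //   blk = 42;   // int -> T^  : IntToBlockPointer (null pointer constants
  //               // were already handled by the caller)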
if (isa(LHSType)) { // U^ -> T^ if (RHSType->isBlockPointerType()) { LangAS AddrSpaceL = LHSType->getAs() ->getPointeeType() .getAddressSpace(); LangAS AddrSpaceR = RHSType->getAs() ->getPointeeType() .getAddressSpace(); Kind = AddrSpaceL != AddrSpaceR ? CK_AddressSpaceConversion : CK_BitCast; return checkBlockPointerTypesForAssignment(*this, LHSType, RHSType); } // int or null -> T^ if (RHSType->isIntegerType()) { Kind = CK_IntegralToPointer; // FIXME: null return IntToBlockPointer; } // id -> T^ if (getLangOpts().ObjC1 && RHSType->isObjCIdType()) { Kind = CK_AnyPointerToBlockPointerCast; return Compatible; } // void* -> T^ if (const PointerType *RHSPT = RHSType->getAs()) if (RHSPT->getPointeeType()->isVoidType()) { Kind = CK_AnyPointerToBlockPointerCast; return Compatible; } return Incompatible; } // Conversions to Objective-C pointers. if (isa(LHSType)) { // A* -> B* if (RHSType->isObjCObjectPointerType()) { Kind = CK_BitCast; Sema::AssignConvertType result = checkObjCPointerTypesForAssignment(*this, LHSType, RHSType); if (getLangOpts().allowsNonTrivialObjCLifetimeQualifiers() && result == Compatible && !CheckObjCARCUnavailableWeakConversion(OrigLHSType, RHSType)) result = IncompatibleObjCWeakRef; return result; } // int or null -> A* if (RHSType->isIntegerType()) { Kind = CK_IntegralToPointer; // FIXME: null return IntToPointer; } // In general, C pointers are not compatible with ObjC object pointers, // with two exceptions: if (isa(RHSType)) { Kind = CK_CPointerToObjCPointerCast; // - conversions from 'void*' if (RHSType->isVoidPointerType()) { return Compatible; } // - conversions to 'Class' from its redefinition type if (LHSType->isObjCClassType() && Context.hasSameType(RHSType, Context.getObjCClassRedefinitionType())) { return Compatible; } return IncompatiblePointer; } // Only under strict condition T^ is compatible with an Objective-C pointer. if (RHSType->isBlockPointerType() && LHSType->isBlockCompatibleObjCPointerType(Context)) { if (ConvertRHS) maybeExtendBlockObject(RHS); Kind = CK_BlockPointerToObjCPointerCast; return Compatible; } return Incompatible; } // Conversions from pointers that are not covered by the above. if (isa(RHSType)) { // T* -> _Bool if (LHSType == Context.BoolTy) { Kind = CK_PointerToBoolean; return Compatible; } // T* -> int if (LHSType->isIntegerType()) { Kind = CK_PointerToIntegral; return PointerToInt; } return Incompatible; } // Conversions from Objective-C pointers that are not covered by the above. if (isa(RHSType)) { // T* -> _Bool if (LHSType == Context.BoolTy) { Kind = CK_PointerToBoolean; return Compatible; } // T* -> int if (LHSType->isIntegerType()) { Kind = CK_PointerToIntegral; return PointerToInt; } return Incompatible; } // struct A -> struct B if (isa(LHSType) && isa(RHSType)) { if (Context.typesAreCompatible(LHSType, RHSType)) { Kind = CK_NoOp; return Compatible; } } if (LHSType->isSamplerT() && RHSType->isIntegerType()) { Kind = CK_IntToOCLSampler; return Compatible; } return Incompatible; } /// \brief Constructs a transparent union from an expression that is /// used to initialize the transparent union. static void ConstructTransparentUnion(Sema &S, ASTContext &C, ExprResult &EResult, QualType UnionType, FieldDecl *Field) { // Build an initializer list that designates the appropriate member // of the transparent union. 
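  // For reference, the GCC extension this models (illustrative, not from the
  // original source):
  //
  //   typedef union {
  //     int *ip;
  //     float *fp;
  //   } arg_t __attribute__((transparent_union));
  //
  //   void f(arg_t);
  //   int *p;
  //   f(p);  // initializes the 'ip' member; the union itself is transparent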
  Expr *E = EResult.get();
  InitListExpr *Initializer = new (C) InitListExpr(C, SourceLocation(),
                                                   E, SourceLocation());
  Initializer->setType(UnionType);
  Initializer->setInitializedFieldInUnion(Field);

  // Build a compound literal constructing a value of the transparent
  // union type from this initializer list.
  TypeSourceInfo *unionTInfo = C.getTrivialTypeSourceInfo(UnionType);
  EResult = new (C) CompoundLiteralExpr(SourceLocation(), unionTInfo,
                                        UnionType, VK_RValue, Initializer,
                                        false);
}

Sema::AssignConvertType
Sema::CheckTransparentUnionArgumentConstraints(QualType ArgType,
                                               ExprResult &RHS) {
  QualType RHSType = RHS.get()->getType();

  // If the ArgType is a Union type, we want to handle a potential
  // transparent_union GCC extension.
  const RecordType *UT = ArgType->getAsUnionType();
  if (!UT || !UT->getDecl()->hasAttr<TransparentUnionAttr>())
    return Incompatible;

  // The field to initialize within the transparent union.
  RecordDecl *UD = UT->getDecl();
  FieldDecl *InitField = nullptr;
  // It's compatible if the expression matches any of the fields.
  for (auto *it : UD->fields()) {
    if (it->getType()->isPointerType()) {
      // If the transparent union contains a pointer type, we allow:
      // 1) void pointer
      // 2) null pointer constant
      if (RHSType->isPointerType())
        if (RHSType->castAs<PointerType>()->getPointeeType()->isVoidType()) {
          RHS = ImpCastExprToType(RHS.get(), it->getType(), CK_BitCast);
          InitField = it;
          break;
        }

      if (RHS.get()->isNullPointerConstant(Context,
                                           Expr::NPC_ValueDependentIsNull)) {
        RHS = ImpCastExprToType(RHS.get(), it->getType(),
                                CK_NullToPointer);
        InitField = it;
        break;
      }
    }

    CastKind Kind;
    if (CheckAssignmentConstraints(it->getType(), RHS, Kind) == Compatible) {
      RHS = ImpCastExprToType(RHS.get(), it->getType(), Kind);
      InitField = it;
      break;
    }
  }

  if (!InitField)
    return Incompatible;

  ConstructTransparentUnion(*this, Context, RHS, ArgType, InitField);
  return Compatible;
}

Sema::AssignConvertType
Sema::CheckSingleAssignmentConstraints(QualType LHSType, ExprResult &CallerRHS,
                                       bool Diagnose,
                                       bool DiagnoseCFAudited,
                                       bool ConvertRHS) {
  // We need to be able to tell the caller whether we diagnosed a problem, if
  // they ask us to issue diagnostics.
  assert((ConvertRHS || !Diagnose) && "can't indicate whether we diagnosed");

  // If ConvertRHS is false, we want to leave the caller's RHS untouched. Sadly,
  // we can't avoid *all* modifications at the moment, so we need somewhere
  // to put the updated value.
  ExprResult LocalRHS = CallerRHS;
  ExprResult &RHS = ConvertRHS ? CallerRHS : LocalRHS;

  if (getLangOpts().CPlusPlus) {
    if (!LHSType->isRecordType() && !LHSType->isAtomicType()) {
      // C++ 5.17p3: If the left operand is not of class type, the
      // expression is implicitly converted (C++ 4) to the
      // cv-unqualified type of the left operand.
QualType RHSType = RHS.get()->getType(); if (Diagnose) { RHS = PerformImplicitConversion(RHS.get(), LHSType.getUnqualifiedType(), AA_Assigning); } else { ImplicitConversionSequence ICS = TryImplicitConversion(RHS.get(), LHSType.getUnqualifiedType(), /*SuppressUserConversions=*/false, /*AllowExplicit=*/false, /*InOverloadResolution=*/false, /*CStyle=*/false, /*AllowObjCWritebackConversion=*/false); if (ICS.isFailure()) return Incompatible; RHS = PerformImplicitConversion(RHS.get(), LHSType.getUnqualifiedType(), ICS, AA_Assigning); } if (RHS.isInvalid()) return Incompatible; Sema::AssignConvertType result = Compatible; if (getLangOpts().allowsNonTrivialObjCLifetimeQualifiers() && !CheckObjCARCUnavailableWeakConversion(LHSType, RHSType)) result = IncompatibleObjCWeakRef; return result; } // FIXME: Currently, we fall through and treat C++ classes like C // structures. // FIXME: We also fall through for atomics; not sure what should // happen there, though. } else if (RHS.get()->getType() == Context.OverloadTy) { // As a set of extensions to C, we support overloading on functions. These // functions need to be resolved here. DeclAccessPair DAP; if (FunctionDecl *FD = ResolveAddressOfOverloadedFunction( RHS.get(), LHSType, /*Complain=*/false, DAP)) RHS = FixOverloadedFunctionReference(RHS.get(), DAP, FD); else return Incompatible; } // C99 6.5.16.1p1: the left operand is a pointer and the right is // a null pointer constant. if ((LHSType->isPointerType() || LHSType->isObjCObjectPointerType() || LHSType->isBlockPointerType()) && RHS.get()->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull)) { if (Diagnose || ConvertRHS) { CastKind Kind; CXXCastPath Path; CheckPointerConversion(RHS.get(), LHSType, Kind, Path, /*IgnoreBaseAccess=*/false, Diagnose); if (ConvertRHS) RHS = ImpCastExprToType(RHS.get(), LHSType, Kind, VK_RValue, &Path); } return Compatible; } // This check seems unnatural, however it is necessary to ensure the proper // conversion of functions/arrays. If the conversion were done for all // DeclExpr's (created by ActOnIdExpression), it would mess up the unary // expressions that suppress this implicit conversion (&, sizeof). // // Suppress this for references: C++ 8.5.3p5. if (!LHSType->isReferenceType()) { // FIXME: We potentially allocate here even if ConvertRHS is false. RHS = DefaultFunctionArrayLvalueConversion(RHS.get(), Diagnose); if (RHS.isInvalid()) return Incompatible; } Expr *PRE = RHS.get()->IgnoreParenCasts(); if (Diagnose && isa(PRE)) { ObjCProtocolDecl *PDecl = cast(PRE)->getProtocol(); if (PDecl && !PDecl->hasDefinition()) { Diag(PRE->getExprLoc(), diag::warn_atprotocol_protocol) << PDecl->getName(); Diag(PDecl->getLocation(), diag::note_entity_declared_at) << PDecl; } } CastKind Kind; Sema::AssignConvertType result = CheckAssignmentConstraints(LHSType, RHS, Kind, ConvertRHS); // C99 6.5.16.1p2: The value of the right operand is converted to the // type of the assignment expression. // CheckAssignmentConstraints allows the left-hand side to be a reference, // so that we can use references in built-in functions even in C. // The getNonReferenceType() call makes sure that the resulting expression // does not have reference type. if (result != Incompatible && RHS.get()->getType() != LHSType) { QualType Ty = LHSType.getNonLValueExprType(Context); Expr *E = RHS.get(); // Check for various Objective-C errors. If we are not reporting // diagnostics and just checking for errors, e.g., during overload // resolution, return Incompatible to indicate the failure. 
if (getLangOpts().allowsNonTrivialObjCLifetimeQualifiers() && CheckObjCConversion(SourceRange(), Ty, E, CCK_ImplicitConversion, Diagnose, DiagnoseCFAudited) != ACR_okay) { if (!Diagnose) return Incompatible; } if (getLangOpts().ObjC1 && (CheckObjCBridgeRelatedConversions(E->getLocStart(), LHSType, E->getType(), E, Diagnose) || ConversionToObjCStringLiteralCheck(LHSType, E, Diagnose))) { if (!Diagnose) return Incompatible; // Replace the expression with a corrected version and continue so we // can find further errors. RHS = E; return Compatible; } if (ConvertRHS) RHS = ImpCastExprToType(E, Ty, Kind); } return result; } QualType Sema::InvalidOperands(SourceLocation Loc, ExprResult &LHS, ExprResult &RHS) { Diag(Loc, diag::err_typecheck_invalid_operands) << LHS.get()->getType() << RHS.get()->getType() << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } // Diagnose cases where a scalar was implicitly converted to a vector and // diagnose the underlying types. Otherwise, diagnose the error // as invalid vector logical operands for non-C++ cases. QualType Sema::InvalidLogicalVectorOperands(SourceLocation Loc, ExprResult &LHS, ExprResult &RHS) { QualType LHSType = LHS.get()->IgnoreImpCasts()->getType(); QualType RHSType = RHS.get()->IgnoreImpCasts()->getType(); bool LHSNatVec = LHSType->isVectorType(); bool RHSNatVec = RHSType->isVectorType(); if (!(LHSNatVec && RHSNatVec)) { Expr *Vector = LHSNatVec ? LHS.get() : RHS.get(); Expr *NonVector = !LHSNatVec ? LHS.get() : RHS.get(); Diag(Loc, diag::err_typecheck_logical_vector_expr_gnu_cpp_restrict) << 0 << Vector->getType() << NonVector->IgnoreImpCasts()->getType() << Vector->getSourceRange(); return QualType(); } Diag(Loc, diag::err_typecheck_logical_vector_expr_gnu_cpp_restrict) << 1 << LHSType << RHSType << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } /// Try to convert a value of non-vector type to a vector type by converting /// the type to the element type of the vector and then performing a splat. /// If the language is OpenCL, we only use conversions that promote scalar /// rank; for C, Obj-C, and C++ we allow any real scalar conversion except /// for float->int. /// /// OpenCL V2.0 6.2.6.p2: /// An error shall occur if any scalar operand type has greater rank /// than the type of the vector element. /// /// \param scalar - if non-null, actually perform the conversions /// \return true if the operation fails (but without diagnosing the failure) static bool tryVectorConvertAndSplat(Sema &S, ExprResult *scalar, QualType scalarTy, QualType vectorEltTy, QualType vectorTy, unsigned &DiagID) { // The conversion to apply to the scalar before splatting it, // if necessary. 
  CastKind scalarCast = CK_NoOp;

  if (vectorEltTy->isIntegralType(S.Context)) {
    if (S.getLangOpts().OpenCL && (scalarTy->isRealFloatingType() ||
        (scalarTy->isIntegerType() &&
         S.Context.getIntegerTypeOrder(vectorEltTy, scalarTy) < 0))) {
      DiagID = diag::err_opencl_scalar_type_rank_greater_than_vector_type;
      return true;
    }
    if (!scalarTy->isIntegralType(S.Context))
      return true;
    scalarCast = CK_IntegralCast;
  } else if (vectorEltTy->isRealFloatingType()) {
    if (scalarTy->isRealFloatingType()) {
      if (S.getLangOpts().OpenCL &&
          S.Context.getFloatingTypeOrder(vectorEltTy, scalarTy) < 0) {
        DiagID = diag::err_opencl_scalar_type_rank_greater_than_vector_type;
        return true;
      }
      scalarCast = CK_FloatingCast;
    } else if (scalarTy->isIntegralType(S.Context))
      scalarCast = CK_IntegralToFloating;
    else
      return true;
  } else {
    return true;
  }

  // Adjust scalar if desired.
  if (scalar) {
    if (scalarCast != CK_NoOp)
      *scalar = S.ImpCastExprToType(scalar->get(), vectorEltTy, scalarCast);
    *scalar = S.ImpCastExprToType(scalar->get(), vectorTy, CK_VectorSplat);
  }
  return false;
}

/// Convert vector E to a vector with the same number of elements but different
/// element type.
static ExprResult convertVector(Expr *E, QualType ElementType, Sema &S) {
  const auto *VecTy = E->getType()->getAs<VectorType>();
  assert(VecTy && "Expression E must be a vector");
  QualType NewVecTy = S.Context.getVectorType(ElementType,
                                              VecTy->getNumElements(),
                                              VecTy->getVectorKind());

  // Look through the implicit cast. Return the subexpression if its type is
  // NewVecTy.
  if (auto *ICE = dyn_cast<ImplicitCastExpr>(E))
    if (ICE->getSubExpr()->getType() == NewVecTy)
      return ICE->getSubExpr();

  auto Cast = ElementType->isIntegerType() ? CK_IntegralCast
                                           : CK_FloatingCast;
  return S.ImpCastExprToType(E, NewVecTy, Cast);
}

/// Test if a (constant) integer Int can be cast to another integer type
/// IntTy without losing precision.
static bool canConvertIntToOtherIntTy(Sema &S, ExprResult *Int,
                                      QualType OtherIntTy) {
  QualType IntTy = Int->get()->getType().getUnqualifiedType();

  // Reject cases where the value of the Int is unknown as that would
  // possibly cause truncation, but accept cases where the scalar can be
  // demoted without loss of precision.
  llvm::APSInt Result;
  bool CstInt = Int->get()->EvaluateAsInt(Result, S.Context);
  int Order = S.Context.getIntegerTypeOrder(OtherIntTy, IntTy);
  bool IntSigned = IntTy->hasSignedIntegerRepresentation();
  bool OtherIntSigned = OtherIntTy->hasSignedIntegerRepresentation();

  if (CstInt) {
    // If the scalar is constant and is of a higher order and has more active
    // bits than the vector element type, reject it.
    unsigned NumBits = IntSigned
                           ? (Result.isNegative() ? Result.getMinSignedBits()
                                                  : Result.getActiveBits())
                           : Result.getActiveBits();
    if (Order < 0 && S.Context.getIntWidth(OtherIntTy) < NumBits)
      return true;

    // If the signedness of the scalar type and the vector element type
    // differs and the number of bits is greater than that of the vector
    // element reject it.
    return (IntSigned != OtherIntSigned &&
            NumBits > S.Context.getIntWidth(OtherIntTy));
  }

  // Reject cases where the value of the scalar is not constant and its
  // order is greater than that of the vector element type.
  return (Order < 0);
}

/// Test if a (constant) integer Int can be cast to floating point type
/// FloatTy without losing precision.
static bool canConvertIntTyToFloatTy(Sema &S, ExprResult *Int,
                                     QualType FloatTy) {
  QualType IntTy = Int->get()->getType().getUnqualifiedType();

  // Determine if the integer constant can be expressed as a floating point
  // number of the appropriate type.
llvm::APSInt Result; bool CstInt = Int->get()->EvaluateAsInt(Result, S.Context); uint64_t Bits = 0; if (CstInt) { // Reject constants that would be truncated if they were converted to // the floating point type. Test by simple to/from conversion. // FIXME: Ideally the conversion to an APFloat and from an APFloat // could be avoided if there was a convertFromAPInt method // which could signal back if implicit truncation occurred. llvm::APFloat Float(S.Context.getFloatTypeSemantics(FloatTy)); Float.convertFromAPInt(Result, IntTy->hasSignedIntegerRepresentation(), llvm::APFloat::rmTowardZero); llvm::APSInt ConvertBack(S.Context.getIntWidth(IntTy), !IntTy->hasSignedIntegerRepresentation()); bool Ignored = false; Float.convertToInteger(ConvertBack, llvm::APFloat::rmNearestTiesToEven, &Ignored); if (Result != ConvertBack) return true; } else { // Reject types that cannot be fully encoded into the mantissa of // the float. Bits = S.Context.getTypeSize(IntTy); unsigned FloatPrec = llvm::APFloat::semanticsPrecision( S.Context.getFloatTypeSemantics(FloatTy)); if (Bits > FloatPrec) return true; } return false; } /// Attempt to convert and splat Scalar into a vector whose types matches /// Vector following GCC conversion rules. The rule is that implicit /// conversion can occur when Scalar can be casted to match Vector's element /// type without causing truncation of Scalar. static bool tryGCCVectorConvertAndSplat(Sema &S, ExprResult *Scalar, ExprResult *Vector) { QualType ScalarTy = Scalar->get()->getType().getUnqualifiedType(); QualType VectorTy = Vector->get()->getType().getUnqualifiedType(); const VectorType *VT = VectorTy->getAs(); assert(!isa(VT) && "ExtVectorTypes should not be handled here!"); QualType VectorEltTy = VT->getElementType(); // Reject cases where the vector element type or the scalar element type are // not integral or floating point types. if (!VectorEltTy->isArithmeticType() || !ScalarTy->isArithmeticType()) return true; // The conversion to apply to the scalar before splatting it, // if necessary. CastKind ScalarCast = CK_NoOp; // Accept cases where the vector elements are integers and the scalar is // an integer. // FIXME: Notionally if the scalar was a floating point value with a precise // integral representation, we could cast it to an appropriate integer // type and then perform the rest of the checks here. GCC will perform // this conversion in some cases as determined by the input language. // We should accept it on a language independent basis. if (VectorEltTy->isIntegralType(S.Context) && ScalarTy->isIntegralType(S.Context) && S.Context.getIntegerTypeOrder(VectorEltTy, ScalarTy)) { if (canConvertIntToOtherIntTy(S, Scalar, VectorEltTy)) return true; ScalarCast = CK_IntegralCast; } else if (VectorEltTy->isRealFloatingType()) { if (ScalarTy->isRealFloatingType()) { // Reject cases where the scalar type is not a constant and has a higher // Order than the vector element type. llvm::APFloat Result(0.0); bool CstScalar = Scalar->get()->EvaluateAsFloat(Result, S.Context); int Order = S.Context.getFloatingTypeOrder(VectorEltTy, ScalarTy); if (!CstScalar && Order < 0) return true; // If the scalar cannot be safely casted to the vector element type, // reject it. 
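      // Illustrative outcomes of these truncation checks (a sketch, not from
      // the original source):
      //
      //   typedef int v4si __attribute__((vector_size(16)));
      //   v4si v;  long long ll;
      //
      //   v + 1;            // accepted: splatted to the 'int' element type
      //   v + 0x100000000;  // rejected: needs more than 32 active bits
      //   v + ll;           // rejected: non-constant scalar of higher rank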
if (CstScalar) { bool Truncated = false; Result.convert(S.Context.getFloatTypeSemantics(VectorEltTy), llvm::APFloat::rmNearestTiesToEven, &Truncated); if (Truncated) return true; } ScalarCast = CK_FloatingCast; } else if (ScalarTy->isIntegralType(S.Context)) { if (canConvertIntTyToFloatTy(S, Scalar, VectorEltTy)) return true; ScalarCast = CK_IntegralToFloating; } else return true; } // Adjust scalar if desired. if (Scalar) { if (ScalarCast != CK_NoOp) *Scalar = S.ImpCastExprToType(Scalar->get(), VectorEltTy, ScalarCast); *Scalar = S.ImpCastExprToType(Scalar->get(), VectorTy, CK_VectorSplat); } return false; } QualType Sema::CheckVectorOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, bool IsCompAssign, bool AllowBothBool, bool AllowBoolConversions) { if (!IsCompAssign) { LHS = DefaultFunctionArrayLvalueConversion(LHS.get()); if (LHS.isInvalid()) return QualType(); } RHS = DefaultFunctionArrayLvalueConversion(RHS.get()); if (RHS.isInvalid()) return QualType(); // For conversion purposes, we ignore any qualifiers. // For example, "const float" and "float" are equivalent. QualType LHSType = LHS.get()->getType().getUnqualifiedType(); QualType RHSType = RHS.get()->getType().getUnqualifiedType(); const VectorType *LHSVecType = LHSType->getAs(); const VectorType *RHSVecType = RHSType->getAs(); assert(LHSVecType || RHSVecType); // AltiVec-style "vector bool op vector bool" combinations are allowed // for some operators but not others. if (!AllowBothBool && LHSVecType && LHSVecType->getVectorKind() == VectorType::AltiVecBool && RHSVecType && RHSVecType->getVectorKind() == VectorType::AltiVecBool) return InvalidOperands(Loc, LHS, RHS); // If the vector types are identical, return. if (Context.hasSameType(LHSType, RHSType)) return LHSType; // If we have compatible AltiVec and GCC vector types, use the AltiVec type. if (LHSVecType && RHSVecType && Context.areCompatibleVectorTypes(LHSType, RHSType)) { if (isa(LHSVecType)) { RHS = ImpCastExprToType(RHS.get(), LHSType, CK_BitCast); return LHSType; } if (!IsCompAssign) LHS = ImpCastExprToType(LHS.get(), RHSType, CK_BitCast); return RHSType; } // AllowBoolConversions says that bool and non-bool AltiVec vectors // can be mixed, with the result being the non-bool type. The non-bool // operand must have integer element type. if (AllowBoolConversions && LHSVecType && RHSVecType && LHSVecType->getNumElements() == RHSVecType->getNumElements() && (Context.getTypeSize(LHSVecType->getElementType()) == Context.getTypeSize(RHSVecType->getElementType()))) { if (LHSVecType->getVectorKind() == VectorType::AltiVecVector && LHSVecType->getElementType()->isIntegerType() && RHSVecType->getVectorKind() == VectorType::AltiVecBool) { RHS = ImpCastExprToType(RHS.get(), LHSType, CK_BitCast); return LHSType; } if (!IsCompAssign && LHSVecType->getVectorKind() == VectorType::AltiVecBool && RHSVecType->getVectorKind() == VectorType::AltiVecVector && RHSVecType->getElementType()->isIntegerType()) { LHS = ImpCastExprToType(LHS.get(), RHSType, CK_BitCast); return RHSType; } } // If there's a vector type and a scalar, try to convert the scalar to // the vector element type and splat. unsigned DiagID = diag::err_typecheck_vector_not_convertable; if (!RHSVecType) { if (isa(LHSVecType)) { if (!tryVectorConvertAndSplat(*this, &RHS, RHSType, LHSVecType->getElementType(), LHSType, DiagID)) return LHSType; } else { if (!tryGCCVectorConvertAndSplat(*this, &RHS, &LHS)) return LHSType; } } if (!LHSVecType) { if (isa(RHSVecType)) { if (!tryVectorConvertAndSplat(*this, (IsCompAssign ? 
nullptr : &LHS), LHSType, RHSVecType->getElementType(), RHSType, DiagID)) return RHSType; } else { if (LHS.get()->getValueKind() == VK_LValue || !tryGCCVectorConvertAndSplat(*this, &LHS, &RHS)) return RHSType; } } // FIXME: The code below also handles conversion between vectors and // non-scalars, we should break this down into fine grained specific checks // and emit proper diagnostics. QualType VecType = LHSVecType ? LHSType : RHSType; const VectorType *VT = LHSVecType ? LHSVecType : RHSVecType; QualType OtherType = LHSVecType ? RHSType : LHSType; ExprResult *OtherExpr = LHSVecType ? &RHS : &LHS; if (isLaxVectorConversion(OtherType, VecType)) { // If we're allowing lax vector conversions, only the total (data) size // needs to be the same. For non compound assignment, if one of the types is // scalar, the result is always the vector type. if (!IsCompAssign) { *OtherExpr = ImpCastExprToType(OtherExpr->get(), VecType, CK_BitCast); return VecType; // In a compound assignment, lhs += rhs, 'lhs' is a lvalue src, forbidding // any implicit cast. Here, the 'rhs' should be implicit casted to 'lhs' // type. Note that this is already done by non-compound assignments in // CheckAssignmentConstraints. If it's a scalar type, only bitcast for // <1 x T> -> T. The result is also a vector type. } else if (OtherType->isExtVectorType() || OtherType->isVectorType() || (OtherType->isScalarType() && VT->getNumElements() == 1)) { ExprResult *RHSExpr = &RHS; *RHSExpr = ImpCastExprToType(RHSExpr->get(), LHSType, CK_BitCast); return VecType; } } // Okay, the expression is invalid. // If there's a non-vector, non-real operand, diagnose that. if ((!RHSVecType && !RHSType->isRealType()) || (!LHSVecType && !LHSType->isRealType())) { Diag(Loc, diag::err_typecheck_vector_not_convertable_non_scalar) << LHSType << RHSType << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } // OpenCL V1.1 6.2.6.p1: // If the operands are of more than one vector type, then an error shall // occur. Implicit conversions between vector types are not permitted, per // section 6.2.1. if (getLangOpts().OpenCL && RHSVecType && isa(RHSVecType) && LHSVecType && isa(LHSVecType)) { Diag(Loc, diag::err_opencl_implicit_vector_conversion) << LHSType << RHSType; return QualType(); } // If there is a vector type that is not a ExtVector and a scalar, we reach // this point if scalar could not be converted to the vector's element type // without truncation. if ((RHSVecType && !isa(RHSVecType)) || (LHSVecType && !isa(LHSVecType))) { QualType Scalar = LHSVecType ? RHSType : LHSType; QualType Vector = LHSVecType ? LHSType : RHSType; unsigned ScalarOrVector = LHSVecType && RHSVecType ? 1 : 0; Diag(Loc, diag::err_typecheck_vector_not_convertable_implict_truncation) << ScalarOrVector << Scalar << Vector; return QualType(); } // Otherwise, use the generic diagnostic. Diag(Loc, DiagID) << LHSType << RHSType << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } // checkArithmeticNull - Detect when a NULL constant is used improperly in an // expression. These are mainly cases where the null pointer is used as an // integer instead of a pointer. static void checkArithmeticNull(Sema &S, ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, bool IsCompare) { // The canonical way to check for a GNU null is with isNullPointerConstant, // but we use a bit of a hack here for speed; this is a relatively // hot path, and isNullPointerConstant is slow. 
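  // Illustrative warnings from this check (a sketch, not from the original
  // source; NULL here is the GNU '__null' the fast path recognizes):
  //
  //   int n = 1 + NULL;      // warn_null_in_arithmetic_operation
  //   int *p;
  //   int ok = (p == NULL);  // fine: the other operand is a pointer
  //   int bad = (n > NULL);  // warn_null_in_comparison_operation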
  bool LHSNull = isa<GNUNullExpr>(LHS.get()->IgnoreParenImpCasts());
  bool RHSNull = isa<GNUNullExpr>(RHS.get()->IgnoreParenImpCasts());

  QualType NonNullType = LHSNull ? RHS.get()->getType() : LHS.get()->getType();

  // Avoid analyzing cases where the result will either be invalid (and
  // diagnosed as such) or entirely valid and not something to warn about.
  if ((!LHSNull && !RHSNull) || NonNullType->isBlockPointerType() ||
      NonNullType->isMemberPointerType() || NonNullType->isFunctionType())
    return;

  // Comparison operations would not make sense with a null pointer no matter
  // what the other expression is.
  if (!IsCompare) {
    S.Diag(Loc, diag::warn_null_in_arithmetic_operation)
        << (LHSNull ? LHS.get()->getSourceRange() : SourceRange())
        << (RHSNull ? RHS.get()->getSourceRange() : SourceRange());
    return;
  }

  // The rest of the operations only make sense with a null pointer
  // if the other expression is a pointer.
  if (LHSNull == RHSNull || NonNullType->isAnyPointerType() ||
      NonNullType->canDecayToPointerType())
    return;

  S.Diag(Loc, diag::warn_null_in_comparison_operation)
      << LHSNull /* LHS is NULL */ << NonNullType
      << LHS.get()->getSourceRange() << RHS.get()->getSourceRange();
}

static void DiagnoseBadDivideOrRemainderValues(Sema& S, ExprResult &LHS,
                                               ExprResult &RHS,
                                               SourceLocation Loc, bool IsDiv) {
  // Check for division/remainder by zero.
  llvm::APSInt RHSValue;
  if (!RHS.get()->isValueDependent() &&
      RHS.get()->EvaluateAsInt(RHSValue, S.Context) && RHSValue == 0)
    S.DiagRuntimeBehavior(Loc, RHS.get(),
                          S.PDiag(diag::warn_remainder_division_by_zero)
                              << IsDiv << RHS.get()->getSourceRange());
}

QualType Sema::CheckMultiplyDivideOperands(ExprResult &LHS, ExprResult &RHS,
                                           SourceLocation Loc,
                                           bool IsCompAssign, bool IsDiv) {
  checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/false);

  if (LHS.get()->getType()->isVectorType() ||
      RHS.get()->getType()->isVectorType())
    return CheckVectorOperands(LHS, RHS, Loc, IsCompAssign,
                               /*AllowBothBool*/getLangOpts().AltiVec,
                               /*AllowBoolConversions*/false);

  QualType compType = UsualArithmeticConversions(LHS, RHS, IsCompAssign);
  if (LHS.isInvalid() || RHS.isInvalid())
    return QualType();

  if (compType.isNull() || !compType->isArithmeticType())
    return InvalidOperands(Loc, LHS, RHS);
  if (IsDiv)
    DiagnoseBadDivideOrRemainderValues(*this, LHS, RHS, Loc, IsDiv);
  return compType;
}

QualType Sema::CheckRemainderOperands(
    ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, bool IsCompAssign) {
  checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/false);

  if (LHS.get()->getType()->isVectorType() ||
      RHS.get()->getType()->isVectorType()) {
    if (LHS.get()->getType()->hasIntegerRepresentation() &&
        RHS.get()->getType()->hasIntegerRepresentation())
      return CheckVectorOperands(LHS, RHS, Loc, IsCompAssign,
                                 /*AllowBothBool*/getLangOpts().AltiVec,
                                 /*AllowBoolConversions*/false);
    return InvalidOperands(Loc, LHS, RHS);
  }

  QualType compType = UsualArithmeticConversions(LHS, RHS, IsCompAssign);
  if (LHS.isInvalid() || RHS.isInvalid())
    return QualType();

  if (compType.isNull() || !compType->isIntegerType())
    return InvalidOperands(Loc, LHS, RHS);
  DiagnoseBadDivideOrRemainderValues(*this, LHS, RHS, Loc, false /* IsDiv */);
  return compType;
}

/// \brief Diagnose invalid arithmetic on two void pointers.
static void diagnoseArithmeticOnTwoVoidPointers(Sema &S, SourceLocation Loc,
                                                Expr *LHSExpr, Expr *RHSExpr) {
  S.Diag(Loc, S.getLangOpts().CPlusPlus ?
diag::err_typecheck_pointer_arith_void_type : diag::ext_gnu_void_ptr) << 1 /* two pointers */ << LHSExpr->getSourceRange() << RHSExpr->getSourceRange(); } /// \brief Diagnose invalid arithmetic on a void pointer. static void diagnoseArithmeticOnVoidPointer(Sema &S, SourceLocation Loc, Expr *Pointer) { S.Diag(Loc, S.getLangOpts().CPlusPlus ? diag::err_typecheck_pointer_arith_void_type : diag::ext_gnu_void_ptr) << 0 /* one pointer */ << Pointer->getSourceRange(); } /// \brief Diagnose invalid arithmetic on a null pointer. /// /// If \p IsGNUIdiom is true, the operation is using the 'p = (i8*)nullptr + n' /// idiom, which we recognize as a GNU extension. /// static void diagnoseArithmeticOnNullPointer(Sema &S, SourceLocation Loc, Expr *Pointer, bool IsGNUIdiom) { if (IsGNUIdiom) S.Diag(Loc, diag::warn_gnu_null_ptr_arith) << Pointer->getSourceRange(); else S.Diag(Loc, diag::warn_pointer_arith_null_ptr) << S.getLangOpts().CPlusPlus << Pointer->getSourceRange(); } /// \brief Diagnose invalid arithmetic on two function pointers. static void diagnoseArithmeticOnTwoFunctionPointers(Sema &S, SourceLocation Loc, Expr *LHS, Expr *RHS) { assert(LHS->getType()->isAnyPointerType()); assert(RHS->getType()->isAnyPointerType()); S.Diag(Loc, S.getLangOpts().CPlusPlus ? diag::err_typecheck_pointer_arith_function_type : diag::ext_gnu_ptr_func_arith) << 1 /* two pointers */ << LHS->getType()->getPointeeType() // We only show the second type if it differs from the first. << (unsigned)!S.Context.hasSameUnqualifiedType(LHS->getType(), RHS->getType()) << RHS->getType()->getPointeeType() << LHS->getSourceRange() << RHS->getSourceRange(); } /// \brief Diagnose invalid arithmetic on a function pointer. static void diagnoseArithmeticOnFunctionPointer(Sema &S, SourceLocation Loc, Expr *Pointer) { assert(Pointer->getType()->isAnyPointerType()); S.Diag(Loc, S.getLangOpts().CPlusPlus ? diag::err_typecheck_pointer_arith_function_type : diag::ext_gnu_ptr_func_arith) << 0 /* one pointer */ << Pointer->getType()->getPointeeType() << 0 /* one pointer, so only one type */ << Pointer->getSourceRange(); } /// \brief Emit error if Operand is incomplete pointer type /// /// \returns True if pointer has incomplete type static bool checkArithmeticIncompletePointerType(Sema &S, SourceLocation Loc, Expr *Operand) { QualType ResType = Operand->getType(); if (const AtomicType *ResAtomicType = ResType->getAs()) ResType = ResAtomicType->getValueType(); assert(ResType->isAnyPointerType() && !ResType->isDependentType()); QualType PointeeTy = ResType->getPointeeType(); return S.RequireCompleteType(Loc, PointeeTy, diag::err_typecheck_arithmetic_incomplete_type, PointeeTy, Operand->getSourceRange()); } /// \brief Check the validity of an arithmetic pointer operand. /// /// If the operand has pointer type, this code will check for pointer types /// which are invalid in arithmetic operations. These will be diagnosed /// appropriately, including whether or not the use is supported as an /// extension. /// /// \returns True when the operand is valid to use (even if as an extension). 
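///
/// For illustration (editor's addition, not upstream documentation), the
/// user-level cases involved are roughly:
/// \code
///   void *v;  v + 1;           // GNU extension in C, error in C++
///   void (*f)(void);  f + 1;   // likewise: arithmetic on a function pointer
///   struct S *s;  s + 1;       // error if 'struct S' is incomplete here
/// \endcode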
static bool checkArithmeticOpPointerOperand(Sema &S, SourceLocation Loc, Expr *Operand) { QualType ResType = Operand->getType(); if (const AtomicType *ResAtomicType = ResType->getAs()) ResType = ResAtomicType->getValueType(); if (!ResType->isAnyPointerType()) return true; QualType PointeeTy = ResType->getPointeeType(); if (PointeeTy->isVoidType()) { diagnoseArithmeticOnVoidPointer(S, Loc, Operand); return !S.getLangOpts().CPlusPlus; } if (PointeeTy->isFunctionType()) { diagnoseArithmeticOnFunctionPointer(S, Loc, Operand); return !S.getLangOpts().CPlusPlus; } if (checkArithmeticIncompletePointerType(S, Loc, Operand)) return false; return true; } /// \brief Check the validity of a binary arithmetic operation w.r.t. pointer /// operands. /// /// This routine will diagnose any invalid arithmetic on pointer operands much /// like \see checkArithmeticOpPointerOperand. However, it has special logic /// for emitting a single diagnostic even for operations where both LHS and RHS /// are (potentially problematic) pointers. /// /// \returns True when the operand is valid to use (even if as an extension). static bool checkArithmeticBinOpPointerOperands(Sema &S, SourceLocation Loc, Expr *LHSExpr, Expr *RHSExpr) { bool isLHSPointer = LHSExpr->getType()->isAnyPointerType(); bool isRHSPointer = RHSExpr->getType()->isAnyPointerType(); if (!isLHSPointer && !isRHSPointer) return true; QualType LHSPointeeTy, RHSPointeeTy; if (isLHSPointer) LHSPointeeTy = LHSExpr->getType()->getPointeeType(); if (isRHSPointer) RHSPointeeTy = RHSExpr->getType()->getPointeeType(); // if both are pointers check if operation is valid wrt address spaces if (S.getLangOpts().OpenCL && isLHSPointer && isRHSPointer) { const PointerType *lhsPtr = LHSExpr->getType()->getAs(); const PointerType *rhsPtr = RHSExpr->getType()->getAs(); if (!lhsPtr->isAddressSpaceOverlapping(*rhsPtr)) { S.Diag(Loc, diag::err_typecheck_op_on_nonoverlapping_address_space_pointers) << LHSExpr->getType() << RHSExpr->getType() << 1 /*arithmetic op*/ << LHSExpr->getSourceRange() << RHSExpr->getSourceRange(); return false; } } // Check for arithmetic on pointers to incomplete types. bool isLHSVoidPtr = isLHSPointer && LHSPointeeTy->isVoidType(); bool isRHSVoidPtr = isRHSPointer && RHSPointeeTy->isVoidType(); if (isLHSVoidPtr || isRHSVoidPtr) { if (!isRHSVoidPtr) diagnoseArithmeticOnVoidPointer(S, Loc, LHSExpr); else if (!isLHSVoidPtr) diagnoseArithmeticOnVoidPointer(S, Loc, RHSExpr); else diagnoseArithmeticOnTwoVoidPointers(S, Loc, LHSExpr, RHSExpr); return !S.getLangOpts().CPlusPlus; } bool isLHSFuncPtr = isLHSPointer && LHSPointeeTy->isFunctionType(); bool isRHSFuncPtr = isRHSPointer && RHSPointeeTy->isFunctionType(); if (isLHSFuncPtr || isRHSFuncPtr) { if (!isRHSFuncPtr) diagnoseArithmeticOnFunctionPointer(S, Loc, LHSExpr); else if (!isLHSFuncPtr) diagnoseArithmeticOnFunctionPointer(S, Loc, RHSExpr); else diagnoseArithmeticOnTwoFunctionPointers(S, Loc, LHSExpr, RHSExpr); return !S.getLangOpts().CPlusPlus; } if (isLHSPointer && checkArithmeticIncompletePointerType(S, Loc, LHSExpr)) return false; if (isRHSPointer && checkArithmeticIncompletePointerType(S, Loc, RHSExpr)) return false; return true; } /// diagnoseStringPlusInt - Emit a warning when adding an integer to a string /// literal. 
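/// For example (editor's illustration):
/// \code
///   const char *p = "foo" + 5;   // warns: index is past the string bounds
/// \endcode
/// The fix-it for "str" + int rewrites the expression to &"str"[int].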
static void diagnoseStringPlusInt(Sema &Self, SourceLocation OpLoc,
                                  Expr *LHSExpr, Expr *RHSExpr) {
  StringLiteral *StrExpr = dyn_cast<StringLiteral>(LHSExpr->IgnoreImpCasts());
  Expr *IndexExpr = RHSExpr;
  if (!StrExpr) {
    StrExpr = dyn_cast<StringLiteral>(RHSExpr->IgnoreImpCasts());
    IndexExpr = LHSExpr;
  }

  bool IsStringPlusInt = StrExpr &&
      IndexExpr->getType()->isIntegralOrUnscopedEnumerationType();
  if (!IsStringPlusInt || IndexExpr->isValueDependent())
    return;

  llvm::APSInt index;
  if (IndexExpr->EvaluateAsInt(index, Self.getASTContext())) {
    unsigned StrLenWithNull = StrExpr->getLength() + 1;
    if (index.isNonNegative() &&
        index <= llvm::APSInt(llvm::APInt(index.getBitWidth(), StrLenWithNull),
                              index.isUnsigned()))
      return;
  }

  SourceRange DiagRange(LHSExpr->getLocStart(), RHSExpr->getLocEnd());
  Self.Diag(OpLoc, diag::warn_string_plus_int)
      << DiagRange << IndexExpr->IgnoreImpCasts()->getType();

  // Only print a fixit for "str" + int, not for int + "str".
  if (IndexExpr == RHSExpr) {
    SourceLocation EndLoc = Self.getLocForEndOfToken(RHSExpr->getLocEnd());
    Self.Diag(OpLoc, diag::note_string_plus_scalar_silence)
        << FixItHint::CreateInsertion(LHSExpr->getLocStart(), "&")
        << FixItHint::CreateReplacement(SourceRange(OpLoc), "[")
        << FixItHint::CreateInsertion(EndLoc, "]");
  } else
    Self.Diag(OpLoc, diag::note_string_plus_scalar_silence);
}

/// \brief Emit a warning when adding a char literal to a string.
static void diagnoseStringPlusChar(Sema &Self, SourceLocation OpLoc,
                                   Expr *LHSExpr, Expr *RHSExpr) {
  const Expr *StringRefExpr = LHSExpr;
  const CharacterLiteral *CharExpr =
      dyn_cast<CharacterLiteral>(RHSExpr->IgnoreImpCasts());

  if (!CharExpr) {
    CharExpr = dyn_cast<CharacterLiteral>(LHSExpr->IgnoreImpCasts());
    StringRefExpr = RHSExpr;
  }

  if (!CharExpr || !StringRefExpr)
    return;

  const QualType StringType = StringRefExpr->getType();

  // Return if not a PointerType.
  if (!StringType->isAnyPointerType())
    return;

  // Return if not a CharacterType.
  if (!StringType->getPointeeType()->isAnyCharacterType())
    return;

  ASTContext &Ctx = Self.getASTContext();
  SourceRange DiagRange(LHSExpr->getLocStart(), RHSExpr->getLocEnd());

  const QualType CharType = CharExpr->getType();
  if (!CharType->isAnyCharacterType() &&
      CharType->isIntegerType() &&
      llvm::isUIntN(Ctx.getCharWidth(), CharExpr->getValue())) {
    Self.Diag(OpLoc, diag::warn_string_plus_char)
        << DiagRange << Ctx.CharTy;
  } else {
    Self.Diag(OpLoc, diag::warn_string_plus_char)
        << DiagRange << CharExpr->getType();
  }

  // Only print a fixit for str + char, not for char + str.
  if (isa<CharacterLiteral>(RHSExpr->IgnoreImpCasts())) {
    SourceLocation EndLoc = Self.getLocForEndOfToken(RHSExpr->getLocEnd());
    Self.Diag(OpLoc, diag::note_string_plus_scalar_silence)
        << FixItHint::CreateInsertion(LHSExpr->getLocStart(), "&")
        << FixItHint::CreateReplacement(SourceRange(OpLoc), "[")
        << FixItHint::CreateInsertion(EndLoc, "]");
  } else {
    Self.Diag(OpLoc, diag::note_string_plus_scalar_silence);
  }
}

/// \brief Emit error when two pointers are incompatible.
static void diagnosePointerIncompatibility(Sema &S, SourceLocation Loc, Expr *LHSExpr, Expr *RHSExpr) { assert(LHSExpr->getType()->isAnyPointerType()); assert(RHSExpr->getType()->isAnyPointerType()); S.Diag(Loc, diag::err_typecheck_sub_ptr_compatible) << LHSExpr->getType() << RHSExpr->getType() << LHSExpr->getSourceRange() << RHSExpr->getSourceRange(); } // C99 6.5.6 QualType Sema::CheckAdditionOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, BinaryOperatorKind Opc, QualType* CompLHSTy) { checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/false); if (LHS.get()->getType()->isVectorType() || RHS.get()->getType()->isVectorType()) { QualType compType = CheckVectorOperands( LHS, RHS, Loc, CompLHSTy, /*AllowBothBool*/getLangOpts().AltiVec, /*AllowBoolConversions*/getLangOpts().ZVector); if (CompLHSTy) *CompLHSTy = compType; return compType; } QualType compType = UsualArithmeticConversions(LHS, RHS, CompLHSTy); if (LHS.isInvalid() || RHS.isInvalid()) return QualType(); // Diagnose "string literal" '+' int and string '+' "char literal". if (Opc == BO_Add) { diagnoseStringPlusInt(*this, Loc, LHS.get(), RHS.get()); diagnoseStringPlusChar(*this, Loc, LHS.get(), RHS.get()); } // handle the common case first (both operands are arithmetic). if (!compType.isNull() && compType->isArithmeticType()) { if (CompLHSTy) *CompLHSTy = compType; return compType; } // Type-checking. Ultimately the pointer's going to be in PExp; // note that we bias towards the LHS being the pointer. Expr *PExp = LHS.get(), *IExp = RHS.get(); bool isObjCPointer; if (PExp->getType()->isPointerType()) { isObjCPointer = false; } else if (PExp->getType()->isObjCObjectPointerType()) { isObjCPointer = true; } else { std::swap(PExp, IExp); if (PExp->getType()->isPointerType()) { isObjCPointer = false; } else if (PExp->getType()->isObjCObjectPointerType()) { isObjCPointer = true; } else { return InvalidOperands(Loc, LHS, RHS); } } assert(PExp->getType()->isAnyPointerType()); if (!IExp->getType()->isIntegerType()) return InvalidOperands(Loc, LHS, RHS); // Adding to a null pointer results in undefined behavior. if (PExp->IgnoreParenCasts()->isNullPointerConstant( Context, Expr::NPC_ValueDependentIsNotNull)) { // In C++ adding zero to a null pointer is defined. llvm::APSInt KnownVal; if (!getLangOpts().CPlusPlus || (!IExp->isValueDependent() && (!IExp->EvaluateAsInt(KnownVal, Context) || KnownVal != 0))) { // Check the conditions to see if this is the 'p = nullptr + n' idiom. 
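      // (Editor's illustration) The idiom in question looks like:
      //   p = (char*)0 + n;   // recognized GNU extension, milder warning
      // while e.g. 'nullptr + 1' gets the regular null-pointer-arithmetic
      // warning, since adding a nonzero value to a null pointer is undefined.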
bool IsGNUIdiom = BinaryOperator::isNullPointerArithmeticExtension( Context, BO_Add, PExp, IExp); diagnoseArithmeticOnNullPointer(*this, Loc, PExp, IsGNUIdiom); } } if (!checkArithmeticOpPointerOperand(*this, Loc, PExp)) return QualType(); if (isObjCPointer && checkArithmeticOnObjCPointer(*this, Loc, PExp)) return QualType(); // Check array bounds for pointer arithemtic CheckArrayAccess(PExp, IExp); if (CompLHSTy) { QualType LHSTy = Context.isPromotableBitField(LHS.get()); if (LHSTy.isNull()) { LHSTy = LHS.get()->getType(); if (LHSTy->isPromotableIntegerType()) LHSTy = Context.getPromotedIntegerType(LHSTy); } *CompLHSTy = LHSTy; } return PExp->getType(); } // C99 6.5.6 QualType Sema::CheckSubtractionOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, QualType* CompLHSTy) { checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/false); if (LHS.get()->getType()->isVectorType() || RHS.get()->getType()->isVectorType()) { QualType compType = CheckVectorOperands( LHS, RHS, Loc, CompLHSTy, /*AllowBothBool*/getLangOpts().AltiVec, /*AllowBoolConversions*/getLangOpts().ZVector); if (CompLHSTy) *CompLHSTy = compType; return compType; } QualType compType = UsualArithmeticConversions(LHS, RHS, CompLHSTy); if (LHS.isInvalid() || RHS.isInvalid()) return QualType(); // Enforce type constraints: C99 6.5.6p3. // Handle the common case first (both operands are arithmetic). if (!compType.isNull() && compType->isArithmeticType()) { if (CompLHSTy) *CompLHSTy = compType; return compType; } // Either ptr - int or ptr - ptr. if (LHS.get()->getType()->isAnyPointerType()) { QualType lpointee = LHS.get()->getType()->getPointeeType(); // Diagnose bad cases where we step over interface counts. if (LHS.get()->getType()->isObjCObjectPointerType() && checkArithmeticOnObjCPointer(*this, Loc, LHS.get())) return QualType(); // The result type of a pointer-int computation is the pointer type. if (RHS.get()->getType()->isIntegerType()) { // Subtracting from a null pointer should produce a warning. // The last argument to the diagnose call says this doesn't match the // GNU int-to-pointer idiom. if (LHS.get()->IgnoreParenCasts()->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNotNull)) { // In C++ adding zero to a null pointer is defined. llvm::APSInt KnownVal; if (!getLangOpts().CPlusPlus || (!RHS.get()->isValueDependent() && (!RHS.get()->EvaluateAsInt(KnownVal, Context) || KnownVal != 0))) { diagnoseArithmeticOnNullPointer(*this, Loc, LHS.get(), false); } } if (!checkArithmeticOpPointerOperand(*this, Loc, LHS.get())) return QualType(); // Check array bounds for pointer arithemtic CheckArrayAccess(LHS.get(), RHS.get(), /*ArraySubscriptExpr*/nullptr, /*AllowOnePastEnd*/true, /*IndexNegated*/true); if (CompLHSTy) *CompLHSTy = LHS.get()->getType(); return LHS.get()->getType(); } // Handle pointer-pointer subtractions. 
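    // (Editor's illustration) e.g. for 'int *a, *b;  a - b' the result below
    // has the target's ptrdiff_t type (Context.getPointerDiffType()), while
    // 'int* - long*' is rejected as subtraction of incompatible pointers.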
if (const PointerType *RHSPTy = RHS.get()->getType()->getAs()) { QualType rpointee = RHSPTy->getPointeeType(); if (getLangOpts().CPlusPlus) { // Pointee types must be the same: C++ [expr.add] if (!Context.hasSameUnqualifiedType(lpointee, rpointee)) { diagnosePointerIncompatibility(*this, Loc, LHS.get(), RHS.get()); } } else { // Pointee types must be compatible C99 6.5.6p3 if (!Context.typesAreCompatible( Context.getCanonicalType(lpointee).getUnqualifiedType(), Context.getCanonicalType(rpointee).getUnqualifiedType())) { diagnosePointerIncompatibility(*this, Loc, LHS.get(), RHS.get()); return QualType(); } } if (!checkArithmeticBinOpPointerOperands(*this, Loc, LHS.get(), RHS.get())) return QualType(); // FIXME: Add warnings for nullptr - ptr. // The pointee type may have zero size. As an extension, a structure or // union may have zero size or an array may have zero length. In this // case subtraction does not make sense. if (!rpointee->isVoidType() && !rpointee->isFunctionType()) { CharUnits ElementSize = Context.getTypeSizeInChars(rpointee); if (ElementSize.isZero()) { Diag(Loc,diag::warn_sub_ptr_zero_size_types) << rpointee.getUnqualifiedType() << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); } } if (CompLHSTy) *CompLHSTy = LHS.get()->getType(); return Context.getPointerDiffType(); } } return InvalidOperands(Loc, LHS, RHS); } static bool isScopedEnumerationType(QualType T) { if (const EnumType *ET = T->getAs()) return ET->getDecl()->isScoped(); return false; } static void DiagnoseBadShiftValues(Sema& S, ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, BinaryOperatorKind Opc, QualType LHSType) { // OpenCL 6.3j: shift values are effectively % word size of LHS (more defined), // so skip remaining warnings as we don't want to modify values within Sema. if (S.getLangOpts().OpenCL) return; llvm::APSInt Right; // Check right/shifter operand if (RHS.get()->isValueDependent() || !RHS.get()->EvaluateAsInt(Right, S.Context)) return; if (Right.isNegative()) { S.DiagRuntimeBehavior(Loc, RHS.get(), S.PDiag(diag::warn_shift_negative) << RHS.get()->getSourceRange()); return; } llvm::APInt LeftBits(Right.getBitWidth(), S.Context.getTypeSize(LHS.get()->getType())); if (Right.uge(LeftBits)) { S.DiagRuntimeBehavior(Loc, RHS.get(), S.PDiag(diag::warn_shift_gt_typewidth) << RHS.get()->getSourceRange()); return; } if (Opc != BO_Shl) return; // When left shifting an ICE which is signed, we can check for overflow which // according to C++ has undefined behavior ([expr.shift] 5.8/2). Unsigned // integers have defined behavior modulo one more than the maximum value // representable in the result type, so never warn for those. llvm::APSInt Left; if (LHS.get()->isValueDependent() || LHSType->hasUnsignedIntegerRepresentation() || !LHS.get()->EvaluateAsInt(Left, S.Context)) return; // If LHS does not have a signed type and non-negative value // then, the behavior is undefined. Warn about it. if (Left.isNegative() && !S.getLangOpts().isSignedOverflowDefined()) { S.DiagRuntimeBehavior(Loc, LHS.get(), S.PDiag(diag::warn_shift_lhs_negative) << LHS.get()->getSourceRange()); return; } llvm::APInt ResultBits = static_cast(Right) + Left.getMinSignedBits(); if (LeftBits.uge(ResultBits)) return; llvm::APSInt Result = Left.extend(ResultBits.getLimitedValue()); Result = Result.shl(Right); // Print the bit representation of the signed integer as an unsigned // hexadecimal number. 
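  // (Editor's illustration) e.g. '0x7FFFFFFF << 1' on a 32-bit int needs 33
  // bits; only the sign bit is lost, so the milder "sets the sign bit"
  // warning below fires rather than the general "requires N bits" one.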
SmallString<40> HexResult; Result.toString(HexResult, 16, /*Signed =*/false, /*Literal =*/true); // If we are only missing a sign bit, this is less likely to result in actual // bugs -- if the result is cast back to an unsigned type, it will have the // expected value. Thus we place this behind a different warning that can be // turned off separately if needed. if (LeftBits == ResultBits - 1) { S.Diag(Loc, diag::warn_shift_result_sets_sign_bit) << HexResult << LHSType << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return; } S.Diag(Loc, diag::warn_shift_result_gt_typewidth) << HexResult.str() << Result.getMinSignedBits() << LHSType << Left.getBitWidth() << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); } /// \brief Return the resulting type when a vector is shifted /// by a scalar or vector shift amount. static QualType checkVectorShift(Sema &S, ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, bool IsCompAssign) { // OpenCL v1.1 s6.3.j says RHS can be a vector only if LHS is a vector. if ((S.LangOpts.OpenCL || S.LangOpts.ZVector) && !LHS.get()->getType()->isVectorType()) { S.Diag(Loc, diag::err_shift_rhs_only_vector) << RHS.get()->getType() << LHS.get()->getType() << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } if (!IsCompAssign) { LHS = S.UsualUnaryConversions(LHS.get()); if (LHS.isInvalid()) return QualType(); } RHS = S.UsualUnaryConversions(RHS.get()); if (RHS.isInvalid()) return QualType(); QualType LHSType = LHS.get()->getType(); // Note that LHS might be a scalar because the routine calls not only in // OpenCL case. const VectorType *LHSVecTy = LHSType->getAs(); QualType LHSEleType = LHSVecTy ? LHSVecTy->getElementType() : LHSType; // Note that RHS might not be a vector. QualType RHSType = RHS.get()->getType(); const VectorType *RHSVecTy = RHSType->getAs(); QualType RHSEleType = RHSVecTy ? RHSVecTy->getElementType() : RHSType; // The operands need to be integers. if (!LHSEleType->isIntegerType()) { S.Diag(Loc, diag::err_typecheck_expect_int) << LHS.get()->getType() << LHS.get()->getSourceRange(); return QualType(); } if (!RHSEleType->isIntegerType()) { S.Diag(Loc, diag::err_typecheck_expect_int) << RHS.get()->getType() << RHS.get()->getSourceRange(); return QualType(); } if (!LHSVecTy) { assert(RHSVecTy); if (IsCompAssign) return RHSType; if (LHSEleType != RHSEleType) { LHS = S.ImpCastExprToType(LHS.get(),RHSEleType, CK_IntegralCast); LHSEleType = RHSEleType; } QualType VecTy = S.Context.getExtVectorType(LHSEleType, RHSVecTy->getNumElements()); LHS = S.ImpCastExprToType(LHS.get(), VecTy, CK_VectorSplat); LHSType = VecTy; } else if (RHSVecTy) { // OpenCL v1.1 s6.3.j says that for vector types, the operators // are applied component-wise. So if RHS is a vector, then ensure // that the number of elements is the same as LHS... 
if (RHSVecTy->getNumElements() != LHSVecTy->getNumElements()) { S.Diag(Loc, diag::err_typecheck_vector_lengths_not_equal) << LHS.get()->getType() << RHS.get()->getType() << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } if (!S.LangOpts.OpenCL && !S.LangOpts.ZVector) { const BuiltinType *LHSBT = LHSEleType->getAs(); const BuiltinType *RHSBT = RHSEleType->getAs(); if (LHSBT != RHSBT && S.Context.getTypeSize(LHSBT) != S.Context.getTypeSize(RHSBT)) { S.Diag(Loc, diag::warn_typecheck_vector_element_sizes_not_equal) << LHS.get()->getType() << RHS.get()->getType() << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); } } } else { // ...else expand RHS to match the number of elements in LHS. QualType VecTy = S.Context.getExtVectorType(RHSEleType, LHSVecTy->getNumElements()); RHS = S.ImpCastExprToType(RHS.get(), VecTy, CK_VectorSplat); } return LHSType; } // C99 6.5.7 QualType Sema::CheckShiftOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, BinaryOperatorKind Opc, bool IsCompAssign) { checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/false); // Vector shifts promote their scalar inputs to vector type. if (LHS.get()->getType()->isVectorType() || RHS.get()->getType()->isVectorType()) { if (LangOpts.ZVector) { // The shift operators for the z vector extensions work basically // like general shifts, except that neither the LHS nor the RHS is // allowed to be a "vector bool". if (auto LHSVecType = LHS.get()->getType()->getAs()) if (LHSVecType->getVectorKind() == VectorType::AltiVecBool) return InvalidOperands(Loc, LHS, RHS); if (auto RHSVecType = RHS.get()->getType()->getAs()) if (RHSVecType->getVectorKind() == VectorType::AltiVecBool) return InvalidOperands(Loc, LHS, RHS); } return checkVectorShift(*this, LHS, RHS, Loc, IsCompAssign); } // Shifts don't perform usual arithmetic conversions, they just do integer // promotions on each operand. C99 6.5.7p3 // For the LHS, do usual unary conversions, but then reset them away // if this is a compound assignment. ExprResult OldLHS = LHS; LHS = UsualUnaryConversions(LHS.get()); if (LHS.isInvalid()) return QualType(); QualType LHSType = LHS.get()->getType(); if (IsCompAssign) LHS = OldLHS; // The RHS is simpler. RHS = UsualUnaryConversions(RHS.get()); if (RHS.isInvalid()) return QualType(); QualType RHSType = RHS.get()->getType(); // C99 6.5.7p2: Each of the operands shall have integer type. if (!LHSType->hasIntegerRepresentation() || !RHSType->hasIntegerRepresentation()) return InvalidOperands(Loc, LHS, RHS); // C++0x: Don't allow scoped enums. FIXME: Use something better than // hasIntegerRepresentation() above instead of this. if (isScopedEnumerationType(LHSType) || isScopedEnumerationType(RHSType)) { return InvalidOperands(Loc, LHS, RHS); } // Sanity-check shift operands DiagnoseBadShiftValues(*this, LHS, RHS, Loc, Opc, LHSType); // "The type of the result is that of the promoted left operand." return LHSType; } static bool IsWithinTemplateSpecialization(Decl *D) { if (DeclContext *DC = D->getDeclContext()) { if (isa(DC)) return true; if (FunctionDecl *FD = dyn_cast(DC)) return FD->isFunctionTemplateSpecialization(); } return false; } /// If two different enums are compared, raise a warning. 
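/// For example (editor's illustration):
/// \code
///   enum Fruit { Apple };  enum Tool { Hammer };
///   bool b = (Apple == Hammer);  // warn_comparison_of_mixed_enum_types
/// \endcode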
static void checkEnumComparison(Sema &S, SourceLocation Loc, Expr *LHS, Expr *RHS) { QualType LHSStrippedType = LHS->IgnoreParenImpCasts()->getType(); QualType RHSStrippedType = RHS->IgnoreParenImpCasts()->getType(); const EnumType *LHSEnumType = LHSStrippedType->getAs(); if (!LHSEnumType) return; const EnumType *RHSEnumType = RHSStrippedType->getAs(); if (!RHSEnumType) return; // Ignore anonymous enums. if (!LHSEnumType->getDecl()->getIdentifier() && !LHSEnumType->getDecl()->getTypedefNameForAnonDecl()) return; if (!RHSEnumType->getDecl()->getIdentifier() && !RHSEnumType->getDecl()->getTypedefNameForAnonDecl()) return; if (S.Context.hasSameUnqualifiedType(LHSStrippedType, RHSStrippedType)) return; S.Diag(Loc, diag::warn_comparison_of_mixed_enum_types) << LHSStrippedType << RHSStrippedType << LHS->getSourceRange() << RHS->getSourceRange(); } /// \brief Diagnose bad pointer comparisons. static void diagnoseDistinctPointerComparison(Sema &S, SourceLocation Loc, ExprResult &LHS, ExprResult &RHS, bool IsError) { S.Diag(Loc, IsError ? diag::err_typecheck_comparison_of_distinct_pointers : diag::ext_typecheck_comparison_of_distinct_pointers) << LHS.get()->getType() << RHS.get()->getType() << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); } /// \brief Returns false if the pointers are converted to a composite type, /// true otherwise. static bool convertPointersToCompositeType(Sema &S, SourceLocation Loc, ExprResult &LHS, ExprResult &RHS) { // C++ [expr.rel]p2: // [...] Pointer conversions (4.10) and qualification // conversions (4.4) are performed on pointer operands (or on // a pointer operand and a null pointer constant) to bring // them to their composite pointer type. [...] // // C++ [expr.eq]p1 uses the same notion for (in)equality // comparisons of pointers. QualType LHSType = LHS.get()->getType(); QualType RHSType = RHS.get()->getType(); assert(LHSType->isPointerType() || RHSType->isPointerType() || LHSType->isMemberPointerType() || RHSType->isMemberPointerType()); QualType T = S.FindCompositePointerType(Loc, LHS, RHS); if (T.isNull()) { if ((LHSType->isPointerType() || LHSType->isMemberPointerType()) && (RHSType->isPointerType() || RHSType->isMemberPointerType())) diagnoseDistinctPointerComparison(S, Loc, LHS, RHS, /*isError*/true); else S.InvalidOperands(Loc, LHS, RHS); return true; } LHS = S.ImpCastExprToType(LHS.get(), T, CK_BitCast); RHS = S.ImpCastExprToType(RHS.get(), T, CK_BitCast); return false; } static void diagnoseFunctionPointerToVoidComparison(Sema &S, SourceLocation Loc, ExprResult &LHS, ExprResult &RHS, bool IsError) { S.Diag(Loc, IsError ? diag::err_typecheck_comparison_of_fptr_to_void : diag::ext_typecheck_comparison_of_fptr_to_void) << LHS.get()->getType() << RHS.get()->getType() << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); } static bool isObjCObjectLiteral(ExprResult &E) { switch (E.get()->IgnoreParenImpCasts()->getStmtClass()) { case Stmt::ObjCArrayLiteralClass: case Stmt::ObjCDictionaryLiteralClass: case Stmt::ObjCStringLiteralClass: case Stmt::ObjCBoxedExprClass: return true; default: // Note that ObjCBoolLiteral is NOT an object literal! return false; } } static bool hasIsEqualMethod(Sema &S, const Expr *LHS, const Expr *RHS) { const ObjCObjectPointerType *Type = LHS->getType()->getAs(); // If this is not actually an Objective-C object, bail out. if (!Type) return false; // Get the LHS object's interface type. QualType InterfaceType = Type->getPointeeType(); // If the RHS isn't an Objective-C object, bail out. 
if (!RHS->getType()->isObjCObjectPointerType()) return false; // Try to find the -isEqual: method. Selector IsEqualSel = S.NSAPIObj->getIsEqualSelector(); ObjCMethodDecl *Method = S.LookupMethodInObjectType(IsEqualSel, InterfaceType, /*instance=*/true); if (!Method) { if (Type->isObjCIdType()) { // For 'id', just check the global pool. Method = S.LookupInstanceMethodInGlobalPool(IsEqualSel, SourceRange(), /*receiverId=*/true); } else { // Check protocols. Method = S.LookupMethodInQualifiedType(IsEqualSel, Type, /*instance=*/true); } } if (!Method) return false; QualType T = Method->parameters()[0]->getType(); if (!T->isObjCObjectPointerType()) return false; QualType R = Method->getReturnType(); if (!R->isScalarType()) return false; return true; } Sema::ObjCLiteralKind Sema::CheckLiteralKind(Expr *FromE) { FromE = FromE->IgnoreParenImpCasts(); switch (FromE->getStmtClass()) { default: break; case Stmt::ObjCStringLiteralClass: // "string literal" return LK_String; case Stmt::ObjCArrayLiteralClass: // "array literal" return LK_Array; case Stmt::ObjCDictionaryLiteralClass: // "dictionary literal" return LK_Dictionary; case Stmt::BlockExprClass: return LK_Block; case Stmt::ObjCBoxedExprClass: { Expr *Inner = cast(FromE)->getSubExpr()->IgnoreParens(); switch (Inner->getStmtClass()) { case Stmt::IntegerLiteralClass: case Stmt::FloatingLiteralClass: case Stmt::CharacterLiteralClass: case Stmt::ObjCBoolLiteralExprClass: case Stmt::CXXBoolLiteralExprClass: // "numeric literal" return LK_Numeric; case Stmt::ImplicitCastExprClass: { CastKind CK = cast(Inner)->getCastKind(); // Boolean literals can be represented by implicit casts. if (CK == CK_IntegralToBoolean || CK == CK_IntegralCast) return LK_Numeric; break; } default: break; } return LK_Boxed; } } return LK_None; } static void diagnoseObjCLiteralComparison(Sema &S, SourceLocation Loc, ExprResult &LHS, ExprResult &RHS, BinaryOperator::Opcode Opc){ Expr *Literal; Expr *Other; if (isObjCObjectLiteral(LHS)) { Literal = LHS.get(); Other = RHS.get(); } else { Literal = RHS.get(); Other = LHS.get(); } // Don't warn on comparisons against nil. Other = Other->IgnoreParenCasts(); if (Other->isNullPointerConstant(S.getASTContext(), Expr::NPC_ValueDependentIsNotNull)) return; // This should be kept in sync with warn_objc_literal_comparison. // LK_String should always be after the other literals, since it has its own // warning flag. Sema::ObjCLiteralKind LiteralKind = S.CheckLiteralKind(Literal); assert(LiteralKind != Sema::LK_Block); if (LiteralKind == Sema::LK_None) { llvm_unreachable("Unknown Objective-C object literal kind"); } if (LiteralKind == Sema::LK_String) S.Diag(Loc, diag::warn_objc_string_literal_comparison) << Literal->getSourceRange(); else S.Diag(Loc, diag::warn_objc_literal_comparison) << LiteralKind << Literal->getSourceRange(); if (BinaryOperator::isEqualityOp(Opc) && hasIsEqualMethod(S, LHS.get(), RHS.get())) { SourceLocation Start = LHS.get()->getLocStart(); SourceLocation End = S.getLocForEndOfToken(RHS.get()->getLocEnd()); CharSourceRange OpRange = CharSourceRange::getCharRange(Loc, S.getLocForEndOfToken(Loc)); S.Diag(Loc, diag::note_objc_literal_comparison_isequal) << FixItHint::CreateInsertion(Start, Opc == BO_EQ ? "[" : "![") << FixItHint::CreateReplacement(OpRange, " isEqual:") << FixItHint::CreateInsertion(End, "]"); } } /// Warns on !x < y, !x & y where !(x < y), !(x & y) was probably intended. 
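/// For example (editor's illustration):
/// \code
///   int x, y;
///   if (!x < y) { }   // warns: did you mean !(x < y) or (!x) < y?
/// \endcode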
static void diagnoseLogicalNotOnLHSofCheck(Sema &S, ExprResult &LHS,
                                           ExprResult &RHS, SourceLocation Loc,
                                           BinaryOperatorKind Opc) {
  // Check that left hand side is !something.
  UnaryOperator *UO = dyn_cast<UnaryOperator>(LHS.get()->IgnoreImpCasts());
  if (!UO || UO->getOpcode() != UO_LNot) return;

  // Only check if the right hand side is non-bool arithmetic type.
  if (RHS.get()->isKnownToHaveBooleanValue()) return;

  // Make sure that the something in !something is not bool.
  Expr *SubExpr = UO->getSubExpr()->IgnoreImpCasts();
  if (SubExpr->isKnownToHaveBooleanValue()) return;

  // Emit warning.
  bool IsBitwiseOp = Opc == BO_And || Opc == BO_Or || Opc == BO_Xor;
  S.Diag(UO->getOperatorLoc(), diag::warn_logical_not_on_lhs_of_check)
      << Loc << IsBitwiseOp;

  // First note suggests !(x < y)
  SourceLocation FirstOpen = SubExpr->getLocStart();
  SourceLocation FirstClose = RHS.get()->getLocEnd();
  FirstClose = S.getLocForEndOfToken(FirstClose);
  if (FirstClose.isInvalid())
    FirstOpen = SourceLocation();
  S.Diag(UO->getOperatorLoc(), diag::note_logical_not_fix)
      << IsBitwiseOp
      << FixItHint::CreateInsertion(FirstOpen, "(")
      << FixItHint::CreateInsertion(FirstClose, ")");

  // Second note suggests (!x) < y
  SourceLocation SecondOpen = LHS.get()->getLocStart();
  SourceLocation SecondClose = LHS.get()->getLocEnd();
  SecondClose = S.getLocForEndOfToken(SecondClose);
  if (SecondClose.isInvalid())
    SecondOpen = SourceLocation();
  S.Diag(UO->getOperatorLoc(), diag::note_logical_not_silence_with_parens)
      << FixItHint::CreateInsertion(SecondOpen, "(")
      << FixItHint::CreateInsertion(SecondClose, ")");
}

// Get the decl for a simple expression: a reference to a variable,
// an implicit C++ field reference, or an implicit ObjC ivar reference.
static ValueDecl *getCompareDecl(Expr *E) {
  if (DeclRefExpr* DR = dyn_cast<DeclRefExpr>(E))
    return DR->getDecl();
  if (ObjCIvarRefExpr* Ivar = dyn_cast<ObjCIvarRefExpr>(E)) {
    if (Ivar->isFreeIvar())
      return Ivar->getDecl();
  }
  if (MemberExpr* Mem = dyn_cast<MemberExpr>(E)) {
    if (Mem->isImplicitAccess())
      return Mem->getMemberDecl();
  }
  return nullptr;
}

// C99 6.5.8, C++ [expr.rel]
QualType Sema::CheckCompareOperands(ExprResult &LHS, ExprResult &RHS,
                                    SourceLocation Loc, BinaryOperatorKind Opc,
                                    bool IsRelational) {
  checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/true);

  // Handle vector comparisons separately.
  if (LHS.get()->getType()->isVectorType() ||
      RHS.get()->getType()->isVectorType())
    return CheckVectorCompareOperands(LHS, RHS, Loc, IsRelational);

  QualType LHSType = LHS.get()->getType();
  QualType RHSType = RHS.get()->getType();

  Expr *LHSStripped = LHS.get()->IgnoreParenImpCasts();
  Expr *RHSStripped = RHS.get()->IgnoreParenImpCasts();

  checkEnumComparison(*this, Loc, LHS.get(), RHS.get());
  diagnoseLogicalNotOnLHSofCheck(*this, LHS, RHS, Loc, Opc);

  if (!LHSType->hasFloatingRepresentation() &&
      !(LHSType->isBlockPointerType() && IsRelational) &&
      !LHS.get()->getLocStart().isMacroID() &&
      !RHS.get()->getLocStart().isMacroID() &&
      !inTemplateInstantiation()) {
    // For non-floating point types, check for self-comparisons of the form
    // x == x, x != x, x < x, etc.  These always evaluate to a constant, and
    // often indicate logic errors in the program.
    //
    // NOTE: Don't warn about comparison expressions resulting from macro
    // expansion.  Also don't warn about comparisons which are only self
    // comparisons within a template specialization.  The warnings should catch
    // obvious cases in the definition of the template anyways.  The idea is to
    // warn when the typed comparison operator will always evaluate to the same
    // result.
ValueDecl *DL = getCompareDecl(LHSStripped); ValueDecl *DR = getCompareDecl(RHSStripped); if (DL && DR && DL == DR && !IsWithinTemplateSpecialization(DL)) { DiagRuntimeBehavior(Loc, nullptr, PDiag(diag::warn_comparison_always) << 0 // self- << (Opc == BO_EQ || Opc == BO_LE || Opc == BO_GE)); } else if (DL && DR && LHSType->isArrayType() && RHSType->isArrayType() && !DL->getType()->isReferenceType() && !DR->getType()->isReferenceType()) { // what is it always going to eval to? char always_evals_to; switch(Opc) { case BO_EQ: // e.g. array1 == array2 always_evals_to = 0; // false break; case BO_NE: // e.g. array1 != array2 always_evals_to = 1; // true break; default: // best we can say is 'a constant' always_evals_to = 2; // e.g. array1 <= array2 break; } DiagRuntimeBehavior(Loc, nullptr, PDiag(diag::warn_comparison_always) << 1 // array << always_evals_to); } if (isa(LHSStripped)) LHSStripped = LHSStripped->IgnoreParenCasts(); if (isa(RHSStripped)) RHSStripped = RHSStripped->IgnoreParenCasts(); // Warn about comparisons against a string constant (unless the other // operand is null), the user probably wants strcmp. Expr *literalString = nullptr; Expr *literalStringStripped = nullptr; if ((isa(LHSStripped) || isa(LHSStripped)) && !RHSStripped->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull)) { literalString = LHS.get(); literalStringStripped = LHSStripped; } else if ((isa(RHSStripped) || isa(RHSStripped)) && !LHSStripped->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull)) { literalString = RHS.get(); literalStringStripped = RHSStripped; } if (literalString) { DiagRuntimeBehavior(Loc, nullptr, PDiag(diag::warn_stringcompare) << isa(literalStringStripped) << literalString->getSourceRange()); } } // C99 6.5.8p3 / C99 6.5.9p4 UsualArithmeticConversions(LHS, RHS); if (LHS.isInvalid() || RHS.isInvalid()) return QualType(); LHSType = LHS.get()->getType(); RHSType = RHS.get()->getType(); // The result of comparisons is 'bool' in C++, 'int' in C. QualType ResultTy = Context.getLogicalOperationType(); if (IsRelational) { if (LHSType->isRealType() && RHSType->isRealType()) return ResultTy; } else { // Check for comparisons of floating point operands using != and ==. if (LHSType->hasFloatingRepresentation()) CheckFloatComparison(Loc, LHS.get(), RHS.get()); if (LHSType->isArithmeticType() && RHSType->isArithmeticType()) return ResultTy; } const Expr::NullPointerConstantKind LHSNullKind = LHS.get()->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull); const Expr::NullPointerConstantKind RHSNullKind = RHS.get()->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull); bool LHSIsNull = LHSNullKind != Expr::NPCK_NotNull; bool RHSIsNull = RHSNullKind != Expr::NPCK_NotNull; if (!IsRelational && LHSIsNull != RHSIsNull) { bool IsEquality = Opc == BO_EQ; if (RHSIsNull) DiagnoseAlwaysNonNullPointer(LHS.get(), RHSNullKind, IsEquality, RHS.get()->getSourceRange()); else DiagnoseAlwaysNonNullPointer(RHS.get(), LHSNullKind, IsEquality, LHS.get()->getSourceRange()); } if ((LHSType->isIntegerType() && !LHSIsNull) || (RHSType->isIntegerType() && !RHSIsNull)) { // Skip normal pointer conversion checks in this case; we have better // diagnostics for this below. } else if (getLangOpts().CPlusPlus) { // Equality comparison of a function pointer to a void pointer is invalid, // but we allow it as an extension. // FIXME: If we really want to allow this, should it be part of composite // pointer type computation so it works in conditionals too? 
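    // (Editor's illustration) the extension handled below accepts e.g.:
    //   void (*fp)(void);  void *vp;
    //   fp == vp;   // GCC-compat extension; a hard error in SFINAE contexts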
if (!IsRelational && ((LHSType->isFunctionPointerType() && RHSType->isVoidPointerType()) || (RHSType->isFunctionPointerType() && LHSType->isVoidPointerType()))) { // This is a gcc extension compatibility comparison. // In a SFINAE context, we treat this as a hard error to maintain // conformance with the C++ standard. diagnoseFunctionPointerToVoidComparison( *this, Loc, LHS, RHS, /*isError*/ (bool)isSFINAEContext()); if (isSFINAEContext()) return QualType(); RHS = ImpCastExprToType(RHS.get(), LHSType, CK_BitCast); return ResultTy; } // C++ [expr.eq]p2: // If at least one operand is a pointer [...] bring them to their // composite pointer type. // C++ [expr.rel]p2: // If both operands are pointers, [...] bring them to their composite // pointer type. if ((int)LHSType->isPointerType() + (int)RHSType->isPointerType() >= (IsRelational ? 2 : 1) && (!LangOpts.ObjCAutoRefCount || !(LHSType->isObjCObjectPointerType() || RHSType->isObjCObjectPointerType()))) { if (convertPointersToCompositeType(*this, Loc, LHS, RHS)) return QualType(); else return ResultTy; } } else if (LHSType->isPointerType() && RHSType->isPointerType()) { // C99 6.5.8p2 // All of the following pointer-related warnings are GCC extensions, except // when handling null pointer constants. QualType LCanPointeeTy = LHSType->castAs()->getPointeeType().getCanonicalType(); QualType RCanPointeeTy = RHSType->castAs()->getPointeeType().getCanonicalType(); // C99 6.5.9p2 and C99 6.5.8p2 if (Context.typesAreCompatible(LCanPointeeTy.getUnqualifiedType(), RCanPointeeTy.getUnqualifiedType())) { // Valid unless a relational comparison of function pointers if (IsRelational && LCanPointeeTy->isFunctionType()) { Diag(Loc, diag::ext_typecheck_ordered_comparison_of_function_pointers) << LHSType << RHSType << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); } } else if (!IsRelational && (LCanPointeeTy->isVoidType() || RCanPointeeTy->isVoidType())) { // Valid unless comparison between non-null pointer and function pointer if ((LCanPointeeTy->isFunctionType() || RCanPointeeTy->isFunctionType()) && !LHSIsNull && !RHSIsNull) diagnoseFunctionPointerToVoidComparison(*this, Loc, LHS, RHS, /*isError*/false); } else { // Invalid diagnoseDistinctPointerComparison(*this, Loc, LHS, RHS, /*isError*/false); } if (LCanPointeeTy != RCanPointeeTy) { // Treat NULL constant as a special case in OpenCL. if (getLangOpts().OpenCL && !LHSIsNull && !RHSIsNull) { const PointerType *LHSPtr = LHSType->getAs(); if (!LHSPtr->isAddressSpaceOverlapping(*RHSType->getAs())) { Diag(Loc, diag::err_typecheck_op_on_nonoverlapping_address_space_pointers) << LHSType << RHSType << 0 /* comparison */ << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); } } LangAS AddrSpaceL = LCanPointeeTy.getAddressSpace(); LangAS AddrSpaceR = RCanPointeeTy.getAddressSpace(); CastKind Kind = AddrSpaceL != AddrSpaceR ? CK_AddressSpaceConversion : CK_BitCast; if (LHSIsNull && !RHSIsNull) LHS = ImpCastExprToType(LHS.get(), RHSType, Kind); else RHS = ImpCastExprToType(RHS.get(), LHSType, Kind); } return ResultTy; } if (getLangOpts().CPlusPlus) { // C++ [expr.eq]p4: // Two operands of type std::nullptr_t or one operand of type // std::nullptr_t and the other a null pointer constant compare equal. 
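    // (Editor's illustration) e.g. 'nullptr == nullptr' and 'nullptr == 0'
    // are both valid equality comparisons; the casts below just bring the
    // null operand over to the nullptr_t side.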
if (!IsRelational && LHSIsNull && RHSIsNull) { if (LHSType->isNullPtrType()) { RHS = ImpCastExprToType(RHS.get(), LHSType, CK_NullToPointer); return ResultTy; } if (RHSType->isNullPtrType()) { LHS = ImpCastExprToType(LHS.get(), RHSType, CK_NullToPointer); return ResultTy; } } // Comparison of Objective-C pointers and block pointers against nullptr_t. // These aren't covered by the composite pointer type rules. if (!IsRelational && RHSType->isNullPtrType() && (LHSType->isObjCObjectPointerType() || LHSType->isBlockPointerType())) { RHS = ImpCastExprToType(RHS.get(), LHSType, CK_NullToPointer); return ResultTy; } if (!IsRelational && LHSType->isNullPtrType() && (RHSType->isObjCObjectPointerType() || RHSType->isBlockPointerType())) { LHS = ImpCastExprToType(LHS.get(), RHSType, CK_NullToPointer); return ResultTy; } if (IsRelational && ((LHSType->isNullPtrType() && RHSType->isPointerType()) || (RHSType->isNullPtrType() && LHSType->isPointerType()))) { // HACK: Relational comparison of nullptr_t against a pointer type is // invalid per DR583, but we allow it within std::less<> and friends, // since otherwise common uses of it break. // FIXME: Consider removing this hack once LWG fixes std::less<> and // friends to have std::nullptr_t overload candidates. DeclContext *DC = CurContext; if (isa(DC)) DC = DC->getParent(); if (auto *CTSD = dyn_cast(DC)) { if (CTSD->isInStdNamespace() && llvm::StringSwitch(CTSD->getName()) .Cases("less", "less_equal", "greater", "greater_equal", true) .Default(false)) { if (RHSType->isNullPtrType()) RHS = ImpCastExprToType(RHS.get(), LHSType, CK_NullToPointer); else LHS = ImpCastExprToType(LHS.get(), RHSType, CK_NullToPointer); return ResultTy; } } } // C++ [expr.eq]p2: // If at least one operand is a pointer to member, [...] bring them to // their composite pointer type. if (!IsRelational && (LHSType->isMemberPointerType() || RHSType->isMemberPointerType())) { if (convertPointersToCompositeType(*this, Loc, LHS, RHS)) return QualType(); else return ResultTy; } // Handle scoped enumeration types specifically, since they don't promote // to integers. if (LHS.get()->getType()->isEnumeralType() && Context.hasSameUnqualifiedType(LHS.get()->getType(), RHS.get()->getType())) return ResultTy; } // Handle block pointer types. if (!IsRelational && LHSType->isBlockPointerType() && RHSType->isBlockPointerType()) { QualType lpointee = LHSType->castAs()->getPointeeType(); QualType rpointee = RHSType->castAs()->getPointeeType(); if (!LHSIsNull && !RHSIsNull && !Context.typesAreCompatible(lpointee, rpointee)) { Diag(Loc, diag::err_typecheck_comparison_of_distinct_blocks) << LHSType << RHSType << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); } RHS = ImpCastExprToType(RHS.get(), LHSType, CK_BitCast); return ResultTy; } // Allow block pointers to be compared with null pointer constants. if (!IsRelational && ((LHSType->isBlockPointerType() && RHSType->isPointerType()) || (LHSType->isPointerType() && RHSType->isBlockPointerType()))) { if (!LHSIsNull && !RHSIsNull) { if (!((RHSType->isPointerType() && RHSType->castAs() ->getPointeeType()->isVoidType()) || (LHSType->isPointerType() && LHSType->castAs() ->getPointeeType()->isVoidType()))) Diag(Loc, diag::err_typecheck_comparison_of_distinct_blocks) << LHSType << RHSType << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); } if (LHSIsNull && !RHSIsNull) LHS = ImpCastExprToType(LHS.get(), RHSType, RHSType->isPointerType() ? 
CK_BitCast : CK_AnyPointerToBlockPointerCast); else RHS = ImpCastExprToType(RHS.get(), LHSType, LHSType->isPointerType() ? CK_BitCast : CK_AnyPointerToBlockPointerCast); return ResultTy; } if (LHSType->isObjCObjectPointerType() || RHSType->isObjCObjectPointerType()) { const PointerType *LPT = LHSType->getAs(); const PointerType *RPT = RHSType->getAs(); if (LPT || RPT) { bool LPtrToVoid = LPT ? LPT->getPointeeType()->isVoidType() : false; bool RPtrToVoid = RPT ? RPT->getPointeeType()->isVoidType() : false; if (!LPtrToVoid && !RPtrToVoid && !Context.typesAreCompatible(LHSType, RHSType)) { diagnoseDistinctPointerComparison(*this, Loc, LHS, RHS, /*isError*/false); } if (LHSIsNull && !RHSIsNull) { Expr *E = LHS.get(); if (getLangOpts().ObjCAutoRefCount) CheckObjCConversion(SourceRange(), RHSType, E, CCK_ImplicitConversion); LHS = ImpCastExprToType(E, RHSType, RPT ? CK_BitCast :CK_CPointerToObjCPointerCast); } else { Expr *E = RHS.get(); if (getLangOpts().ObjCAutoRefCount) CheckObjCConversion(SourceRange(), LHSType, E, CCK_ImplicitConversion, /*Diagnose=*/true, /*DiagnoseCFAudited=*/false, Opc); RHS = ImpCastExprToType(E, LHSType, LPT ? CK_BitCast :CK_CPointerToObjCPointerCast); } return ResultTy; } if (LHSType->isObjCObjectPointerType() && RHSType->isObjCObjectPointerType()) { if (!Context.areComparableObjCPointerTypes(LHSType, RHSType)) diagnoseDistinctPointerComparison(*this, Loc, LHS, RHS, /*isError*/false); if (isObjCObjectLiteral(LHS) || isObjCObjectLiteral(RHS)) diagnoseObjCLiteralComparison(*this, Loc, LHS, RHS, Opc); if (LHSIsNull && !RHSIsNull) LHS = ImpCastExprToType(LHS.get(), RHSType, CK_BitCast); else RHS = ImpCastExprToType(RHS.get(), LHSType, CK_BitCast); return ResultTy; } } if ((LHSType->isAnyPointerType() && RHSType->isIntegerType()) || (LHSType->isIntegerType() && RHSType->isAnyPointerType())) { unsigned DiagID = 0; bool isError = false; if (LangOpts.DebuggerSupport) { // Under a debugger, allow the comparison of pointers to integers, // since users tend to want to compare addresses. } else if ((LHSIsNull && LHSType->isIntegerType()) || (RHSIsNull && RHSType->isIntegerType())) { if (IsRelational) { isError = getLangOpts().CPlusPlus; DiagID = isError ? diag::err_typecheck_ordered_comparison_of_pointer_and_zero : diag::ext_typecheck_ordered_comparison_of_pointer_and_zero; } } else if (getLangOpts().CPlusPlus) { DiagID = diag::err_typecheck_comparison_of_pointer_integer; isError = true; } else if (IsRelational) DiagID = diag::ext_typecheck_ordered_comparison_of_pointer_integer; else DiagID = diag::ext_typecheck_comparison_of_pointer_integer; if (DiagID) { Diag(Loc, DiagID) << LHSType << RHSType << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); if (isError) return QualType(); } if (LHSType->isIntegerType()) LHS = ImpCastExprToType(LHS.get(), RHSType, LHSIsNull ? CK_NullToPointer : CK_IntegralToPointer); else RHS = ImpCastExprToType(RHS.get(), LHSType, RHSIsNull ? CK_NullToPointer : CK_IntegralToPointer); return ResultTy; } // Handle block pointers. 
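  // (Editor's note) i.e. equality comparisons of a block pointer against an
  // integer null pointer constant, as in 'blockPtr == 0'.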
if (!IsRelational && RHSIsNull && LHSType->isBlockPointerType() && RHSType->isIntegerType()) { RHS = ImpCastExprToType(RHS.get(), LHSType, CK_NullToPointer); return ResultTy; } if (!IsRelational && LHSIsNull && LHSType->isIntegerType() && RHSType->isBlockPointerType()) { LHS = ImpCastExprToType(LHS.get(), RHSType, CK_NullToPointer); return ResultTy; } if (getLangOpts().OpenCLVersion >= 200) { if (LHSIsNull && RHSType->isQueueT()) { LHS = ImpCastExprToType(LHS.get(), RHSType, CK_NullToPointer); return ResultTy; } if (LHSType->isQueueT() && RHSIsNull) { RHS = ImpCastExprToType(RHS.get(), LHSType, CK_NullToPointer); return ResultTy; } } return InvalidOperands(Loc, LHS, RHS); } // Return a signed ext_vector_type that is of identical size and number of // elements. For floating point vectors, return an integer type of identical // size and number of elements. In the non ext_vector_type case, search from // the largest type to the smallest type to avoid cases where long long == long, // where long gets picked over long long. QualType Sema::GetSignedVectorType(QualType V) { const VectorType *VTy = V->getAs(); unsigned TypeSize = Context.getTypeSize(VTy->getElementType()); if (isa(VTy)) { if (TypeSize == Context.getTypeSize(Context.CharTy)) return Context.getExtVectorType(Context.CharTy, VTy->getNumElements()); else if (TypeSize == Context.getTypeSize(Context.ShortTy)) return Context.getExtVectorType(Context.ShortTy, VTy->getNumElements()); else if (TypeSize == Context.getTypeSize(Context.IntTy)) return Context.getExtVectorType(Context.IntTy, VTy->getNumElements()); else if (TypeSize == Context.getTypeSize(Context.LongTy)) return Context.getExtVectorType(Context.LongTy, VTy->getNumElements()); assert(TypeSize == Context.getTypeSize(Context.LongLongTy) && "Unhandled vector element size in vector compare"); return Context.getExtVectorType(Context.LongLongTy, VTy->getNumElements()); } if (TypeSize == Context.getTypeSize(Context.LongLongTy)) return Context.getVectorType(Context.LongLongTy, VTy->getNumElements(), VectorType::GenericVector); else if (TypeSize == Context.getTypeSize(Context.LongTy)) return Context.getVectorType(Context.LongTy, VTy->getNumElements(), VectorType::GenericVector); else if (TypeSize == Context.getTypeSize(Context.IntTy)) return Context.getVectorType(Context.IntTy, VTy->getNumElements(), VectorType::GenericVector); else if (TypeSize == Context.getTypeSize(Context.ShortTy)) return Context.getVectorType(Context.ShortTy, VTy->getNumElements(), VectorType::GenericVector); assert(TypeSize == Context.getTypeSize(Context.CharTy) && "Unhandled vector element size in vector compare"); return Context.getVectorType(Context.CharTy, VTy->getNumElements(), VectorType::GenericVector); } /// CheckVectorCompareOperands - vector comparisons are a clang extension that /// operates on extended vector types. Instead of producing an IntTy result, /// like a scalar comparison, a vector comparison produces a vector of integer /// types. QualType Sema::CheckVectorCompareOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, bool IsRelational) { // Check to make sure we're operating on vectors of the same type and width, // Allowing one side to be a scalar of element type. QualType vType = CheckVectorOperands(LHS, RHS, Loc, /*isCompAssign*/false, /*AllowBothBool*/true, /*AllowBoolConversions*/getLangOpts().ZVector); if (vType.isNull()) return vType; QualType LHSType = LHS.get()->getType(); // If AltiVec, the comparison results in a numeric type, i.e. 
  // bool for C++, int for C
  if (getLangOpts().AltiVec &&
      vType->getAs<VectorType>()->getVectorKind() == VectorType::AltiVecVector)
    return Context.getLogicalOperationType();

  // For non-floating point types, check for self-comparisons of the form
  // x == x, x != x, x < x, etc.  These always evaluate to a constant, and
  // often indicate logic errors in the program.
  if (!LHSType->hasFloatingRepresentation() && !inTemplateInstantiation()) {
    if (DeclRefExpr* DRL =
            dyn_cast<DeclRefExpr>(LHS.get()->IgnoreParenImpCasts()))
      if (DeclRefExpr* DRR =
              dyn_cast<DeclRefExpr>(RHS.get()->IgnoreParenImpCasts()))
        if (DRL->getDecl() == DRR->getDecl())
          DiagRuntimeBehavior(Loc, nullptr,
                              PDiag(diag::warn_comparison_always)
                                  << 0 // self-
                                  << 2 // "a constant"
                              );
  }

  // Check for comparisons of floating point operands using != and ==.
  if (!IsRelational && LHSType->hasFloatingRepresentation()) {
    assert(RHS.get()->getType()->hasFloatingRepresentation());
    CheckFloatComparison(Loc, LHS.get(), RHS.get());
  }

  // Return a signed type for the vector.
  return GetSignedVectorType(vType);
}

QualType Sema::CheckVectorLogicalOperands(ExprResult &LHS, ExprResult &RHS,
                                          SourceLocation Loc) {
  // Ensure that either both operands are of the same vector type, or
  // one operand is of a vector type and the other is of its element type.
  QualType vType = CheckVectorOperands(LHS, RHS, Loc, false,
                                       /*AllowBothBool*/true,
                                       /*AllowBoolConversions*/false);
  if (vType.isNull())
    return InvalidOperands(Loc, LHS, RHS);
  if (getLangOpts().OpenCL && getLangOpts().OpenCLVersion < 120 &&
      vType->hasFloatingRepresentation())
    return InvalidOperands(Loc, LHS, RHS);
  // FIXME: The check for C++ here is for GCC compatibility. GCC rejects the
  //        usage of the logical operators && and || with vectors in C. This
  //        check could be notionally dropped.
  if (!getLangOpts().CPlusPlus &&
      !(isa<ExtVectorType>(vType->getAs<VectorType>())))
    return InvalidLogicalVectorOperands(Loc, LHS, RHS);

  return GetSignedVectorType(LHS.get()->getType());
}

inline QualType Sema::CheckBitwiseOperands(ExprResult &LHS, ExprResult &RHS,
                                           SourceLocation Loc,
                                           BinaryOperatorKind Opc) {
  checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/false);

  bool IsCompAssign =
      Opc == BO_AndAssign || Opc == BO_OrAssign || Opc == BO_XorAssign;

  if (LHS.get()->getType()->isVectorType() ||
      RHS.get()->getType()->isVectorType()) {
    if (LHS.get()->getType()->hasIntegerRepresentation() &&
        RHS.get()->getType()->hasIntegerRepresentation())
      return CheckVectorOperands(LHS, RHS, Loc, IsCompAssign,
                                 /*AllowBothBool*/true,
                                 /*AllowBoolConversions*/getLangOpts().ZVector);
    return InvalidOperands(Loc, LHS, RHS);
  }

  if (Opc == BO_And)
    diagnoseLogicalNotOnLHSofCheck(*this, LHS, RHS, Loc, Opc);

  ExprResult LHSResult = LHS, RHSResult = RHS;
  QualType compType = UsualArithmeticConversions(LHSResult, RHSResult,
                                                 IsCompAssign);
  if (LHSResult.isInvalid() || RHSResult.isInvalid())
    return QualType();
  LHS = LHSResult.get();
  RHS = RHSResult.get();

  if (!compType.isNull() && compType->isIntegralOrUnscopedEnumerationType())
    return compType;
  return InvalidOperands(Loc, LHS, RHS);
}

// C99 6.5.[13,14]
inline QualType Sema::CheckLogicalOperands(ExprResult &LHS, ExprResult &RHS,
                                           SourceLocation Loc,
                                           BinaryOperatorKind Opc) {
  // Check vector operands differently.
  if (LHS.get()->getType()->isVectorType() ||
      RHS.get()->getType()->isVectorType())
    return CheckVectorLogicalOperands(LHS, RHS, Loc);

  // Diagnose cases where the user wrote a logical and/or but probably meant a
  // bitwise one.  We do this when the LHS is a non-bool integer and the RHS
  // is a constant.
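  // (Editor's illustration)
  //   if (x && 4) { }   // warns; a fix-it suggests 'x & 4', and for '&&' a
  //                     // second note suggests dropping the constant.
  // Constants that fold to 0 or 1 (e.g. 'x && 1') are not diagnosed.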
if (LHS.get()->getType()->isIntegerType() && !LHS.get()->getType()->isBooleanType() && RHS.get()->getType()->isIntegerType() && !RHS.get()->isValueDependent() && // Don't warn in macros or template instantiations. !Loc.isMacroID() && !inTemplateInstantiation()) { // If the RHS can be constant folded, and if it constant folds to something // that isn't 0 or 1 (which indicate a potential logical operation that // happened to fold to true/false) then warn. // Parens on the RHS are ignored. llvm::APSInt Result; if (RHS.get()->EvaluateAsInt(Result, Context)) if ((getLangOpts().Bool && !RHS.get()->getType()->isBooleanType() && !RHS.get()->getExprLoc().isMacroID()) || (Result != 0 && Result != 1)) { Diag(Loc, diag::warn_logical_instead_of_bitwise) << RHS.get()->getSourceRange() << (Opc == BO_LAnd ? "&&" : "||"); // Suggest replacing the logical operator with the bitwise version Diag(Loc, diag::note_logical_instead_of_bitwise_change_operator) << (Opc == BO_LAnd ? "&" : "|") << FixItHint::CreateReplacement(SourceRange( Loc, getLocForEndOfToken(Loc)), Opc == BO_LAnd ? "&" : "|"); if (Opc == BO_LAnd) // Suggest replacing "Foo() && kNonZero" with "Foo()" Diag(Loc, diag::note_logical_instead_of_bitwise_remove_constant) << FixItHint::CreateRemoval( SourceRange(getLocForEndOfToken(LHS.get()->getLocEnd()), RHS.get()->getLocEnd())); } } if (!Context.getLangOpts().CPlusPlus) { // OpenCL v1.1 s6.3.g: The logical operators and (&&), or (||) do // not operate on the built-in scalar and vector float types. if (Context.getLangOpts().OpenCL && Context.getLangOpts().OpenCLVersion < 120) { if (LHS.get()->getType()->isFloatingType() || RHS.get()->getType()->isFloatingType()) return InvalidOperands(Loc, LHS, RHS); } LHS = UsualUnaryConversions(LHS.get()); if (LHS.isInvalid()) return QualType(); RHS = UsualUnaryConversions(RHS.get()); if (RHS.isInvalid()) return QualType(); if (!LHS.get()->getType()->isScalarType() || !RHS.get()->getType()->isScalarType()) return InvalidOperands(Loc, LHS, RHS); return Context.IntTy; } // The following is safe because we only use this method for // non-overloadable operands. // C++ [expr.log.and]p1 // C++ [expr.log.or]p1 // The operands are both contextually converted to type bool. ExprResult LHSRes = PerformContextuallyConvertToBool(LHS.get()); if (LHSRes.isInvalid()) return InvalidOperands(Loc, LHS, RHS); LHS = LHSRes; ExprResult RHSRes = PerformContextuallyConvertToBool(RHS.get()); if (RHSRes.isInvalid()) return InvalidOperands(Loc, LHS, RHS); RHS = RHSRes; // C++ [expr.log.and]p2 // C++ [expr.log.or]p2 // The result is a bool. return Context.BoolTy; } static bool IsReadonlyMessage(Expr *E, Sema &S) { const MemberExpr *ME = dyn_cast(E); if (!ME) return false; if (!isa(ME->getMemberDecl())) return false; ObjCMessageExpr *Base = dyn_cast( ME->getBase()->IgnoreImplicit()->IgnoreParenImpCasts()); if (!Base) return false; return Base->getMethodDecl() != nullptr; } /// Is the given expression (which must be 'const') a reference to a /// variable which was originally non-const, but which has become /// 'const' due to being captured within a block? enum NonConstCaptureKind { NCCK_None, NCCK_Block, NCCK_Lambda }; static NonConstCaptureKind isReferenceToNonConstCapture(Sema &S, Expr *E) { assert(E->isLValue() && E->getType().isConstQualified()); E = E->IgnoreParens(); // Must be a reference to a declaration from an enclosing scope. 
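  // (Editor's sketch of the case being classified, hypothetical:
  //    int i = 0;  ^{ i = 1; }();   // block copy-captures 'i' as const
  //    [=] { i = 1; };              // non-mutable lambda, likewise
  //  the assignment to 'i' is const only because of the capture, so the
  //  caller can emit the block- or lambda-specific diagnostic.)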
  DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E);
  if (!DRE) return NCCK_None;
  if (!DRE->refersToEnclosingVariableOrCapture()) return NCCK_None;

  // The declaration must be a variable which is not declared 'const'.
  VarDecl *var = dyn_cast<VarDecl>(DRE->getDecl());
  if (!var) return NCCK_None;
  if (var->getType().isConstQualified()) return NCCK_None;
  assert(var->hasLocalStorage() && "capture added 'const' to non-local?");

  // Decide whether the first capture was for a block or a lambda.
  DeclContext *DC = S.CurContext, *Prev = nullptr;
  while (DC) {
    // For init-capture, it is possible that the variable belongs to the
    // template pattern of the current context.
    if (auto *FD = dyn_cast<FunctionDecl>(DC))
      if (var->isInitCapture() &&
          FD->getTemplateInstantiationPattern() == var->getDeclContext())
        break;
    if (DC == var->getDeclContext())
      break;
    Prev = DC;
    DC = DC->getParent();
  }
  // Unless we have an init-capture, we've gone one step too far.
  if (!var->isInitCapture())
    DC = Prev;
  return (isa<BlockDecl>(DC) ? NCCK_Block : NCCK_Lambda);
}

static bool IsTypeModifiable(QualType Ty, bool IsDereference) {
  Ty = Ty.getNonReferenceType();
  if (IsDereference && Ty->isPointerType())
    Ty = Ty->getPointeeType();
  return !Ty.isConstQualified();
}

// Update err_typecheck_assign_const and note_typecheck_assign_const
// when this enum is changed.
enum {
  ConstFunction,
  ConstVariable,
  ConstMember,
  ConstMethod,
  NestedConstMember,
  ConstUnknown,  // Keep as last element
};

/// Emit the "read-only variable not assignable" error and print notes to give
/// more information about why the variable is not assignable, such as pointing
/// to the declaration of a const variable, showing that a method is const, or
/// that the function is returning a const reference.
static void DiagnoseConstAssignment(Sema &S, const Expr *E,
                                    SourceLocation Loc) {
  SourceRange ExprRange = E->getSourceRange();

  // Only emit one error on the first const found.  All other consts will emit
  // a note to the error.
  bool DiagnosticEmitted = false;

  // Track if the current expression is the result of a dereference, and if
  // the next checked expression is the result of a dereference.
  bool IsDereference = false;
  bool NextIsDereference = false;

  // Loop to process MemberExpr chains.
  while (true) {
    IsDereference = NextIsDereference;

    E = E->IgnoreImplicit()->IgnoreParenImpCasts();
    if (const MemberExpr *ME = dyn_cast<MemberExpr>(E)) {
      NextIsDereference = ME->isArrow();
      const ValueDecl *VD = ME->getMemberDecl();
      if (const FieldDecl *Field = dyn_cast<FieldDecl>(VD)) {
        // Mutable fields can be modified even if the class is const.
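        // (Editor's example: for 'struct S { mutable int hits; int id; };'
        //  and 'const S s;', 's.hits = 1' is fine while 's.id = 1' falls
        //  through to the const-member diagnostic below.)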
if (Field->isMutable()) { assert(DiagnosticEmitted && "Expected diagnostic not emitted."); break; } if (!IsTypeModifiable(Field->getType(), IsDereference)) { if (!DiagnosticEmitted) { S.Diag(Loc, diag::err_typecheck_assign_const) << ExprRange << ConstMember << false /*static*/ << Field << Field->getType(); DiagnosticEmitted = true; } S.Diag(VD->getLocation(), diag::note_typecheck_assign_const) << ConstMember << false /*static*/ << Field << Field->getType() << Field->getSourceRange(); } E = ME->getBase(); continue; } else if (const VarDecl *VDecl = dyn_cast(VD)) { if (VDecl->getType().isConstQualified()) { if (!DiagnosticEmitted) { S.Diag(Loc, diag::err_typecheck_assign_const) << ExprRange << ConstMember << true /*static*/ << VDecl << VDecl->getType(); DiagnosticEmitted = true; } S.Diag(VD->getLocation(), diag::note_typecheck_assign_const) << ConstMember << true /*static*/ << VDecl << VDecl->getType() << VDecl->getSourceRange(); } // Static fields do not inherit constness from parents. break; } break; } // End MemberExpr break; } if (const CallExpr *CE = dyn_cast(E)) { // Function calls const FunctionDecl *FD = CE->getDirectCallee(); if (FD && !IsTypeModifiable(FD->getReturnType(), IsDereference)) { if (!DiagnosticEmitted) { S.Diag(Loc, diag::err_typecheck_assign_const) << ExprRange << ConstFunction << FD; DiagnosticEmitted = true; } S.Diag(FD->getReturnTypeSourceRange().getBegin(), diag::note_typecheck_assign_const) << ConstFunction << FD << FD->getReturnType() << FD->getReturnTypeSourceRange(); } } else if (const DeclRefExpr *DRE = dyn_cast(E)) { // Point to variable declaration. if (const ValueDecl *VD = DRE->getDecl()) { if (!IsTypeModifiable(VD->getType(), IsDereference)) { if (!DiagnosticEmitted) { S.Diag(Loc, diag::err_typecheck_assign_const) << ExprRange << ConstVariable << VD << VD->getType(); DiagnosticEmitted = true; } S.Diag(VD->getLocation(), diag::note_typecheck_assign_const) << ConstVariable << VD << VD->getType() << VD->getSourceRange(); } } } else if (isa(E)) { if (const DeclContext *DC = S.getFunctionLevelDeclContext()) { if (const CXXMethodDecl *MD = dyn_cast(DC)) { if (MD->isConst()) { if (!DiagnosticEmitted) { S.Diag(Loc, diag::err_typecheck_assign_const) << ExprRange << ConstMethod << MD; DiagnosticEmitted = true; } S.Diag(MD->getLocation(), diag::note_typecheck_assign_const) << ConstMethod << MD << MD->getSourceRange(); } } } } if (DiagnosticEmitted) return; // Can't determine a more specific message, so display the generic error. S.Diag(Loc, diag::err_typecheck_assign_const) << ExprRange << ConstUnknown; } enum OriginalExprKind { OEK_Variable, OEK_Member, OEK_LValue }; static void DiagnoseRecursiveConstFields(Sema &S, const ValueDecl *VD, const RecordType *Ty, SourceLocation Loc, SourceRange Range, OriginalExprKind OEK, bool &DiagnosticEmitted, bool IsNested = false) { // We walk the record hierarchy breadth-first to ensure that we print // diagnostics in field nesting order. // First, check every field for constness. for (const FieldDecl *Field : Ty->getDecl()->fields()) { if (Field->getType().isConstQualified()) { if (!DiagnosticEmitted) { S.Diag(Loc, diag::err_typecheck_assign_const) << Range << NestedConstMember << OEK << VD << IsNested << Field; DiagnosticEmitted = true; } S.Diag(Field->getLocation(), diag::note_typecheck_assign_const) << NestedConstMember << IsNested << Field << Field->getType() << Field->getSourceRange(); } } // Then, recurse. 
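  // (Editor's sketch: given 'struct A { const int x; };' nested inside
  //  'struct B { A a; };', assigning to a 'B' lvalue reports 'x' as the
  //  nested const member; all fields at one level are diagnosed before
  //  recursing, matching the breadth-first walk described above.)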
for (const FieldDecl *Field : Ty->getDecl()->fields()) { QualType FTy = Field->getType(); if (const RecordType *FieldRecTy = FTy->getAs()) DiagnoseRecursiveConstFields(S, VD, FieldRecTy, Loc, Range, OEK, DiagnosticEmitted, true); } } /// Emit an error for the case where a record we are trying to assign to has a /// const-qualified field somewhere in its hierarchy. static void DiagnoseRecursiveConstFields(Sema &S, const Expr *E, SourceLocation Loc) { QualType Ty = E->getType(); assert(Ty->isRecordType() && "lvalue was not record?"); SourceRange Range = E->getSourceRange(); const RecordType *RTy = Ty.getCanonicalType()->getAs(); bool DiagEmitted = false; if (const MemberExpr *ME = dyn_cast(E)) DiagnoseRecursiveConstFields(S, ME->getMemberDecl(), RTy, Loc, Range, OEK_Member, DiagEmitted); else if (const DeclRefExpr *DRE = dyn_cast(E)) DiagnoseRecursiveConstFields(S, DRE->getDecl(), RTy, Loc, Range, OEK_Variable, DiagEmitted); else DiagnoseRecursiveConstFields(S, nullptr, RTy, Loc, Range, OEK_LValue, DiagEmitted); if (!DiagEmitted) DiagnoseConstAssignment(S, E, Loc); } /// CheckForModifiableLvalue - Verify that E is a modifiable lvalue. If not, /// emit an error and return true. If so, return false. static bool CheckForModifiableLvalue(Expr *E, SourceLocation Loc, Sema &S) { assert(!E->hasPlaceholderType(BuiltinType::PseudoObject)); S.CheckShadowingDeclModification(E, Loc); SourceLocation OrigLoc = Loc; Expr::isModifiableLvalueResult IsLV = E->isModifiableLvalue(S.Context, &Loc); if (IsLV == Expr::MLV_ClassTemporary && IsReadonlyMessage(E, S)) IsLV = Expr::MLV_InvalidMessageExpression; if (IsLV == Expr::MLV_Valid) return false; unsigned DiagID = 0; bool NeedType = false; switch (IsLV) { // C99 6.5.16p2 case Expr::MLV_ConstQualified: // Use a specialized diagnostic when we're assigning to an object // from an enclosing function or block. if (NonConstCaptureKind NCCK = isReferenceToNonConstCapture(S, E)) { if (NCCK == NCCK_Block) DiagID = diag::err_block_decl_ref_not_modifiable_lvalue; else DiagID = diag::err_lambda_decl_ref_not_modifiable_lvalue; break; } // In ARC, use some specialized diagnostics for occasions where we // infer 'const'. These are always pseudo-strong variables. if (S.getLangOpts().ObjCAutoRefCount) { DeclRefExpr *declRef = dyn_cast(E->IgnoreParenCasts()); if (declRef && isa(declRef->getDecl())) { VarDecl *var = cast(declRef->getDecl()); // Use the normal diagnostic if it's pseudo-__strong but the // user actually wrote 'const'. if (var->isARCPseudoStrong() && (!var->getTypeSourceInfo() || !var->getTypeSourceInfo()->getType().isConstQualified())) { // There are two pseudo-strong cases: // - self ObjCMethodDecl *method = S.getCurMethodDecl(); if (method && var == method->getSelfDecl()) DiagID = method->isClassMethod() ? diag::err_typecheck_arc_assign_self_class_method : diag::err_typecheck_arc_assign_self; // - fast enumeration variables else DiagID = diag::err_typecheck_arr_assign_enumeration; SourceRange Assign; if (Loc != OrigLoc) Assign = SourceRange(OrigLoc, OrigLoc); S.Diag(Loc, DiagID) << E->getSourceRange() << Assign; // We need to preserve the AST regardless, so migration tool // can do its job. return false; } } } // If none of the special cases above are triggered, then this is a // simple const assignment. 
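  // (Editor's example of the plain fallthrough case: 'const int n = 0;
  //  n = 1;' reaches DiagnoseConstAssignment, which emits
  //  err_typecheck_assign_const and points a note at n's declaration.)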
if (DiagID == 0) { DiagnoseConstAssignment(S, E, Loc); return true; } break; case Expr::MLV_ConstAddrSpace: DiagnoseConstAssignment(S, E, Loc); return true; case Expr::MLV_ConstQualifiedField: DiagnoseRecursiveConstFields(S, E, Loc); return true; case Expr::MLV_ArrayType: case Expr::MLV_ArrayTemporary: DiagID = diag::err_typecheck_array_not_modifiable_lvalue; NeedType = true; break; case Expr::MLV_NotObjectType: DiagID = diag::err_typecheck_non_object_not_modifiable_lvalue; NeedType = true; break; case Expr::MLV_LValueCast: DiagID = diag::err_typecheck_lvalue_casts_not_supported; break; case Expr::MLV_Valid: llvm_unreachable("did not take early return for MLV_Valid"); case Expr::MLV_InvalidExpression: case Expr::MLV_MemberFunction: case Expr::MLV_ClassTemporary: DiagID = diag::err_typecheck_expression_not_modifiable_lvalue; break; case Expr::MLV_IncompleteType: case Expr::MLV_IncompleteVoidType: return S.RequireCompleteType(Loc, E->getType(), diag::err_typecheck_incomplete_type_not_modifiable_lvalue, E); case Expr::MLV_DuplicateVectorComponents: DiagID = diag::err_typecheck_duplicate_vector_components_not_mlvalue; break; case Expr::MLV_NoSetterProperty: llvm_unreachable("readonly properties should be processed differently"); case Expr::MLV_InvalidMessageExpression: DiagID = diag::err_readonly_message_assignment; break; case Expr::MLV_SubObjCPropertySetting: DiagID = diag::err_no_subobject_property_setting; break; } SourceRange Assign; if (Loc != OrigLoc) Assign = SourceRange(OrigLoc, OrigLoc); if (NeedType) S.Diag(Loc, DiagID) << E->getType() << E->getSourceRange() << Assign; else S.Diag(Loc, DiagID) << E->getSourceRange() << Assign; return true; } static void CheckIdentityFieldAssignment(Expr *LHSExpr, Expr *RHSExpr, SourceLocation Loc, Sema &Sema) { // C / C++ fields MemberExpr *ML = dyn_cast(LHSExpr); MemberExpr *MR = dyn_cast(RHSExpr); if (ML && MR && ML->getMemberDecl() == MR->getMemberDecl()) { if (isa(ML->getBase()) && isa(MR->getBase())) Sema.Diag(Loc, diag::warn_identity_field_assign) << 0; } // Objective-C instance variables ObjCIvarRefExpr *OL = dyn_cast(LHSExpr); ObjCIvarRefExpr *OR = dyn_cast(RHSExpr); if (OL && OR && OL->getDecl() == OR->getDecl()) { DeclRefExpr *RL = dyn_cast(OL->getBase()->IgnoreImpCasts()); DeclRefExpr *RR = dyn_cast(OR->getBase()->IgnoreImpCasts()); if (RL && RR && RL->getDecl() == RR->getDecl()) Sema.Diag(Loc, diag::warn_identity_field_assign) << 1; } } // C99 6.5.16.1 QualType Sema::CheckAssignmentOperands(Expr *LHSExpr, ExprResult &RHS, SourceLocation Loc, QualType CompoundType) { assert(!LHSExpr->hasPlaceholderType(BuiltinType::PseudoObject)); // Verify that LHS is a modifiable lvalue, and emit error if not. if (CheckForModifiableLvalue(LHSExpr, Loc, *this)) return QualType(); QualType LHSType = LHSExpr->getType(); QualType RHSType = CompoundType.isNull() ? RHS.get()->getType() : CompoundType; // OpenCL v1.2 s6.1.1.1 p2: // The half data type can only be used to declare a pointer to a buffer that // contains half values if (getLangOpts().OpenCL && !getOpenCLOptions().isEnabled("cl_khr_fp16") && LHSType->isHalfType()) { Diag(Loc, diag::err_opencl_half_load_store) << 1 << LHSType.getUnqualifiedType(); return QualType(); } AssignConvertType ConvTy; if (CompoundType.isNull()) { Expr *RHSCheck = RHS.get(); CheckIdentityFieldAssignment(LHSExpr, RHSCheck, Loc, *this); QualType LHSTy(LHSType); ConvTy = CheckSingleAssignmentConstraints(LHSTy, RHS); if (RHS.isInvalid()) return QualType(); // Special case of NSObject attributes on c-style pointer types. 
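    // (Editor's sketch, hypothetical typedef: with
    //    typedef struct CGColor *CGColorRef __attribute__((NSObject));
    //  an assignment between CGColorRef and an ObjC object pointer is
    //  downgraded from IncompatiblePointer to Compatible below.)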
if (ConvTy == IncompatiblePointer && ((Context.isObjCNSObjectType(LHSType) && RHSType->isObjCObjectPointerType()) || (Context.isObjCNSObjectType(RHSType) && LHSType->isObjCObjectPointerType()))) ConvTy = Compatible; if (ConvTy == Compatible && LHSType->isObjCObjectType()) Diag(Loc, diag::err_objc_object_assignment) << LHSType; // If the RHS is a unary plus or minus, check to see if they = and + are // right next to each other. If so, the user may have typo'd "x =+ 4" // instead of "x += 4". if (ImplicitCastExpr *ICE = dyn_cast(RHSCheck)) RHSCheck = ICE->getSubExpr(); if (UnaryOperator *UO = dyn_cast(RHSCheck)) { if ((UO->getOpcode() == UO_Plus || UO->getOpcode() == UO_Minus) && Loc.isFileID() && UO->getOperatorLoc().isFileID() && // Only if the two operators are exactly adjacent. Loc.getLocWithOffset(1) == UO->getOperatorLoc() && // And there is a space or other character before the subexpr of the // unary +/-. We don't want to warn on "x=-1". Loc.getLocWithOffset(2) != UO->getSubExpr()->getLocStart() && UO->getSubExpr()->getLocStart().isFileID()) { Diag(Loc, diag::warn_not_compound_assign) << (UO->getOpcode() == UO_Plus ? "+" : "-") << SourceRange(UO->getOperatorLoc(), UO->getOperatorLoc()); } } if (ConvTy == Compatible) { if (LHSType.getObjCLifetime() == Qualifiers::OCL_Strong) { // Warn about retain cycles where a block captures the LHS, but // not if the LHS is a simple variable into which the block is // being stored...unless that variable can be captured by reference! const Expr *InnerLHS = LHSExpr->IgnoreParenCasts(); const DeclRefExpr *DRE = dyn_cast(InnerLHS); if (!DRE || DRE->getDecl()->hasAttr()) checkRetainCycles(LHSExpr, RHS.get()); } if (LHSType.getObjCLifetime() == Qualifiers::OCL_Strong || LHSType.isNonWeakInMRRWithObjCWeak(Context)) { // It is safe to assign a weak reference into a strong variable. // Although this code can still have problems: // id x = self.weakProp; // id y = self.weakProp; // we do not warn to warn spuriously when 'x' and 'y' are on separate // paths through the function. This should be revisited if // -Wrepeated-use-of-weak is made flow-sensitive. // For ObjCWeak only, we do not warn if the assign is to a non-weak // variable, which will be valid for the current autorelease scope. if (!Diags.isIgnored(diag::warn_arc_repeated_use_of_weak, RHS.get()->getLocStart())) getCurFunction()->markSafeWeakUse(RHS.get()); } else if (getLangOpts().ObjCAutoRefCount || getLangOpts().ObjCWeak) { checkUnsafeExprAssigns(Loc, LHSExpr, RHS.get()); } } } else { // Compound assignment "x += y" ConvTy = CheckAssignmentConstraints(Loc, LHSType, RHSType); } if (DiagnoseAssignmentResult(ConvTy, Loc, LHSType, RHSType, RHS.get(), AA_Assigning)) return QualType(); CheckForNullPointerDereference(*this, LHSExpr); // C99 6.5.16p3: The type of an assignment expression is the type of the // left operand unless the left operand has qualified type, in which case // it is the unqualified version of the type of the left operand. // C99 6.5.16.1p2: In simple assignment, the value of the right operand // is converted to the type of the assignment expression (above). // C++ 5.17p1: the type of the assignment expression is that of its left // operand. return (getLangOpts().CPlusPlus ? LHSType : LHSType.getUnqualifiedType()); } // Only ignore explicit casts to void. 
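// (Editor's example: at statement scope, 'f(), g();' triggers
//  warn_comma_operator with a fixit toward '(void)(f()), g();' or
//  'static_cast<void>(f()), g();', while a for-loop init or increment such
//  as 'i++, j++' is whitelisted by the scope-flag check below.)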
static bool IgnoreCommaOperand(const Expr *E) {
  E = E->IgnoreParens();

  if (const CastExpr *CE = dyn_cast<CastExpr>(E)) {
    if (CE->getCastKind() == CK_ToVoid) {
      return true;
    }
  }

  return false;
}

// Look for instances where it is likely the comma operator is confused with
// another operator.  There is a whitelist of acceptable expressions for the
// left hand side of the comma operator, otherwise emit a warning.
void Sema::DiagnoseCommaOperator(const Expr *LHS, SourceLocation Loc) {
  // No warnings in macros
  if (Loc.isMacroID())
    return;

  // Don't warn in template instantiations.
  if (inTemplateInstantiation())
    return;

  // Scope isn't fine-grained enough to whitelist the specific cases, so
  // instead, skip more than needed, then call back into here with the
  // CommaVisitor in SemaStmt.cpp.
  // The whitelisted locations are the initialization and increment portions
  // of a for loop.  The additional checks are on the condition of
  // if statements, do/while loops, and for loops.
  const unsigned ForIncrementFlags =
      Scope::ControlScope | Scope::ContinueScope | Scope::BreakScope;
  const unsigned ForInitFlags = Scope::ControlScope | Scope::DeclScope;
  const unsigned ScopeFlags = getCurScope()->getFlags();
  if ((ScopeFlags & ForIncrementFlags) == ForIncrementFlags ||
      (ScopeFlags & ForInitFlags) == ForInitFlags)
    return;

  // If there are multiple comma operators used together, get the RHS of the
  // comma operator as the LHS.
  while (const BinaryOperator *BO = dyn_cast<BinaryOperator>(LHS)) {
    if (BO->getOpcode() != BO_Comma)
      break;
    LHS = BO->getRHS();
  }

  // Only allow some expressions on LHS to not warn.
  if (IgnoreCommaOperand(LHS))
    return;

  Diag(Loc, diag::warn_comma_operator);
  Diag(LHS->getLocStart(), diag::note_cast_to_void)
      << LHS->getSourceRange()
      << FixItHint::CreateInsertion(LHS->getLocStart(),
                                    LangOpts.CPlusPlus ? "static_cast<void>("
                                                       : "(void)(")
      << FixItHint::CreateInsertion(PP.getLocForEndOfToken(LHS->getLocEnd()),
                                    ")");
}

// C99 6.5.17
static QualType CheckCommaOperands(Sema &S, ExprResult &LHS, ExprResult &RHS,
                                   SourceLocation Loc) {
  LHS = S.CheckPlaceholderExpr(LHS.get());
  RHS = S.CheckPlaceholderExpr(RHS.get());
  if (LHS.isInvalid() || RHS.isInvalid())
    return QualType();

  // C's comma performs lvalue conversion (C99 6.3.2.1) on both its
  // operands, but not unary promotions.
  // C++'s comma does not do any conversions at all (C++ [expr.comma]p1).

  // So we treat the LHS as an ignored value, and in C++ we allow the
  // containing site to determine what should be done with the RHS.
  LHS = S.IgnoredValueConversions(LHS.get());
  if (LHS.isInvalid())
    return QualType();

  S.DiagnoseUnusedExprResult(LHS.get());

  if (!S.getLangOpts().CPlusPlus) {
    RHS = S.DefaultFunctionArrayLvalueConversion(RHS.get());
    if (RHS.isInvalid())
      return QualType();
    if (!RHS.get()->getType()->isVoidType())
      S.RequireCompleteType(Loc, RHS.get()->getType(),
                            diag::err_incomplete_type);
  }

  if (!S.getDiagnostics().isIgnored(diag::warn_comma_operator, Loc))
    S.DiagnoseCommaOperator(LHS.get(), Loc);

  return RHS.get()->getType();
}

/// CheckIncrementDecrementOperand - unlike most "Check" methods, this routine
/// doesn't need to call UsualUnaryConversions or UsualArithmeticConversions.
static QualType CheckIncrementDecrementOperand(Sema &S, Expr *Op,
                                               ExprValueKind &VK,
                                               ExprObjectKind &OK,
                                               SourceLocation OpLoc,
                                               bool IsInc, bool IsPrefix) {
  if (Op->isTypeDependent())
    return S.Context.DependentTy;

  QualType ResType = Op->getType();
  // Atomic types can be used for increment / decrement where the non-atomic
  // versions can, so ignore the _Atomic() specifier for the purpose of
  // checking.
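  // (Editor's example: '_Atomic int ai; ++ai;' is validated exactly like
  //  'int ai; ++ai;' — the _Atomic wrapper is peeled off just below.)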
if (const AtomicType *ResAtomicType = ResType->getAs()) ResType = ResAtomicType->getValueType(); assert(!ResType.isNull() && "no type for increment/decrement expression"); if (S.getLangOpts().CPlusPlus && ResType->isBooleanType()) { // Decrement of bool is not allowed. if (!IsInc) { S.Diag(OpLoc, diag::err_decrement_bool) << Op->getSourceRange(); return QualType(); } // Increment of bool sets it to true, but is deprecated. S.Diag(OpLoc, S.getLangOpts().CPlusPlus17 ? diag::ext_increment_bool : diag::warn_increment_bool) << Op->getSourceRange(); } else if (S.getLangOpts().CPlusPlus && ResType->isEnumeralType()) { // Error on enum increments and decrements in C++ mode S.Diag(OpLoc, diag::err_increment_decrement_enum) << IsInc << ResType; return QualType(); } else if (ResType->isRealType()) { // OK! } else if (ResType->isPointerType()) { // C99 6.5.2.4p2, 6.5.6p2 if (!checkArithmeticOpPointerOperand(S, OpLoc, Op)) return QualType(); } else if (ResType->isObjCObjectPointerType()) { // On modern runtimes, ObjC pointer arithmetic is forbidden. // Otherwise, we just need a complete type. if (checkArithmeticIncompletePointerType(S, OpLoc, Op) || checkArithmeticOnObjCPointer(S, OpLoc, Op)) return QualType(); } else if (ResType->isAnyComplexType()) { // C99 does not support ++/-- on complex types, we allow as an extension. S.Diag(OpLoc, diag::ext_integer_increment_complex) << ResType << Op->getSourceRange(); } else if (ResType->isPlaceholderType()) { ExprResult PR = S.CheckPlaceholderExpr(Op); if (PR.isInvalid()) return QualType(); return CheckIncrementDecrementOperand(S, PR.get(), VK, OK, OpLoc, IsInc, IsPrefix); } else if (S.getLangOpts().AltiVec && ResType->isVectorType()) { // OK! ( C/C++ Language Extensions for CBEA(Version 2.6) 10.3 ) } else if (S.getLangOpts().ZVector && ResType->isVectorType() && (ResType->getAs()->getVectorKind() != VectorType::AltiVecBool)) { // The z vector extensions allow ++ and -- for non-bool vectors. } else if(S.getLangOpts().OpenCL && ResType->isVectorType() && ResType->getAs()->getElementType()->isIntegerType()) { // OpenCL V1.2 6.3 says dec/inc ops operate on integer vector types. } else { S.Diag(OpLoc, diag::err_typecheck_illegal_increment_decrement) << ResType << int(IsInc) << Op->getSourceRange(); return QualType(); } // At this point, we know we have a real, complex or pointer type. // Now make sure the operand is a modifiable lvalue. if (CheckForModifiableLvalue(Op, OpLoc, S)) return QualType(); // In C++, a prefix increment is the same type as the operand. Otherwise // (in C or with postfix), the increment is the unqualified type of the // operand. if (IsPrefix && S.getLangOpts().CPlusPlus) { VK = VK_LValue; OK = Op->getObjectKind(); return ResType; } else { VK = VK_RValue; return ResType.getUnqualifiedType(); } } /// getPrimaryDecl - Helper function for CheckAddressOfOperand(). /// This routine allows us to typecheck complex/recursive expressions /// where the declaration is needed for type checking. We only need to /// handle cases when the expression references a function designator /// or is an lvalue. Here are some examples: /// - &(x) => x /// - &*****f => f for f a function designator. 
/// - &s.xx => s /// - &s.zz[1].yy -> s, if zz is an array /// - *(x + 1) -> x, if x is an array /// - &"123"[2] -> 0 /// - & __real__ x -> x static ValueDecl *getPrimaryDecl(Expr *E) { switch (E->getStmtClass()) { case Stmt::DeclRefExprClass: return cast(E)->getDecl(); case Stmt::MemberExprClass: // If this is an arrow operator, the address is an offset from // the base's value, so the object the base refers to is // irrelevant. if (cast(E)->isArrow()) return nullptr; // Otherwise, the expression refers to a part of the base return getPrimaryDecl(cast(E)->getBase()); case Stmt::ArraySubscriptExprClass: { // FIXME: This code shouldn't be necessary! We should catch the implicit // promotion of register arrays earlier. Expr* Base = cast(E)->getBase(); if (ImplicitCastExpr* ICE = dyn_cast(Base)) { if (ICE->getSubExpr()->getType()->isArrayType()) return getPrimaryDecl(ICE->getSubExpr()); } return nullptr; } case Stmt::UnaryOperatorClass: { UnaryOperator *UO = cast(E); switch(UO->getOpcode()) { case UO_Real: case UO_Imag: case UO_Extension: return getPrimaryDecl(UO->getSubExpr()); default: return nullptr; } } case Stmt::ParenExprClass: return getPrimaryDecl(cast(E)->getSubExpr()); case Stmt::ImplicitCastExprClass: // If the result of an implicit cast is an l-value, we care about // the sub-expression; otherwise, the result here doesn't matter. return getPrimaryDecl(cast(E)->getSubExpr()); default: return nullptr; } } namespace { enum { AO_Bit_Field = 0, AO_Vector_Element = 1, AO_Property_Expansion = 2, AO_Register_Variable = 3, AO_No_Error = 4 }; } /// \brief Diagnose invalid operand for address of operations. /// /// \param Type The type of operand which cannot have its address taken. static void diagnoseAddressOfInvalidType(Sema &S, SourceLocation Loc, Expr *E, unsigned Type) { S.Diag(Loc, diag::err_typecheck_address_of) << Type << E->getSourceRange(); } /// CheckAddressOfOperand - The operand of & must be either a function /// designator or an lvalue designating an object. If it is an lvalue, the /// object cannot be declared with storage class register or be a bit field. /// Note: The usual conversions are *not* applied to the operand of the & /// operator (C99 6.3.2.1p[2-4]), and its result is never an lvalue. /// In C++, the operand might be an overloaded function name, in which case /// we allow the '&' but retain the overloaded-function type. 
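/// (Editor's sketch of classic rejects handled here, hypothetical decls:
///    struct S { int b : 3; } s;  &s.b;   // bit-field
///    register int r;             &r;     // register variable, C only
///    float4 v;                   &v.x;   // ext_vector element
///  each maps to one of the AO_* diagnostic categories above.)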
QualType Sema::CheckAddressOfOperand(ExprResult &OrigOp, SourceLocation OpLoc) { if (const BuiltinType *PTy = OrigOp.get()->getType()->getAsPlaceholderType()){ if (PTy->getKind() == BuiltinType::Overload) { Expr *E = OrigOp.get()->IgnoreParens(); if (!isa(E)) { assert(cast(E)->getOpcode() == UO_AddrOf); Diag(OpLoc, diag::err_typecheck_invalid_lvalue_addrof_addrof_function) << OrigOp.get()->getSourceRange(); return QualType(); } OverloadExpr *Ovl = cast(E); if (isa(Ovl)) if (!ResolveSingleFunctionTemplateSpecialization(Ovl)) { Diag(OpLoc, diag::err_invalid_form_pointer_member_function) << OrigOp.get()->getSourceRange(); return QualType(); } return Context.OverloadTy; } if (PTy->getKind() == BuiltinType::UnknownAny) return Context.UnknownAnyTy; if (PTy->getKind() == BuiltinType::BoundMember) { Diag(OpLoc, diag::err_invalid_form_pointer_member_function) << OrigOp.get()->getSourceRange(); return QualType(); } OrigOp = CheckPlaceholderExpr(OrigOp.get()); if (OrigOp.isInvalid()) return QualType(); } if (OrigOp.get()->isTypeDependent()) return Context.DependentTy; assert(!OrigOp.get()->getType()->isPlaceholderType()); // Make sure to ignore parentheses in subsequent checks Expr *op = OrigOp.get()->IgnoreParens(); // In OpenCL captures for blocks called as lambda functions // are located in the private address space. Blocks used in // enqueue_kernel can be located in a different address space // depending on a vendor implementation. Thus preventing // taking an address of the capture to avoid invalid AS casts. if (LangOpts.OpenCL) { auto* VarRef = dyn_cast(op); if (VarRef && VarRef->refersToEnclosingVariableOrCapture()) { Diag(op->getExprLoc(), diag::err_opencl_taking_address_capture); return QualType(); } } if (getLangOpts().C99) { // Implement C99-only parts of addressof rules. if (UnaryOperator* uOp = dyn_cast(op)) { if (uOp->getOpcode() == UO_Deref) // Per C99 6.5.3.2, the address of a deref always returns a valid result // (assuming the deref expression is valid). return uOp->getSubExpr()->getType(); } // Technically, there should be a check for array subscript // expressions here, but the result of one is always an lvalue anyway. } ValueDecl *dcl = getPrimaryDecl(op); if (auto *FD = dyn_cast_or_null(dcl)) if (!checkAddressOfFunctionIsAvailable(FD, /*Complain=*/true, op->getLocStart())) return QualType(); Expr::LValueClassification lval = op->ClassifyLValue(Context); unsigned AddressOfError = AO_No_Error; if (lval == Expr::LV_ClassTemporary || lval == Expr::LV_ArrayTemporary) { bool sfinae = (bool)isSFINAEContext(); Diag(OpLoc, isSFINAEContext() ? diag::err_typecheck_addrof_temporary : diag::ext_typecheck_addrof_temporary) << op->getType() << op->getSourceRange(); if (sfinae) return QualType(); // Materialize the temporary as an lvalue so that we can take its address. OrigOp = op = CreateMaterializeTemporaryExpr(op->getType(), OrigOp.get(), true); } else if (isa(op)) { return Context.getPointerType(op->getType()); } else if (lval == Expr::LV_MemberFunction) { // If it's an instance method, make a member pointer. // The expression must have exactly the form &A::foo. // If the underlying expression isn't a decl ref, give up. if (!isa(op)) { Diag(OpLoc, diag::err_invalid_form_pointer_member_function) << OrigOp.get()->getSourceRange(); return QualType(); } DeclRefExpr *DRE = cast(op); CXXMethodDecl *MD = cast(DRE->getDecl()); // The id-expression was parenthesized. 
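      // (Editor's sketch, hypothetical 'struct A { void foo(); };':
      //    &A::foo   — OK, forms a member pointer of type 'void (A::*)()';
      //    &(A::foo) — rejected below: the id-expression was parenthesized;
      //    &foo      — rejected below as unqualified, with a fixit adding
      //                the 'A::' qualifier.)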
if (OrigOp.get() != DRE) { Diag(OpLoc, diag::err_parens_pointer_member_function) << OrigOp.get()->getSourceRange(); // The method was named without a qualifier. } else if (!DRE->getQualifier()) { if (MD->getParent()->getName().empty()) Diag(OpLoc, diag::err_unqualified_pointer_member_function) << op->getSourceRange(); else { SmallString<32> Str; StringRef Qual = (MD->getParent()->getName() + "::").toStringRef(Str); Diag(OpLoc, diag::err_unqualified_pointer_member_function) << op->getSourceRange() << FixItHint::CreateInsertion(op->getSourceRange().getBegin(), Qual); } } // Taking the address of a dtor is illegal per C++ [class.dtor]p2. if (isa(MD)) Diag(OpLoc, diag::err_typecheck_addrof_dtor) << op->getSourceRange(); QualType MPTy = Context.getMemberPointerType( op->getType(), Context.getTypeDeclType(MD->getParent()).getTypePtr()); // Under the MS ABI, lock down the inheritance model now. if (Context.getTargetInfo().getCXXABI().isMicrosoft()) (void)isCompleteType(OpLoc, MPTy); return MPTy; } else if (lval != Expr::LV_Valid && lval != Expr::LV_IncompleteVoidType) { // C99 6.5.3.2p1 // The operand must be either an l-value or a function designator if (!op->getType()->isFunctionType()) { // Use a special diagnostic for loads from property references. if (isa(op)) { AddressOfError = AO_Property_Expansion; } else { Diag(OpLoc, diag::err_typecheck_invalid_lvalue_addrof) << op->getType() << op->getSourceRange(); return QualType(); } } } else if (op->getObjectKind() == OK_BitField) { // C99 6.5.3.2p1 // The operand cannot be a bit-field AddressOfError = AO_Bit_Field; } else if (op->getObjectKind() == OK_VectorComponent) { // The operand cannot be an element of a vector AddressOfError = AO_Vector_Element; } else if (dcl) { // C99 6.5.3.2p1 // We have an lvalue with a decl. Make sure the decl is not declared // with the register storage-class specifier. if (const VarDecl *vd = dyn_cast(dcl)) { // in C++ it is not error to take address of a register // variable (c++03 7.1.1P3) if (vd->getStorageClass() == SC_Register && !getLangOpts().CPlusPlus) { AddressOfError = AO_Register_Variable; } } else if (isa(dcl)) { AddressOfError = AO_Property_Expansion; } else if (isa(dcl)) { return Context.OverloadTy; } else if (isa(dcl) || isa(dcl)) { // Okay: we can take the address of a field. // Could be a pointer to member, though, if there is an explicit // scope qualifier for the class. if (isa(op) && cast(op)->getQualifier()) { DeclContext *Ctx = dcl->getDeclContext(); if (Ctx && Ctx->isRecord()) { if (dcl->getType()->isReferenceType()) { Diag(OpLoc, diag::err_cannot_form_pointer_to_member_of_reference_type) << dcl->getDeclName() << dcl->getType(); return QualType(); } while (cast(Ctx)->isAnonymousStructOrUnion()) Ctx = Ctx->getParent(); QualType MPTy = Context.getMemberPointerType( op->getType(), Context.getTypeDeclType(cast(Ctx)).getTypePtr()); // Under the MS ABI, lock down the inheritance model now. if (Context.getTargetInfo().getCXXABI().isMicrosoft()) (void)isCompleteType(OpLoc, MPTy); return MPTy; } } } else if (!isa(dcl) && !isa(dcl) && !isa(dcl)) llvm_unreachable("Unknown/unexpected decl type"); } if (AddressOfError != AO_No_Error) { diagnoseAddressOfInvalidType(*this, OpLoc, op, AddressOfError); return QualType(); } if (lval == Expr::LV_IncompleteVoidType) { // Taking the address of a void variable is technically illegal, but we // allow it in cases which are otherwise valid. // Example: "extern void x; void* y = &x;". 
Diag(OpLoc, diag::ext_typecheck_addrof_void) << op->getSourceRange(); } // If the operand has type "type", the result has type "pointer to type". if (op->getType()->isObjCObjectType()) return Context.getObjCObjectPointerType(op->getType()); CheckAddressOfPackedMember(op); return Context.getPointerType(op->getType()); } static void RecordModifiableNonNullParam(Sema &S, const Expr *Exp) { const DeclRefExpr *DRE = dyn_cast(Exp); if (!DRE) return; const Decl *D = DRE->getDecl(); if (!D) return; const ParmVarDecl *Param = dyn_cast(D); if (!Param) return; if (const FunctionDecl* FD = dyn_cast(Param->getDeclContext())) if (!FD->hasAttr() && !Param->hasAttr()) return; if (FunctionScopeInfo *FD = S.getCurFunction()) if (!FD->ModifiedNonNullParams.count(Param)) FD->ModifiedNonNullParams.insert(Param); } /// CheckIndirectionOperand - Type check unary indirection (prefix '*'). static QualType CheckIndirectionOperand(Sema &S, Expr *Op, ExprValueKind &VK, SourceLocation OpLoc) { if (Op->isTypeDependent()) return S.Context.DependentTy; ExprResult ConvResult = S.UsualUnaryConversions(Op); if (ConvResult.isInvalid()) return QualType(); Op = ConvResult.get(); QualType OpTy = Op->getType(); QualType Result; if (isa(Op)) { QualType OpOrigType = Op->IgnoreParenCasts()->getType(); S.CheckCompatibleReinterpretCast(OpOrigType, OpTy, /*IsDereference*/true, Op->getSourceRange()); } if (const PointerType *PT = OpTy->getAs()) { Result = PT->getPointeeType(); } else if (const ObjCObjectPointerType *OPT = OpTy->getAs()) Result = OPT->getPointeeType(); else { ExprResult PR = S.CheckPlaceholderExpr(Op); if (PR.isInvalid()) return QualType(); if (PR.get() != Op) return CheckIndirectionOperand(S, PR.get(), VK, OpLoc); } if (Result.isNull()) { S.Diag(OpLoc, diag::err_typecheck_indirection_requires_pointer) << OpTy << Op->getSourceRange(); return QualType(); } // Note that per both C89 and C99, indirection is always legal, even if Result // is an incomplete type or void. It would be possible to warn about // dereferencing a void pointer, but it's completely well-defined, and such a // warning is unlikely to catch any mistakes. In C++, indirection is not valid // for pointers to 'void' but is fine for any other pointer type: // // C++ [expr.unary.op]p1: // [...] the expression to which [the unary * operator] is applied shall // be a pointer to an object type, or a pointer to a function type if (S.getLangOpts().CPlusPlus && Result->isVoidType()) S.Diag(OpLoc, diag::ext_typecheck_indirection_through_void_pointer) << OpTy << Op->getSourceRange(); // Dereferences are usually l-values... VK = VK_LValue; // ...except that certain expressions are never l-values in C. 
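  // (Editor's note: in C this covers dereferences that yield unqualified
  //  'void' or a function type, e.g. '*pv' for 'void *pv' and '*pf' for a
  //  function pointer 'pf' — neither is an assignable lvalue in C.)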
  if (!S.getLangOpts().CPlusPlus && Result.isCForbiddenLValueType())
    VK = VK_RValue;
  return Result;
}

BinaryOperatorKind Sema::ConvertTokenKindToBinaryOpcode(tok::TokenKind Kind) {
  BinaryOperatorKind Opc;
  switch (Kind) {
  default: llvm_unreachable("Unknown binop!");
  case tok::periodstar:           Opc = BO_PtrMemD; break;
  case tok::arrowstar:            Opc = BO_PtrMemI; break;
  case tok::star:                 Opc = BO_Mul; break;
  case tok::slash:                Opc = BO_Div; break;
  case tok::percent:              Opc = BO_Rem; break;
  case tok::plus:                 Opc = BO_Add; break;
  case tok::minus:                Opc = BO_Sub; break;
  case tok::lessless:             Opc = BO_Shl; break;
  case tok::greatergreater:       Opc = BO_Shr; break;
  case tok::lessequal:            Opc = BO_LE; break;
  case tok::less:                 Opc = BO_LT; break;
  case tok::greaterequal:         Opc = BO_GE; break;
  case tok::greater:              Opc = BO_GT; break;
  case tok::exclaimequal:         Opc = BO_NE; break;
  case tok::equalequal:           Opc = BO_EQ; break;
  case tok::spaceship:            Opc = BO_Cmp; break;
  case tok::amp:                  Opc = BO_And; break;
  case tok::caret:                Opc = BO_Xor; break;
  case tok::pipe:                 Opc = BO_Or; break;
  case tok::ampamp:               Opc = BO_LAnd; break;
  case tok::pipepipe:             Opc = BO_LOr; break;
  case tok::equal:                Opc = BO_Assign; break;
  case tok::starequal:            Opc = BO_MulAssign; break;
  case tok::slashequal:           Opc = BO_DivAssign; break;
  case tok::percentequal:         Opc = BO_RemAssign; break;
  case tok::plusequal:            Opc = BO_AddAssign; break;
  case tok::minusequal:           Opc = BO_SubAssign; break;
  case tok::lesslessequal:        Opc = BO_ShlAssign; break;
  case tok::greatergreaterequal:  Opc = BO_ShrAssign; break;
  case tok::ampequal:             Opc = BO_AndAssign; break;
  case tok::caretequal:           Opc = BO_XorAssign; break;
  case tok::pipeequal:            Opc = BO_OrAssign; break;
  case tok::comma:                Opc = BO_Comma; break;
  }
  return Opc;
}

static inline UnaryOperatorKind ConvertTokenKindToUnaryOpcode(
  tok::TokenKind Kind) {
  UnaryOperatorKind Opc;
  switch (Kind) {
  default: llvm_unreachable("Unknown unary op!");
  case tok::plusplus:         Opc = UO_PreInc; break;
  case tok::minusminus:       Opc = UO_PreDec; break;
  case tok::amp:              Opc = UO_AddrOf; break;
  case tok::star:             Opc = UO_Deref; break;
  case tok::plus:             Opc = UO_Plus; break;
  case tok::minus:            Opc = UO_Minus; break;
  case tok::tilde:            Opc = UO_Not; break;
  case tok::exclaim:          Opc = UO_LNot; break;
  case tok::kw___real:        Opc = UO_Real; break;
  case tok::kw___imag:        Opc = UO_Imag; break;
  case tok::kw___extension__: Opc = UO_Extension; break;
  }
  return Opc;
}

/// DiagnoseSelfAssignment - Emits a warning if a value is assigned to itself.
/// This warning is only emitted for builtin assignment operations. It is also
/// suppressed in the event of macro expansions.
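/// (Editor's example: 'x = x;' warns; the same assignment expanded from a
/// macro stays silent, as does 'v = v;' when 'v' is volatile-qualified —
/// see the early returns below.)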
static void DiagnoseSelfAssignment(Sema &S, Expr *LHSExpr, Expr *RHSExpr, SourceLocation OpLoc) { if (S.inTemplateInstantiation()) return; if (OpLoc.isInvalid() || OpLoc.isMacroID()) return; LHSExpr = LHSExpr->IgnoreParenImpCasts(); RHSExpr = RHSExpr->IgnoreParenImpCasts(); const DeclRefExpr *LHSDeclRef = dyn_cast(LHSExpr); const DeclRefExpr *RHSDeclRef = dyn_cast(RHSExpr); if (!LHSDeclRef || !RHSDeclRef || LHSDeclRef->getLocation().isMacroID() || RHSDeclRef->getLocation().isMacroID()) return; const ValueDecl *LHSDecl = cast(LHSDeclRef->getDecl()->getCanonicalDecl()); const ValueDecl *RHSDecl = cast(RHSDeclRef->getDecl()->getCanonicalDecl()); if (LHSDecl != RHSDecl) return; if (LHSDecl->getType().isVolatileQualified()) return; if (const ReferenceType *RefTy = LHSDecl->getType()->getAs()) if (RefTy->getPointeeType().isVolatileQualified()) return; S.Diag(OpLoc, diag::warn_self_assignment) << LHSDeclRef->getType() << LHSExpr->getSourceRange() << RHSExpr->getSourceRange(); } /// Check if a bitwise-& is performed on an Objective-C pointer. This /// is usually indicative of introspection within the Objective-C pointer. static void checkObjCPointerIntrospection(Sema &S, ExprResult &L, ExprResult &R, SourceLocation OpLoc) { if (!S.getLangOpts().ObjC1) return; const Expr *ObjCPointerExpr = nullptr, *OtherExpr = nullptr; const Expr *LHS = L.get(); const Expr *RHS = R.get(); if (LHS->IgnoreParenCasts()->getType()->isObjCObjectPointerType()) { ObjCPointerExpr = LHS; OtherExpr = RHS; } else if (RHS->IgnoreParenCasts()->getType()->isObjCObjectPointerType()) { ObjCPointerExpr = RHS; OtherExpr = LHS; } // This warning is deliberately made very specific to reduce false // positives with logic that uses '&' for hashing. This logic mainly // looks for code trying to introspect into tagged pointers, which // code should generally never do. if (ObjCPointerExpr && isa(OtherExpr->IgnoreParenCasts())) { unsigned Diag = diag::warn_objc_pointer_masking; // Determine if we are introspecting the result of performSelectorXXX. const Expr *Ex = ObjCPointerExpr->IgnoreParenCasts(); // Special case messages to -performSelector and friends, which // can return non-pointer values boxed in a pointer value. // Some clients may wish to silence warnings in this subcase. if (const ObjCMessageExpr *ME = dyn_cast(Ex)) { Selector S = ME->getSelector(); StringRef SelArg0 = S.getNameForSlot(0); if (SelArg0.startswith("performSelector")) Diag = diag::warn_objc_pointer_masking_performSelector; } S.Diag(OpLoc, Diag) << ObjCPointerExpr->getSourceRange(); } } static NamedDecl *getDeclFromExpr(Expr *E) { if (!E) return nullptr; if (auto *DRE = dyn_cast(E)) return DRE->getDecl(); if (auto *ME = dyn_cast(E)) return ME->getMemberDecl(); if (auto *IRE = dyn_cast(E)) return IRE->getDecl(); return nullptr; } // This helper function promotes a binary operator's operands (which are of a // half vector type) to a vector of floats and then truncates the result to // a vector of either half or short. 
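// (Editor's sketch: on a target without native half arithmetic, 'a + b' for
//  'half4 a, b;' becomes a float4 addition whose result is truncated back to
//  half4, while 'a < b' keeps the vector-of-short comparison result that
//  GetSignedVectorType produced earlier.)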
static ExprResult convertHalfVecBinOp(Sema &S, ExprResult LHS, ExprResult RHS, BinaryOperatorKind Opc, QualType ResultTy, ExprValueKind VK, ExprObjectKind OK, bool IsCompAssign, SourceLocation OpLoc, FPOptions FPFeatures) { auto &Context = S.getASTContext(); assert((isVector(ResultTy, Context.HalfTy) || isVector(ResultTy, Context.ShortTy)) && "Result must be a vector of half or short"); assert(isVector(LHS.get()->getType(), Context.HalfTy) && isVector(RHS.get()->getType(), Context.HalfTy) && "both operands expected to be a half vector"); RHS = convertVector(RHS.get(), Context.FloatTy, S); QualType BinOpResTy = RHS.get()->getType(); // If Opc is a comparison, ResultType is a vector of shorts. In that case, // change BinOpResTy to a vector of ints. if (isVector(ResultTy, Context.ShortTy)) BinOpResTy = S.GetSignedVectorType(BinOpResTy); if (IsCompAssign) return new (Context) CompoundAssignOperator( LHS.get(), RHS.get(), Opc, ResultTy, VK, OK, BinOpResTy, BinOpResTy, OpLoc, FPFeatures); LHS = convertVector(LHS.get(), Context.FloatTy, S); auto *BO = new (Context) BinaryOperator(LHS.get(), RHS.get(), Opc, BinOpResTy, VK, OK, OpLoc, FPFeatures); return convertVector(BO, ResultTy->getAs()->getElementType(), S); } static std::pair CorrectDelayedTyposInBinOp(Sema &S, BinaryOperatorKind Opc, Expr *LHSExpr, Expr *RHSExpr) { ExprResult LHS = LHSExpr, RHS = RHSExpr; if (!S.getLangOpts().CPlusPlus) { // C cannot handle TypoExpr nodes on either side of a binop because it // doesn't handle dependent types properly, so make sure any TypoExprs have // been dealt with before checking the operands. LHS = S.CorrectDelayedTyposInExpr(LHS); RHS = S.CorrectDelayedTyposInExpr(RHS, [Opc, LHS](Expr *E) { if (Opc != BO_Assign) return ExprResult(E); // Avoid correcting the RHS to the same Expr as the LHS. Decl *D = getDeclFromExpr(E); return (D && D == getDeclFromExpr(LHS.get())) ? ExprError() : E; }); } return std::make_pair(LHS, RHS); } /// Returns true if conversion between vectors of halfs and vectors of floats /// is needed. static bool needsConversionOfHalfVec(bool OpRequiresConversion, ASTContext &Ctx, QualType SrcType) { return OpRequiresConversion && !Ctx.getLangOpts().NativeHalfType && !Ctx.getTargetInfo().useFP16ConversionIntrinsics() && isVector(SrcType, Ctx.HalfTy); } /// CreateBuiltinBinOp - Creates a new built-in binary operation with /// operator @p Opc at location @c TokLoc. This routine only supports /// built-in operations; ActOnBinOp handles overloaded operators. ExprResult Sema::CreateBuiltinBinOp(SourceLocation OpLoc, BinaryOperatorKind Opc, Expr *LHSExpr, Expr *RHSExpr) { if (getLangOpts().CPlusPlus11 && isa(RHSExpr)) { // The syntax only allows initializer lists on the RHS of assignment, // so we don't need to worry about accepting invalid code for // non-assignment operators. // C++11 5.17p9: // The meaning of x = {v} [...] is that of x = T(v) [...]. The meaning // of x = {} is x = T(). InitializationKind Kind = InitializationKind::CreateDirectList(RHSExpr->getLocStart()); InitializedEntity Entity = InitializedEntity::InitializeTemporary(LHSExpr->getType()); InitializationSequence InitSeq(*this, Entity, Kind, RHSExpr); ExprResult Init = InitSeq.Perform(*this, Entity, Kind, RHSExpr); if (Init.isInvalid()) return Init; RHSExpr = Init.get(); } ExprResult LHS = LHSExpr, RHS = RHSExpr; QualType ResultTy; // Result type of the binary operator. 
// The following two variables are used for compound assignment operators QualType CompLHSTy; // Type of LHS after promotions for computation QualType CompResultTy; // Type of computation result ExprValueKind VK = VK_RValue; ExprObjectKind OK = OK_Ordinary; bool ConvertHalfVec = false; std::tie(LHS, RHS) = CorrectDelayedTyposInBinOp(*this, Opc, LHSExpr, RHSExpr); if (!LHS.isUsable() || !RHS.isUsable()) return ExprError(); if (getLangOpts().OpenCL) { QualType LHSTy = LHSExpr->getType(); QualType RHSTy = RHSExpr->getType(); // OpenCLC v2.0 s6.13.11.1 allows atomic variables to be initialized by // the ATOMIC_VAR_INIT macro. if (LHSTy->isAtomicType() || RHSTy->isAtomicType()) { SourceRange SR(LHSExpr->getLocStart(), RHSExpr->getLocEnd()); if (BO_Assign == Opc) Diag(OpLoc, diag::err_opencl_atomic_init) << 0 << SR; else ResultTy = InvalidOperands(OpLoc, LHS, RHS); return ExprError(); } // OpenCL special types - image, sampler, pipe, and blocks are to be used // only with a builtin functions and therefore should be disallowed here. if (LHSTy->isImageType() || RHSTy->isImageType() || LHSTy->isSamplerT() || RHSTy->isSamplerT() || LHSTy->isPipeType() || RHSTy->isPipeType() || LHSTy->isBlockPointerType() || RHSTy->isBlockPointerType()) { ResultTy = InvalidOperands(OpLoc, LHS, RHS); return ExprError(); } } switch (Opc) { case BO_Assign: ResultTy = CheckAssignmentOperands(LHS.get(), RHS, OpLoc, QualType()); if (getLangOpts().CPlusPlus && LHS.get()->getObjectKind() != OK_ObjCProperty) { VK = LHS.get()->getValueKind(); OK = LHS.get()->getObjectKind(); } if (!ResultTy.isNull()) { DiagnoseSelfAssignment(*this, LHS.get(), RHS.get(), OpLoc); DiagnoseSelfMove(LHS.get(), RHS.get(), OpLoc); } RecordModifiableNonNullParam(*this, LHS.get()); break; case BO_PtrMemD: case BO_PtrMemI: ResultTy = CheckPointerToMemberOperands(LHS, RHS, VK, OpLoc, Opc == BO_PtrMemI); break; case BO_Mul: case BO_Div: ConvertHalfVec = true; ResultTy = CheckMultiplyDivideOperands(LHS, RHS, OpLoc, false, Opc == BO_Div); break; case BO_Rem: ResultTy = CheckRemainderOperands(LHS, RHS, OpLoc); break; case BO_Add: ConvertHalfVec = true; ResultTy = CheckAdditionOperands(LHS, RHS, OpLoc, Opc); break; case BO_Sub: ConvertHalfVec = true; ResultTy = CheckSubtractionOperands(LHS, RHS, OpLoc); break; case BO_Shl: case BO_Shr: ResultTy = CheckShiftOperands(LHS, RHS, OpLoc, Opc); break; case BO_LE: case BO_LT: case BO_GE: case BO_GT: ConvertHalfVec = true; ResultTy = CheckCompareOperands(LHS, RHS, OpLoc, Opc, true); break; case BO_EQ: case BO_NE: ConvertHalfVec = true; ResultTy = CheckCompareOperands(LHS, RHS, OpLoc, Opc, false); break; case BO_Cmp: // FIXME: Implement proper semantic checking of '<=>'. 
ConvertHalfVec = true; ResultTy = CheckCompareOperands(LHS, RHS, OpLoc, Opc, true); if (!ResultTy.isNull()) ResultTy = Context.VoidTy; break; case BO_And: checkObjCPointerIntrospection(*this, LHS, RHS, OpLoc); LLVM_FALLTHROUGH; case BO_Xor: case BO_Or: ResultTy = CheckBitwiseOperands(LHS, RHS, OpLoc, Opc); break; case BO_LAnd: case BO_LOr: ConvertHalfVec = true; ResultTy = CheckLogicalOperands(LHS, RHS, OpLoc, Opc); break; case BO_MulAssign: case BO_DivAssign: ConvertHalfVec = true; CompResultTy = CheckMultiplyDivideOperands(LHS, RHS, OpLoc, true, Opc == BO_DivAssign); CompLHSTy = CompResultTy; if (!CompResultTy.isNull() && !LHS.isInvalid() && !RHS.isInvalid()) ResultTy = CheckAssignmentOperands(LHS.get(), RHS, OpLoc, CompResultTy); break; case BO_RemAssign: CompResultTy = CheckRemainderOperands(LHS, RHS, OpLoc, true); CompLHSTy = CompResultTy; if (!CompResultTy.isNull() && !LHS.isInvalid() && !RHS.isInvalid()) ResultTy = CheckAssignmentOperands(LHS.get(), RHS, OpLoc, CompResultTy); break; case BO_AddAssign: ConvertHalfVec = true; CompResultTy = CheckAdditionOperands(LHS, RHS, OpLoc, Opc, &CompLHSTy); if (!CompResultTy.isNull() && !LHS.isInvalid() && !RHS.isInvalid()) ResultTy = CheckAssignmentOperands(LHS.get(), RHS, OpLoc, CompResultTy); break; case BO_SubAssign: ConvertHalfVec = true; CompResultTy = CheckSubtractionOperands(LHS, RHS, OpLoc, &CompLHSTy); if (!CompResultTy.isNull() && !LHS.isInvalid() && !RHS.isInvalid()) ResultTy = CheckAssignmentOperands(LHS.get(), RHS, OpLoc, CompResultTy); break; case BO_ShlAssign: case BO_ShrAssign: CompResultTy = CheckShiftOperands(LHS, RHS, OpLoc, Opc, true); CompLHSTy = CompResultTy; if (!CompResultTy.isNull() && !LHS.isInvalid() && !RHS.isInvalid()) ResultTy = CheckAssignmentOperands(LHS.get(), RHS, OpLoc, CompResultTy); break; case BO_AndAssign: case BO_OrAssign: // fallthrough DiagnoseSelfAssignment(*this, LHS.get(), RHS.get(), OpLoc); LLVM_FALLTHROUGH; case BO_XorAssign: CompResultTy = CheckBitwiseOperands(LHS, RHS, OpLoc, Opc); CompLHSTy = CompResultTy; if (!CompResultTy.isNull() && !LHS.isInvalid() && !RHS.isInvalid()) ResultTy = CheckAssignmentOperands(LHS.get(), RHS, OpLoc, CompResultTy); break; case BO_Comma: ResultTy = CheckCommaOperands(*this, LHS, RHS, OpLoc); if (getLangOpts().CPlusPlus && !RHS.isInvalid()) { VK = RHS.get()->getValueKind(); OK = RHS.get()->getObjectKind(); } break; } if (ResultTy.isNull() || LHS.isInvalid() || RHS.isInvalid()) return ExprError(); // Some of the binary operations require promoting operands of half vector to // float vectors and truncating the result back to half vector. For now, we do // this only when HalfArgsAndReturn is set (that is, when the target is arm or // arm64). 
assert(isVector(RHS.get()->getType(), Context.HalfTy) == isVector(LHS.get()->getType(), Context.HalfTy) && "both sides are half vectors or neither sides are"); ConvertHalfVec = needsConversionOfHalfVec(ConvertHalfVec, Context, LHS.get()->getType()); // Check for array bounds violations for both sides of the BinaryOperator CheckArrayAccess(LHS.get()); CheckArrayAccess(RHS.get()); if (const ObjCIsaExpr *OISA = dyn_cast(LHS.get()->IgnoreParenCasts())) { NamedDecl *ObjectSetClass = LookupSingleName(TUScope, &Context.Idents.get("object_setClass"), SourceLocation(), LookupOrdinaryName); if (ObjectSetClass && isa(LHS.get())) { SourceLocation RHSLocEnd = getLocForEndOfToken(RHS.get()->getLocEnd()); Diag(LHS.get()->getExprLoc(), diag::warn_objc_isa_assign) << FixItHint::CreateInsertion(LHS.get()->getLocStart(), "object_setClass(") << FixItHint::CreateReplacement(SourceRange(OISA->getOpLoc(), OpLoc), ",") << FixItHint::CreateInsertion(RHSLocEnd, ")"); } else Diag(LHS.get()->getExprLoc(), diag::warn_objc_isa_assign); } else if (const ObjCIvarRefExpr *OIRE = dyn_cast(LHS.get()->IgnoreParenCasts())) DiagnoseDirectIsaAccess(*this, OIRE, OpLoc, RHS.get()); // Opc is not a compound assignment if CompResultTy is null. if (CompResultTy.isNull()) { if (ConvertHalfVec) return convertHalfVecBinOp(*this, LHS, RHS, Opc, ResultTy, VK, OK, false, OpLoc, FPFeatures); return new (Context) BinaryOperator(LHS.get(), RHS.get(), Opc, ResultTy, VK, OK, OpLoc, FPFeatures); } // Handle compound assignments. if (getLangOpts().CPlusPlus && LHS.get()->getObjectKind() != OK_ObjCProperty) { VK = VK_LValue; OK = LHS.get()->getObjectKind(); } if (ConvertHalfVec) return convertHalfVecBinOp(*this, LHS, RHS, Opc, ResultTy, VK, OK, true, OpLoc, FPFeatures); return new (Context) CompoundAssignOperator( LHS.get(), RHS.get(), Opc, ResultTy, VK, OK, CompLHSTy, CompResultTy, OpLoc, FPFeatures); } /// DiagnoseBitwisePrecedence - Emit a warning when bitwise and comparison /// operators are mixed in a way that suggests that the programmer forgot that /// comparison operators have higher precedence. The most typical example of /// such code is "flags & 0x0020 != 0", which is equivalent to "flags & 1". static void DiagnoseBitwisePrecedence(Sema &Self, BinaryOperatorKind Opc, SourceLocation OpLoc, Expr *LHSExpr, Expr *RHSExpr) { BinaryOperator *LHSBO = dyn_cast(LHSExpr); BinaryOperator *RHSBO = dyn_cast(RHSExpr); // Check that one of the sides is a comparison operator and the other isn't. bool isLeftComp = LHSBO && LHSBO->isComparisonOp(); bool isRightComp = RHSBO && RHSBO->isComparisonOp(); if (isLeftComp == isRightComp) return; // Bitwise operations are sometimes used as eager logical ops. // Don't diagnose this. bool isLeftBitwise = LHSBO && LHSBO->isBitwiseOp(); bool isRightBitwise = RHSBO && RHSBO->isBitwiseOp(); if (isLeftBitwise || isRightBitwise) return; SourceRange DiagRange = isLeftComp ? SourceRange(LHSExpr->getLocStart(), OpLoc) : SourceRange(OpLoc, RHSExpr->getLocEnd()); StringRef OpStr = isLeftComp ? LHSBO->getOpcodeStr() : RHSBO->getOpcodeStr(); SourceRange ParensRange = isLeftComp ? SourceRange(LHSBO->getRHS()->getLocStart(), RHSExpr->getLocEnd()) : SourceRange(LHSExpr->getLocStart(), RHSBO->getLHS()->getLocEnd()); Self.Diag(OpLoc, diag::warn_precedence_bitwise_rel) << DiagRange << BinaryOperator::getOpcodeStr(Opc) << OpStr; SuggestParentheses(Self, OpLoc, Self.PDiag(diag::note_precedence_silence) << OpStr, (isLeftComp ? 
LHSExpr : RHSExpr)->getSourceRange()); SuggestParentheses(Self, OpLoc, Self.PDiag(diag::note_precedence_bitwise_first) << BinaryOperator::getOpcodeStr(Opc), ParensRange); } /// \brief It accepts a '&&' expr that is inside a '||' one. /// Emit a diagnostic together with a fixit hint that wraps the '&&' expression /// in parentheses. static void EmitDiagnosticForLogicalAndInLogicalOr(Sema &Self, SourceLocation OpLoc, BinaryOperator *Bop) { assert(Bop->getOpcode() == BO_LAnd); Self.Diag(Bop->getOperatorLoc(), diag::warn_logical_and_in_logical_or) << Bop->getSourceRange() << OpLoc; SuggestParentheses(Self, Bop->getOperatorLoc(), Self.PDiag(diag::note_precedence_silence) << Bop->getOpcodeStr(), Bop->getSourceRange()); } /// \brief Returns true if the given expression can be evaluated as a constant /// 'true'. static bool EvaluatesAsTrue(Sema &S, Expr *E) { bool Res; return !E->isValueDependent() && E->EvaluateAsBooleanCondition(Res, S.getASTContext()) && Res; } /// \brief Returns true if the given expression can be evaluated as a constant /// 'false'. static bool EvaluatesAsFalse(Sema &S, Expr *E) { bool Res; return !E->isValueDependent() && E->EvaluateAsBooleanCondition(Res, S.getASTContext()) && !Res; } /// \brief Look for '&&' in the left hand of a '||' expr. static void DiagnoseLogicalAndInLogicalOrLHS(Sema &S, SourceLocation OpLoc, Expr *LHSExpr, Expr *RHSExpr) { if (BinaryOperator *Bop = dyn_cast(LHSExpr)) { if (Bop->getOpcode() == BO_LAnd) { // If it's "a && b || 0" don't warn since the precedence doesn't matter. if (EvaluatesAsFalse(S, RHSExpr)) return; // If it's "1 && a || b" don't warn since the precedence doesn't matter. if (!EvaluatesAsTrue(S, Bop->getLHS())) return EmitDiagnosticForLogicalAndInLogicalOr(S, OpLoc, Bop); } else if (Bop->getOpcode() == BO_LOr) { if (BinaryOperator *RBop = dyn_cast(Bop->getRHS())) { // If it's "a || b && 1 || c" we didn't warn earlier for // "a || b && 1", but warn now. if (RBop->getOpcode() == BO_LAnd && EvaluatesAsTrue(S, RBop->getRHS())) return EmitDiagnosticForLogicalAndInLogicalOr(S, OpLoc, RBop); } } } } /// \brief Look for '&&' in the right hand of a '||' expr. static void DiagnoseLogicalAndInLogicalOrRHS(Sema &S, SourceLocation OpLoc, Expr *LHSExpr, Expr *RHSExpr) { if (BinaryOperator *Bop = dyn_cast(RHSExpr)) { if (Bop->getOpcode() == BO_LAnd) { // If it's "0 || a && b" don't warn since the precedence doesn't matter. if (EvaluatesAsFalse(S, LHSExpr)) return; // If it's "a || b && 1" don't warn since the precedence doesn't matter. if (!EvaluatesAsTrue(S, Bop->getRHS())) return EmitDiagnosticForLogicalAndInLogicalOr(S, OpLoc, Bop); } } } /// \brief Look for bitwise op in the left or right hand of a bitwise op with /// lower precedence and emit a diagnostic together with a fixit hint that wraps /// the '&' expression in parentheses. 
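/// (Editor's example: 'a & b | c' warns and suggests '(a & b) | c'; the
/// 'Bop->getOpcode() < Opc' test below relies on BO_And < BO_Xor < BO_Or in
/// the BinaryOperatorKind ordering.)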
static void DiagnoseBitwiseOpInBitwiseOp(Sema &S, BinaryOperatorKind Opc, SourceLocation OpLoc, Expr *SubExpr) { if (BinaryOperator *Bop = dyn_cast<BinaryOperator>(SubExpr)) { if (Bop->isBitwiseOp() && Bop->getOpcode() < Opc) { S.Diag(Bop->getOperatorLoc(), diag::warn_bitwise_op_in_bitwise_op) << Bop->getOpcodeStr() << BinaryOperator::getOpcodeStr(Opc) << Bop->getSourceRange() << OpLoc; SuggestParentheses(S, Bop->getOperatorLoc(), S.PDiag(diag::note_precedence_silence) << Bop->getOpcodeStr(), Bop->getSourceRange()); } } } static void DiagnoseAdditionInShift(Sema &S, SourceLocation OpLoc, Expr *SubExpr, StringRef Shift) { if (BinaryOperator *Bop = dyn_cast<BinaryOperator>(SubExpr)) { if (Bop->getOpcode() == BO_Add || Bop->getOpcode() == BO_Sub) { StringRef Op = Bop->getOpcodeStr(); S.Diag(Bop->getOperatorLoc(), diag::warn_addition_in_bitshift) << Bop->getSourceRange() << OpLoc << Shift << Op; SuggestParentheses(S, Bop->getOperatorLoc(), S.PDiag(diag::note_precedence_silence) << Op, Bop->getSourceRange()); } } } static void DiagnoseShiftCompare(Sema &S, SourceLocation OpLoc, Expr *LHSExpr, Expr *RHSExpr) { CXXOperatorCallExpr *OCE = dyn_cast<CXXOperatorCallExpr>(LHSExpr); if (!OCE) return; FunctionDecl *FD = OCE->getDirectCallee(); if (!FD || !FD->isOverloadedOperator()) return; OverloadedOperatorKind Kind = FD->getOverloadedOperator(); if (Kind != OO_LessLess && Kind != OO_GreaterGreater) return; S.Diag(OpLoc, diag::warn_overloaded_shift_in_comparison) << LHSExpr->getSourceRange() << RHSExpr->getSourceRange() << (Kind == OO_LessLess); SuggestParentheses(S, OCE->getOperatorLoc(), S.PDiag(diag::note_precedence_silence) << (Kind == OO_LessLess ? "<<" : ">>"), OCE->getSourceRange()); SuggestParentheses(S, OpLoc, S.PDiag(diag::note_evaluate_comparison_first), SourceRange(OCE->getArg(1)->getLocStart(), RHSExpr->getLocEnd())); } /// DiagnoseBinOpPrecedence - Emit warnings for expressions with tricky /// precedence. static void DiagnoseBinOpPrecedence(Sema &Self, BinaryOperatorKind Opc, SourceLocation OpLoc, Expr *LHSExpr, Expr *RHSExpr) { // Diagnose "arg1 'bitwise' arg2 'eq' arg3". if (BinaryOperator::isBitwiseOp(Opc)) DiagnoseBitwisePrecedence(Self, Opc, OpLoc, LHSExpr, RHSExpr); // Diagnose "arg1 & arg2 | arg3" if ((Opc == BO_Or || Opc == BO_Xor) && !OpLoc.isMacroID() /* Don't warn in macros. */) { DiagnoseBitwiseOpInBitwiseOp(Self, Opc, OpLoc, LHSExpr); DiagnoseBitwiseOpInBitwiseOp(Self, Opc, OpLoc, RHSExpr); } // Warn about arg1 || arg2 && arg3, as GCC 4.3+ does. // We don't warn for 'assert(a || b && "bad")' since this is safe. if (Opc == BO_LOr && !OpLoc.isMacroID() /* Don't warn in macros. */) { DiagnoseLogicalAndInLogicalOrLHS(Self, OpLoc, LHSExpr, RHSExpr); DiagnoseLogicalAndInLogicalOrRHS(Self, OpLoc, LHSExpr, RHSExpr); } if ((Opc == BO_Shl && LHSExpr->getType()->isIntegralType(Self.getASTContext())) || Opc == BO_Shr) { StringRef Shift = BinaryOperator::getOpcodeStr(Opc); DiagnoseAdditionInShift(Self, OpLoc, LHSExpr, Shift); DiagnoseAdditionInShift(Self, OpLoc, RHSExpr, Shift); } // Warn on overloaded shift operators and comparisons, such as: // cout << 5 == 4; if (BinaryOperator::isComparisonOp(Opc)) DiagnoseShiftCompare(Self, OpLoc, LHSExpr, RHSExpr); } // Binary Operators. 'Tok' is the token for the operator.
ExprResult Sema::ActOnBinOp(Scope *S, SourceLocation TokLoc, tok::TokenKind Kind, Expr *LHSExpr, Expr *RHSExpr) { BinaryOperatorKind Opc = ConvertTokenKindToBinaryOpcode(Kind); assert(LHSExpr && "ActOnBinOp(): missing left expression"); assert(RHSExpr && "ActOnBinOp(): missing right expression"); // Emit warnings for tricky precedence issues, e.g. "bitfield & 0x4 == 0" DiagnoseBinOpPrecedence(*this, Opc, TokLoc, LHSExpr, RHSExpr); return BuildBinOp(S, TokLoc, Opc, LHSExpr, RHSExpr); } /// Build an overloaded binary operator expression in the given scope. static ExprResult BuildOverloadedBinOp(Sema &S, Scope *Sc, SourceLocation OpLoc, BinaryOperatorKind Opc, Expr *LHS, Expr *RHS) { // Find all of the overloaded operators visible from this // point. We perform both an operator-name lookup from the local // scope and an argument-dependent lookup based on the types of // the arguments. UnresolvedSet<16> Functions; OverloadedOperatorKind OverOp = BinaryOperator::getOverloadedOperator(Opc); if (Sc && OverOp != OO_None && OverOp != OO_Equal) S.LookupOverloadedOperatorName(OverOp, Sc, LHS->getType(), RHS->getType(), Functions); // Build the (potentially-overloaded, potentially-dependent) // binary operation. return S.CreateOverloadedBinOp(OpLoc, Opc, Functions, LHS, RHS); } ExprResult Sema::BuildBinOp(Scope *S, SourceLocation OpLoc, BinaryOperatorKind Opc, Expr *LHSExpr, Expr *RHSExpr) { ExprResult LHS, RHS; std::tie(LHS, RHS) = CorrectDelayedTyposInBinOp(*this, Opc, LHSExpr, RHSExpr); if (!LHS.isUsable() || !RHS.isUsable()) return ExprError(); LHSExpr = LHS.get(); RHSExpr = RHS.get(); // We want to end up calling one of checkPseudoObjectAssignment // (if the LHS is a pseudo-object), BuildOverloadedBinOp (if // both expressions are overloadable or either is type-dependent), // or CreateBuiltinBinOp (in any other case). We also want to get // any placeholder types out of the way. // Handle pseudo-objects in the LHS. if (const BuiltinType *pty = LHSExpr->getType()->getAsPlaceholderType()) { // Assignments with a pseudo-object l-value need special analysis. if (pty->getKind() == BuiltinType::PseudoObject && BinaryOperator::isAssignmentOp(Opc)) return checkPseudoObjectAssignment(S, OpLoc, Opc, LHSExpr, RHSExpr); // Don't resolve overloads if the other type is overloadable. if (getLangOpts().CPlusPlus && pty->getKind() == BuiltinType::Overload) { // We can't actually test that if we still have a placeholder, // though. Fortunately, none of the exceptions we see in that // code below are valid when the LHS is an overload set. Note // that an overload set can be dependently-typed, but it never // instantiates to having an overloadable type. ExprResult resolvedRHS = CheckPlaceholderExpr(RHSExpr); if (resolvedRHS.isInvalid()) return ExprError(); RHSExpr = resolvedRHS.get(); if (RHSExpr->isTypeDependent() || RHSExpr->getType()->isOverloadableType()) return BuildOverloadedBinOp(*this, S, OpLoc, Opc, LHSExpr, RHSExpr); } // If we're instantiating "a.x < b" or "A::x < b" and 'x' names a function // template, diagnose the missing 'template' keyword instead of diagnosing // an invalid use of a bound member function. // // Note that "A::x < b" might be valid if 'b' has an overloadable type due // to C++1z [over.over]/1.4, but we already checked for that case above. 
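// A sketch of the case handled below (hypothetical user code):
//   template <typename T> void g(T a, int b) { (void)(a.x < b); }
// If 'x' names a member function template of T, "a.x < b" reaches here as a
// '<' whose LHS is a bound-member/overload placeholder; diagnosing the
// missing 'template' keyword is friendlier than the generic error.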
if (Opc == BO_LT && inTemplateInstantiation() && (pty->getKind() == BuiltinType::BoundMember || pty->getKind() == BuiltinType::Overload)) { auto *OE = dyn_cast<OverloadExpr>(LHSExpr); if (OE && !OE->hasTemplateKeyword() && !OE->hasExplicitTemplateArgs() && std::any_of(OE->decls_begin(), OE->decls_end(), [](NamedDecl *ND) { return isa<FunctionTemplateDecl>(ND); })) { Diag(OE->getQualifier() ? OE->getQualifierLoc().getBeginLoc() : OE->getNameLoc(), diag::err_template_kw_missing) << OE->getName().getAsString() << ""; return ExprError(); } } ExprResult LHS = CheckPlaceholderExpr(LHSExpr); if (LHS.isInvalid()) return ExprError(); LHSExpr = LHS.get(); } // Handle pseudo-objects in the RHS. if (const BuiltinType *pty = RHSExpr->getType()->getAsPlaceholderType()) { // An overload in the RHS can potentially be resolved by the type // being assigned to. if (Opc == BO_Assign && pty->getKind() == BuiltinType::Overload) { if (getLangOpts().CPlusPlus && (LHSExpr->isTypeDependent() || RHSExpr->isTypeDependent() || LHSExpr->getType()->isOverloadableType())) return BuildOverloadedBinOp(*this, S, OpLoc, Opc, LHSExpr, RHSExpr); return CreateBuiltinBinOp(OpLoc, Opc, LHSExpr, RHSExpr); } // Don't resolve overloads if the other type is overloadable. if (getLangOpts().CPlusPlus && pty->getKind() == BuiltinType::Overload && LHSExpr->getType()->isOverloadableType()) return BuildOverloadedBinOp(*this, S, OpLoc, Opc, LHSExpr, RHSExpr); ExprResult resolvedRHS = CheckPlaceholderExpr(RHSExpr); if (!resolvedRHS.isUsable()) return ExprError(); RHSExpr = resolvedRHS.get(); } if (getLangOpts().CPlusPlus) { // If either expression is type-dependent, always build an // overloaded op. if (LHSExpr->isTypeDependent() || RHSExpr->isTypeDependent()) return BuildOverloadedBinOp(*this, S, OpLoc, Opc, LHSExpr, RHSExpr); // Otherwise, build an overloaded op if either expression has an // overloadable type. if (LHSExpr->getType()->isOverloadableType() || RHSExpr->getType()->isOverloadableType()) return BuildOverloadedBinOp(*this, S, OpLoc, Opc, LHSExpr, RHSExpr); } // Build a built-in binary operation. return CreateBuiltinBinOp(OpLoc, Opc, LHSExpr, RHSExpr); } ExprResult Sema::CreateBuiltinUnaryOp(SourceLocation OpLoc, UnaryOperatorKind Opc, Expr *InputExpr) { ExprResult Input = InputExpr; ExprValueKind VK = VK_RValue; ExprObjectKind OK = OK_Ordinary; QualType resultType; bool ConvertHalfVec = false; if (getLangOpts().OpenCL) { QualType Ty = InputExpr->getType(); // The only legal unary operation for atomics is '&'. if ((Opc != UO_AddrOf && Ty->isAtomicType()) || // OpenCL special types - image, sampler, pipe, and blocks are to be used // only with builtin functions and therefore should be disallowed here.
(Ty->isImageType() || Ty->isSamplerT() || Ty->isPipeType() || Ty->isBlockPointerType())) { return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr) << InputExpr->getType() << Input.get()->getSourceRange()); } } switch (Opc) { case UO_PreInc: case UO_PreDec: case UO_PostInc: case UO_PostDec: resultType = CheckIncrementDecrementOperand(*this, Input.get(), VK, OK, OpLoc, Opc == UO_PreInc || Opc == UO_PostInc, Opc == UO_PreInc || Opc == UO_PreDec); break; case UO_AddrOf: resultType = CheckAddressOfOperand(Input, OpLoc); RecordModifiableNonNullParam(*this, InputExpr); break; case UO_Deref: { Input = DefaultFunctionArrayLvalueConversion(Input.get()); if (Input.isInvalid()) return ExprError(); resultType = CheckIndirectionOperand(*this, Input.get(), VK, OpLoc); break; } case UO_Plus: case UO_Minus: Input = UsualUnaryConversions(Input.get()); if (Input.isInvalid()) return ExprError(); // Unary plus and minus require promoting an operand of half vector to a // float vector and truncating the result back to a half vector. For now, we // do this only when HalfArgsAndReturns is set (that is, when the target is // arm or arm64). ConvertHalfVec = needsConversionOfHalfVec(true, Context, Input.get()->getType()); // If the operand is a half vector, promote it to a float vector. if (ConvertHalfVec) Input = convertVector(Input.get(), Context.FloatTy, *this); resultType = Input.get()->getType(); if (resultType->isDependentType()) break; if (resultType->isArithmeticType()) // C99 6.5.3.3p1 break; else if (resultType->isVectorType() && // The z vector extensions don't allow + or - with bool vectors. (!Context.getLangOpts().ZVector || resultType->getAs<VectorType>()->getVectorKind() != VectorType::AltiVecBool)) break; else if (getLangOpts().CPlusPlus && // C++ [expr.unary.op]p6 Opc == UO_Plus && resultType->isPointerType()) break; return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr) << resultType << Input.get()->getSourceRange()); case UO_Not: // bitwise complement Input = UsualUnaryConversions(Input.get()); if (Input.isInvalid()) return ExprError(); resultType = Input.get()->getType(); if (resultType->isDependentType()) break; // C99 6.5.3.3p1. We allow complex int and float as a GCC extension. if (resultType->isComplexType() || resultType->isComplexIntegerType()) // C99 does not support '~' for complex conjugation. Diag(OpLoc, diag::ext_integer_complement_complex) << resultType << Input.get()->getSourceRange(); else if (resultType->hasIntegerRepresentation()) break; else if (resultType->isExtVectorType() && Context.getLangOpts().OpenCL) { // OpenCL v1.1 s6.3.f: The bitwise operator not (~) does not operate // on vector float types. QualType T = resultType->getAs<ExtVectorType>()->getElementType(); if (!T->isIntegerType()) return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr) << resultType << Input.get()->getSourceRange()); } else { return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr) << resultType << Input.get()->getSourceRange()); } break; case UO_LNot: // logical negation // Unlike +/-/~, integer promotions aren't done here (C99 6.5.3.3p5). Input = DefaultFunctionArrayLvalueConversion(Input.get()); if (Input.isInvalid()) return ExprError(); resultType = Input.get()->getType(); // Though we still have to promote half FP to float...
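// (Illustrative: given '__fp16 h;', '!h' is checked as if 'h' were first
// converted to float, unless the target has native half support.)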
if (resultType->isHalfType() && !Context.getLangOpts().NativeHalfType) { Input = ImpCastExprToType(Input.get(), Context.FloatTy, CK_FloatingCast).get(); resultType = Context.FloatTy; } if (resultType->isDependentType()) break; if (resultType->isScalarType() && !isScopedEnumerationType(resultType)) { // C99 6.5.3.3p1: ok, fallthrough; if (Context.getLangOpts().CPlusPlus) { // C++03 [expr.unary.op]p8, C++0x [expr.unary.op]p9: // operand contextually converted to bool. Input = ImpCastExprToType(Input.get(), Context.BoolTy, ScalarTypeToBooleanCastKind(resultType)); } else if (Context.getLangOpts().OpenCL && Context.getLangOpts().OpenCLVersion < 120) { // OpenCL v1.1 6.3.h: The logical operator not (!) does not // operate on scalar float types. if (!resultType->isIntegerType() && !resultType->isPointerType()) return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr) << resultType << Input.get()->getSourceRange()); } } else if (resultType->isExtVectorType()) { if (Context.getLangOpts().OpenCL && Context.getLangOpts().OpenCLVersion < 120) { // OpenCL v1.1 6.3.h: The logical operator not (!) does not // operate on vector float types. QualType T = resultType->getAs<ExtVectorType>()->getElementType(); if (!T->isIntegerType()) return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr) << resultType << Input.get()->getSourceRange()); } // Vector logical not returns the signed variant of the operand type. resultType = GetSignedVectorType(resultType); break; } else { // FIXME: GCC's vector extension permits the usage of '!' with a vector // type in C++. We should allow that here too. return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr) << resultType << Input.get()->getSourceRange()); } // LNot always has type int. C99 6.5.3.3p5. // In C++, it's bool. C++ 5.3.1p8 resultType = Context.getLogicalOperationType(); break; case UO_Real: case UO_Imag: resultType = CheckRealImagOperand(*this, Input, OpLoc, Opc == UO_Real); // _Real maps ordinary l-values into ordinary l-values. _Imag maps ordinary // complex l-values to ordinary l-values and all other values to r-values. if (Input.isInvalid()) return ExprError(); if (Opc == UO_Real || Input.get()->getType()->isAnyComplexType()) { if (Input.get()->getValueKind() != VK_RValue && Input.get()->getObjectKind() == OK_Ordinary) VK = Input.get()->getValueKind(); } else if (!getLangOpts().CPlusPlus) { // In C, a volatile scalar is read by __imag. In C++, it is not. Input = DefaultLvalueConversion(Input.get()); } break; case UO_Extension: resultType = Input.get()->getType(); VK = Input.get()->getValueKind(); OK = Input.get()->getObjectKind(); break; case UO_Coawait: // It's unnecessary to represent the pass-through operator co_await in the // AST; just return the input expression instead. assert(!Input.get()->getType()->isDependentType() && "the co_await expression must be non-dependent before " "building operator co_await"); return Input; } if (resultType.isNull() || Input.isInvalid()) return ExprError(); // Check for array bounds violations in the operand of the UnaryOperator, // except for the '*' and '&' operators that have to be handled specially // by CheckArrayAccess (as there are special cases like &array[arraysize] // that are explicitly defined as valid by the standard). if (Opc != UO_AddrOf && Opc != UO_Deref) CheckArrayAccess(Input.get()); auto *UO = new (Context) UnaryOperator(Input.get(), Opc, resultType, VK, OK, OpLoc); // Convert the result back to a half vector.
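// (Sketch: for a hypothetical 'half4 v; -v;' on an ARM target, the operand
// was promoted to a float vector above, so the result is truncated back to
// the half vector type here.)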
if (ConvertHalfVec) return convertVector(UO, Context.HalfTy, *this); return UO; } /// \brief Determine whether the given expression is a qualified member /// access expression, of a form that could be turned into a pointer to member /// with the address-of operator. static bool isQualifiedMemberAccess(Expr *E) { if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E)) { if (!DRE->getQualifier()) return false; ValueDecl *VD = DRE->getDecl(); if (!VD->isCXXClassMember()) return false; if (isa<FieldDecl>(VD) || isa<IndirectFieldDecl>(VD)) return true; if (CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(VD)) return Method->isInstance(); return false; } if (UnresolvedLookupExpr *ULE = dyn_cast<UnresolvedLookupExpr>(E)) { if (!ULE->getQualifier()) return false; for (NamedDecl *D : ULE->decls()) { if (CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(D)) { if (Method->isInstance()) return true; } else { // Overload set does not contain methods. break; } } return false; } return false; } ExprResult Sema::BuildUnaryOp(Scope *S, SourceLocation OpLoc, UnaryOperatorKind Opc, Expr *Input) { // First things first: handle placeholders so that the // overloaded-operator check considers the right type. if (const BuiltinType *pty = Input->getType()->getAsPlaceholderType()) { // Increment and decrement of pseudo-object references. if (pty->getKind() == BuiltinType::PseudoObject && UnaryOperator::isIncrementDecrementOp(Opc)) return checkPseudoObjectIncDec(S, OpLoc, Opc, Input); // extension is always a builtin operator. if (Opc == UO_Extension) return CreateBuiltinUnaryOp(OpLoc, Opc, Input); // & gets special logic for several kinds of placeholder. // The builtin code knows what to do. if (Opc == UO_AddrOf && (pty->getKind() == BuiltinType::Overload || pty->getKind() == BuiltinType::UnknownAny || pty->getKind() == BuiltinType::BoundMember)) return CreateBuiltinUnaryOp(OpLoc, Opc, Input); // Anything else needs to be handled now. ExprResult Result = CheckPlaceholderExpr(Input); if (Result.isInvalid()) return ExprError(); Input = Result.get(); } if (getLangOpts().CPlusPlus && Input->getType()->isOverloadableType() && UnaryOperator::getOverloadedOperator(Opc) != OO_None && !(Opc == UO_AddrOf && isQualifiedMemberAccess(Input))) { // Find all of the overloaded operators visible from this // point. We perform both an operator-name lookup from the local // scope and an argument-dependent lookup based on the types of // the arguments. UnresolvedSet<16> Functions; OverloadedOperatorKind OverOp = UnaryOperator::getOverloadedOperator(Opc); if (S && OverOp != OO_None) LookupOverloadedOperatorName(OverOp, S, Input->getType(), QualType(), Functions); return CreateOverloadedUnaryOp(OpLoc, Opc, Functions, Input); } return CreateBuiltinUnaryOp(OpLoc, Opc, Input); } // Unary Operators. 'Tok' is the token for the operator. ExprResult Sema::ActOnUnaryOp(Scope *S, SourceLocation OpLoc, tok::TokenKind Op, Expr *Input) { return BuildUnaryOp(S, OpLoc, ConvertTokenKindToUnaryOpcode(Op), Input); } /// ActOnAddrLabel - Parse the GNU address of label extension: "&&foo". ExprResult Sema::ActOnAddrLabel(SourceLocation OpLoc, SourceLocation LabLoc, LabelDecl *TheDecl) { TheDecl->markUsed(Context); // Create the AST node. The address of a label always has type 'void*'. return new (Context) AddrLabelExpr(OpLoc, LabLoc, TheDecl, Context.getPointerType(Context.VoidTy)); } /// Given the last statement in a statement-expression, check whether /// the result is a producing expression (like a call to an /// ns_returns_retained function) and, if so, rebuild it to hoist the /// release out of the full-expression. Otherwise, return null.
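/// An illustrative ARC case (hypothetical code):
///   id obj = ({ [SomeClass createObject]; });
/// The trailing message send returns at +1; splicing out the consume cast
/// lets the statement-expression bind the result exactly once.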
/// Cannot fail. static Expr *maybeRebuildARCConsumingStmt(Stmt *Statement) { // Should always be wrapped with one of these. ExprWithCleanups *cleanups = dyn_cast<ExprWithCleanups>(Statement); if (!cleanups) return nullptr; ImplicitCastExpr *cast = dyn_cast<ImplicitCastExpr>(cleanups->getSubExpr()); if (!cast || cast->getCastKind() != CK_ARCConsumeObject) return nullptr; // Splice out the cast. This shouldn't modify any interesting // features of the statement. Expr *producer = cast->getSubExpr(); assert(producer->getType() == cast->getType()); assert(producer->getValueKind() == cast->getValueKind()); cleanups->setSubExpr(producer); return cleanups; } void Sema::ActOnStartStmtExpr() { PushExpressionEvaluationContext(ExprEvalContexts.back().Context); } void Sema::ActOnStmtExprError() { // Note that this function is also called by TreeTransform when leaving a // StmtExpr scope without rebuilding anything. DiscardCleanupsInEvaluationContext(); PopExpressionEvaluationContext(); } ExprResult Sema::ActOnStmtExpr(SourceLocation LPLoc, Stmt *SubStmt, SourceLocation RPLoc) { // "({..})" assert(SubStmt && isa<CompoundStmt>(SubStmt) && "Invalid action invocation!"); CompoundStmt *Compound = cast<CompoundStmt>(SubStmt); if (hasAnyUnrecoverableErrorsInThisFunction()) DiscardCleanupsInEvaluationContext(); assert(!Cleanup.exprNeedsCleanups() && "cleanups within StmtExpr not correctly bound!"); PopExpressionEvaluationContext(); // FIXME: there are a variety of strange constraints to enforce here, for // example, it is not possible to goto into a stmt expression apparently. // More semantic analysis is needed. // If there are sub-stmts in the compound stmt, take the type of the last one // as the type of the stmtexpr. QualType Ty = Context.VoidTy; bool StmtExprMayBindToTemp = false; if (!Compound->body_empty()) { Stmt *LastStmt = Compound->body_back(); LabelStmt *LastLabelStmt = nullptr; // If LastStmt is a label, skip down through into the body. while (LabelStmt *Label = dyn_cast<LabelStmt>(LastStmt)) { LastLabelStmt = Label; LastStmt = Label->getSubStmt(); } if (Expr *LastE = dyn_cast<Expr>(LastStmt)) { // Do function/array conversion on the last expression, but not // lvalue-to-rvalue. However, initialize an unqualified type. ExprResult LastExpr = DefaultFunctionArrayConversion(LastE); if (LastExpr.isInvalid()) return ExprError(); Ty = LastExpr.get()->getType().getUnqualifiedType(); if (!Ty->isDependentType() && !LastExpr.get()->isTypeDependent()) { // In ARC, if the final expression ends in a consume, splice // the consume out and bind it later. In the alternate case // (when dealing with a retainable type), the result // initialization will create a produce. In both cases the // result will be +1, and we'll need to balance that out with // a bind. if (Expr *rebuiltLastStmt = maybeRebuildARCConsumingStmt(LastExpr.get())) { LastExpr = rebuiltLastStmt; } else { LastExpr = PerformCopyInitialization( InitializedEntity::InitializeResult(LPLoc, Ty, false), SourceLocation(), LastExpr); } if (LastExpr.isInvalid()) return ExprError(); if (LastExpr.get() != nullptr) { if (!LastLabelStmt) Compound->setLastStmt(LastExpr.get()); else LastLabelStmt->setSubStmt(LastExpr.get()); StmtExprMayBindToTemp = true; } } } } // FIXME: Check that expression type is complete/non-abstract; statement // expressions are not lvalues.
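// (GNU statement-expression recap, illustrative only:
//    int y = ({ int t = f(); t * 2; });
//  'y' takes the type and value of the last sub-statement, as computed
//  above.)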
Expr *ResStmtExpr = new (Context) StmtExpr(Compound, Ty, LPLoc, RPLoc); if (StmtExprMayBindToTemp) return MaybeBindToTemporary(ResStmtExpr); return ResStmtExpr; } ExprResult Sema::BuildBuiltinOffsetOf(SourceLocation BuiltinLoc, TypeSourceInfo *TInfo, ArrayRef<OffsetOfComponent> Components, SourceLocation RParenLoc) { QualType ArgTy = TInfo->getType(); bool Dependent = ArgTy->isDependentType(); SourceRange TypeRange = TInfo->getTypeLoc().getLocalSourceRange(); // We must have at least one component that refers to the type, and the first // one is known to be a field designator. Verify that the ArgTy represents // a struct/union/class. if (!Dependent && !ArgTy->isRecordType()) return ExprError(Diag(BuiltinLoc, diag::err_offsetof_record_type) << ArgTy << TypeRange); // Type must be complete per C99 7.17p3 because declaring a variable // with an incomplete type would be ill-formed. if (!Dependent && RequireCompleteType(BuiltinLoc, ArgTy, diag::err_offsetof_incomplete_type, TypeRange)) return ExprError(); bool DidWarnAboutNonPOD = false; QualType CurrentType = ArgTy; SmallVector<OffsetOfNode, 4> Comps; SmallVector<Expr*, 4> Exprs; for (const OffsetOfComponent &OC : Components) { if (OC.isBrackets) { // Offset of an array sub-field. TODO: Should we allow vector elements? if (!CurrentType->isDependentType()) { const ArrayType *AT = Context.getAsArrayType(CurrentType); if (!AT) return ExprError(Diag(OC.LocEnd, diag::err_offsetof_array_type) << CurrentType); CurrentType = AT->getElementType(); } else CurrentType = Context.DependentTy; ExprResult IdxRval = DefaultLvalueConversion(static_cast<Expr*>(OC.U.E)); if (IdxRval.isInvalid()) return ExprError(); Expr *Idx = IdxRval.get(); // The expression must be an integral expression. // FIXME: An integral constant expression? if (!Idx->isTypeDependent() && !Idx->isValueDependent() && !Idx->getType()->isIntegerType()) return ExprError(Diag(Idx->getLocStart(), diag::err_typecheck_subscript_not_integer) << Idx->getSourceRange()); // Record this array index. Comps.push_back(OffsetOfNode(OC.LocStart, Exprs.size(), OC.LocEnd)); Exprs.push_back(Idx); continue; } // Offset of a field. if (CurrentType->isDependentType()) { // We have the offset of a field, but we can't look into the dependent // type. Just record the identifier of the field. Comps.push_back(OffsetOfNode(OC.LocStart, OC.U.IdentInfo, OC.LocEnd)); CurrentType = Context.DependentTy; continue; } // We need to have a complete type to look into. if (RequireCompleteType(OC.LocStart, CurrentType, diag::err_offsetof_incomplete_type)) return ExprError(); // Look for the designated field. const RecordType *RC = CurrentType->getAs<RecordType>(); if (!RC) return ExprError(Diag(OC.LocEnd, diag::err_offsetof_record_type) << CurrentType); RecordDecl *RD = RC->getDecl(); // C++ [lib.support.types]p5: // The macro offsetof accepts a restricted set of type arguments in this // International Standard. type shall be a POD structure or a POD union // (clause 9). // C++11 [support.types]p4: // If type is not a standard-layout class (Clause 9), the results are // undefined. if (CXXRecordDecl *CRD = dyn_cast<CXXRecordDecl>(RD)) { bool IsSafe = LangOpts.CPlusPlus11 ? CRD->isStandardLayout() : CRD->isPOD(); unsigned DiagID = LangOpts.CPlusPlus11 ? diag::ext_offsetof_non_standardlayout_type : diag::ext_offsetof_non_pod_type; if (!IsSafe && !DidWarnAboutNonPOD && DiagRuntimeBehavior(BuiltinLoc, nullptr, PDiag(DiagID) << SourceRange(Components[0].LocStart, OC.LocEnd) << CurrentType)) DidWarnAboutNonPOD = true; } // Look for the field.
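// (Illustrative: in 'offsetof(struct S, arr[2].c)' the '[2]' was recorded
// as an array node above; 'c' is the field designator resolved below.)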
LookupResult R(*this, OC.U.IdentInfo, OC.LocStart, LookupMemberName); LookupQualifiedName(R, RD); FieldDecl *MemberDecl = R.getAsSingle<FieldDecl>(); IndirectFieldDecl *IndirectMemberDecl = nullptr; if (!MemberDecl) { if ((IndirectMemberDecl = R.getAsSingle<IndirectFieldDecl>())) MemberDecl = IndirectMemberDecl->getAnonField(); } if (!MemberDecl) return ExprError(Diag(BuiltinLoc, diag::err_no_member) << OC.U.IdentInfo << RD << SourceRange(OC.LocStart, OC.LocEnd)); // C99 7.17p3: // (If the specified member is a bit-field, the behavior is undefined.) // // We diagnose this as an error. if (MemberDecl->isBitField()) { Diag(OC.LocEnd, diag::err_offsetof_bitfield) << MemberDecl->getDeclName() << SourceRange(BuiltinLoc, RParenLoc); Diag(MemberDecl->getLocation(), diag::note_bitfield_decl); return ExprError(); } RecordDecl *Parent = MemberDecl->getParent(); if (IndirectMemberDecl) Parent = cast<RecordDecl>(IndirectMemberDecl->getDeclContext()); // If the member was found in a base class, introduce OffsetOfNodes for // the base class indirections. CXXBasePaths Paths; if (IsDerivedFrom(OC.LocStart, CurrentType, Context.getTypeDeclType(Parent), Paths)) { if (Paths.getDetectedVirtual()) { Diag(OC.LocEnd, diag::err_offsetof_field_of_virtual_base) << MemberDecl->getDeclName() << SourceRange(BuiltinLoc, RParenLoc); return ExprError(); } CXXBasePath &Path = Paths.front(); for (const CXXBasePathElement &B : Path) Comps.push_back(OffsetOfNode(B.Base)); } if (IndirectMemberDecl) { for (auto *FI : IndirectMemberDecl->chain()) { assert(isa<FieldDecl>(FI)); Comps.push_back(OffsetOfNode(OC.LocStart, cast<FieldDecl>(FI), OC.LocEnd)); } } else Comps.push_back(OffsetOfNode(OC.LocStart, MemberDecl, OC.LocEnd)); CurrentType = MemberDecl->getType().getNonReferenceType(); } return OffsetOfExpr::Create(Context, Context.getSizeType(), BuiltinLoc, TInfo, Comps, Exprs, RParenLoc); } ExprResult Sema::ActOnBuiltinOffsetOf(Scope *S, SourceLocation BuiltinLoc, SourceLocation TypeLoc, ParsedType ParsedArgTy, ArrayRef<OffsetOfComponent> Components, SourceLocation RParenLoc) { TypeSourceInfo *ArgTInfo; QualType ArgTy = GetTypeFromParser(ParsedArgTy, &ArgTInfo); if (ArgTy.isNull()) return ExprError(); if (!ArgTInfo) ArgTInfo = Context.getTrivialTypeSourceInfo(ArgTy, TypeLoc); return BuildBuiltinOffsetOf(BuiltinLoc, ArgTInfo, Components, RParenLoc); } ExprResult Sema::ActOnChooseExpr(SourceLocation BuiltinLoc, Expr *CondExpr, Expr *LHSExpr, Expr *RHSExpr, SourceLocation RPLoc) { assert((CondExpr && LHSExpr && RHSExpr) && "Missing type argument(s)"); ExprValueKind VK = VK_RValue; ExprObjectKind OK = OK_Ordinary; QualType resType; bool ValueDependent = false; bool CondIsTrue = false; if (CondExpr->isTypeDependent() || CondExpr->isValueDependent()) { resType = Context.DependentTy; ValueDependent = true; } else { // The conditional expression is required to be a constant expression. llvm::APSInt condEval(32); ExprResult CondICE = VerifyIntegerConstantExpression(CondExpr, &condEval, diag::err_typecheck_choose_expr_requires_constant, false); if (CondICE.isInvalid()) return ExprError(); CondExpr = CondICE.get(); CondIsTrue = condEval.getZExtValue(); // If the condition is > zero, then the AST type is the same as the LHSExpr. Expr *ActiveExpr = CondIsTrue ?
LHSExpr : RHSExpr; resType = ActiveExpr->getType(); ValueDependent = ActiveExpr->isValueDependent(); VK = ActiveExpr->getValueKind(); OK = ActiveExpr->getObjectKind(); } return new (Context) ChooseExpr(BuiltinLoc, CondExpr, LHSExpr, RHSExpr, resType, VK, OK, RPLoc, CondIsTrue, resType->isDependentType(), ValueDependent); } //===----------------------------------------------------------------------===// // Clang Extensions. //===----------------------------------------------------------------------===// /// ActOnBlockStart - This callback is invoked when a block literal is started. void Sema::ActOnBlockStart(SourceLocation CaretLoc, Scope *CurScope) { BlockDecl *Block = BlockDecl::Create(Context, CurContext, CaretLoc); if (LangOpts.CPlusPlus) { Decl *ManglingContextDecl; if (MangleNumberingContext *MCtx = getCurrentMangleNumberContext(Block->getDeclContext(), ManglingContextDecl)) { unsigned ManglingNumber = MCtx->getManglingNumber(Block); Block->setBlockMangling(ManglingNumber, ManglingContextDecl); } } PushBlockScope(CurScope, Block); CurContext->addDecl(Block); if (CurScope) PushDeclContext(CurScope, Block); else CurContext = Block; getCurBlock()->HasImplicitReturnType = true; // Enter a new evaluation context to insulate the block from any // cleanups from the enclosing full-expression. PushExpressionEvaluationContext( ExpressionEvaluationContext::PotentiallyEvaluated); } void Sema::ActOnBlockArguments(SourceLocation CaretLoc, Declarator &ParamInfo, Scope *CurScope) { assert(ParamInfo.getIdentifier() == nullptr && "block-id should have no identifier!"); assert(ParamInfo.getContext() == DeclaratorContext::BlockLiteralContext); BlockScopeInfo *CurBlock = getCurBlock(); TypeSourceInfo *Sig = GetTypeForDeclarator(ParamInfo, CurScope); QualType T = Sig->getType(); // FIXME: We should allow unexpanded parameter packs here, but that would, // in turn, make the block expression contain unexpanded parameter packs. if (DiagnoseUnexpandedParameterPack(CaretLoc, Sig, UPPC_Block)) { // Drop the parameters. FunctionProtoType::ExtProtoInfo EPI; EPI.HasTrailingReturn = false; EPI.TypeQuals |= DeclSpec::TQ_const; T = Context.getFunctionType(Context.DependentTy, None, EPI); Sig = Context.getTrivialTypeSourceInfo(T); } // GetTypeForDeclarator always produces a function type for a block // literal signature. Furthermore, it is always a FunctionProtoType // unless the function was written with a typedef. assert(T->isFunctionType() && "GetTypeForDeclarator made a non-function block signature"); // Look for an explicit signature in that function type. FunctionProtoTypeLoc ExplicitSignature; if ((ExplicitSignature = Sig->getTypeLoc().getAsAdjusted<FunctionProtoTypeLoc>())) { // Check whether that explicit signature was synthesized by // GetTypeForDeclarator. If so, don't save that as part of the // written signature. if (ExplicitSignature.getLocalRangeBegin() == ExplicitSignature.getLocalRangeEnd()) { // This would be much cheaper if we stored TypeLocs instead of // TypeSourceInfos.
TypeLoc Result = ExplicitSignature.getReturnLoc(); unsigned Size = Result.getFullDataSize(); Sig = Context.CreateTypeSourceInfo(Result.getType(), Size); Sig->getTypeLoc().initializeFullCopy(Result, Size); ExplicitSignature = FunctionProtoTypeLoc(); } } CurBlock->TheDecl->setSignatureAsWritten(Sig); CurBlock->FunctionType = T; const FunctionType *Fn = T->getAs<FunctionType>(); QualType RetTy = Fn->getReturnType(); bool isVariadic = (isa<FunctionProtoType>(Fn) && cast<FunctionProtoType>(Fn)->isVariadic()); CurBlock->TheDecl->setIsVariadic(isVariadic); // Context.DependentTy is used as a placeholder for a missing block // return type. TODO: what should we do with declarators like: // ^ * { ... } // If the answer is "apply template argument deduction".... if (RetTy != Context.DependentTy) { CurBlock->ReturnType = RetTy; CurBlock->TheDecl->setBlockMissingReturnType(false); CurBlock->HasImplicitReturnType = false; } // Push block parameters from the declarator if we had them. SmallVector<ParmVarDecl*, 8> Params; if (ExplicitSignature) { for (unsigned I = 0, E = ExplicitSignature.getNumParams(); I != E; ++I) { ParmVarDecl *Param = ExplicitSignature.getParam(I); if (Param->getIdentifier() == nullptr && !Param->isImplicit() && !Param->isInvalidDecl() && !getLangOpts().CPlusPlus) Diag(Param->getLocation(), diag::err_parameter_name_omitted); Params.push_back(Param); } // Fake up parameter variables if we have a typedef, like // ^ fntype { ... } } else if (const FunctionProtoType *Fn = T->getAs<FunctionProtoType>()) { for (const auto &I : Fn->param_types()) { ParmVarDecl *Param = BuildParmVarDeclForTypedef( CurBlock->TheDecl, ParamInfo.getLocStart(), I); Params.push_back(Param); } } // Set the parameters on the block decl. if (!Params.empty()) { CurBlock->TheDecl->setParams(Params); CheckParmsForFunctionDef(CurBlock->TheDecl->parameters(), /*CheckParameterNames=*/false); } // Finally we can process decl attributes. ProcessDeclAttributes(CurScope, CurBlock->TheDecl, ParamInfo); // Put the parameter variables in scope. for (auto AI : CurBlock->TheDecl->parameters()) { AI->setOwningFunction(CurBlock->TheDecl); // If this has an identifier, add it to the scope stack. if (AI->getIdentifier()) { CheckShadow(CurBlock->TheScope, AI); PushOnScopeChains(AI, CurBlock->TheScope); } } } /// ActOnBlockError - If there is an error parsing a block, this callback /// is invoked to pop the information about the block from the action impl. void Sema::ActOnBlockError(SourceLocation CaretLoc, Scope *CurScope) { // Leave the expression-evaluation context. DiscardCleanupsInEvaluationContext(); PopExpressionEvaluationContext(); // Pop off CurBlock, handle nested blocks. PopDeclContext(); PopFunctionScopeInfo(); } /// ActOnBlockStmtExpr - This is called when the body of a block statement /// literal was successfully completed. ^(int x){...} ExprResult Sema::ActOnBlockStmtExpr(SourceLocation CaretLoc, Stmt *Body, Scope *CurScope) { // If blocks are disabled, emit an error. if (!LangOpts.Blocks) Diag(CaretLoc, diag::err_blocks_disable) << LangOpts.OpenCL; // Leave the expression-evaluation context.
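// (The matching Push was done in ActOnBlockStart; as in ActOnStmtExpr
// above, cleanups are discarded first when the enclosing function already
// contains unrecoverable errors.)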
if (hasAnyUnrecoverableErrorsInThisFunction()) DiscardCleanupsInEvaluationContext(); assert(!Cleanup.exprNeedsCleanups() && "cleanups within block not correctly bound!"); PopExpressionEvaluationContext(); BlockScopeInfo *BSI = cast<BlockScopeInfo>(FunctionScopes.back()); if (BSI->HasImplicitReturnType) deduceClosureReturnType(*BSI); PopDeclContext(); QualType RetTy = Context.VoidTy; if (!BSI->ReturnType.isNull()) RetTy = BSI->ReturnType; bool NoReturn = BSI->TheDecl->hasAttr<NoReturnAttr>(); QualType BlockTy; // Set the captured variables on the block. // FIXME: Share capture structure between BlockDecl and CapturingScopeInfo! SmallVector<BlockDecl::Capture, 4> Captures; for (CapturingScopeInfo::Capture &Cap : BSI->Captures) { if (Cap.isThisCapture()) continue; BlockDecl::Capture NewCap(Cap.getVariable(), Cap.isBlockCapture(), Cap.isNested(), Cap.getInitExpr()); Captures.push_back(NewCap); } BSI->TheDecl->setCaptures(Context, Captures, BSI->CXXThisCaptureIndex != 0); // If the user wrote a function type in some form, try to use that. if (!BSI->FunctionType.isNull()) { const FunctionType *FTy = BSI->FunctionType->getAs<FunctionType>(); FunctionType::ExtInfo Ext = FTy->getExtInfo(); if (NoReturn && !Ext.getNoReturn()) Ext = Ext.withNoReturn(true); // Turn protoless block types into nullary block types. if (isa<FunctionNoProtoType>(FTy)) { FunctionProtoType::ExtProtoInfo EPI; EPI.ExtInfo = Ext; BlockTy = Context.getFunctionType(RetTy, None, EPI); // Otherwise, if we don't need to change anything about the function type, // preserve its sugar structure. } else if (FTy->getReturnType() == RetTy && (!NoReturn || FTy->getNoReturnAttr())) { BlockTy = BSI->FunctionType; // Otherwise, make the minimal modifications to the function type. } else { const FunctionProtoType *FPT = cast<FunctionProtoType>(FTy); FunctionProtoType::ExtProtoInfo EPI = FPT->getExtProtoInfo(); EPI.TypeQuals = 0; // FIXME: silently? EPI.ExtInfo = Ext; BlockTy = Context.getFunctionType(RetTy, FPT->getParamTypes(), EPI); } // If we don't have a function type, just build one from nothing. } else { FunctionProtoType::ExtProtoInfo EPI; EPI.ExtInfo = FunctionType::ExtInfo().withNoReturn(NoReturn); BlockTy = Context.getFunctionType(RetTy, None, EPI); } DiagnoseUnusedParameters(BSI->TheDecl->parameters()); BlockTy = Context.getBlockPointerType(BlockTy); // If needed, diagnose invalid gotos and switches in the block. if (getCurFunction()->NeedsScopeChecking() && !PP.isCodeCompletionEnabled()) DiagnoseInvalidJumps(cast<CompoundStmt>(Body)); BSI->TheDecl->setBody(cast<CompoundStmt>(Body)); if (Body && getCurFunction()->HasPotentialAvailabilityViolations) DiagnoseUnguardedAvailabilityViolations(BSI->TheDecl); // Try to apply the named return value optimization. We have to check again // if we can do this, though, because blocks keep return statements around // to deduce an implicit return type. if (getLangOpts().CPlusPlus && RetTy->isRecordType() && !BSI->TheDecl->isDependentContext()) computeNRVO(Body, BSI); BlockExpr *Result = new (Context) BlockExpr(BSI->TheDecl, BlockTy); AnalysisBasedWarnings::Policy WP = AnalysisWarnings.getDefaultPolicy(); PopFunctionScopeInfo(&WP, Result->getBlockDecl(), Result); // If the block isn't obviously global, i.e. it captures anything at // all, then we need to do a few things in the surrounding context: if (Result->getBlockDecl()->hasCaptures()) { // First, this expression has a new cleanup object. ExprCleanupObjects.push_back(Result->getBlockDecl()); Cleanup.setExprNeedsCleanups(true); // It also gets a branch-protected scope if any of the captured // variables needs destruction.
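// (E.g., hypothetically, a block that copy-captures a std::string needs
// the enclosing function marked branch-protected so the copy is destroyed
// on every exit path; that is what the loop below checks for.)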
for (const auto &CI : Result->getBlockDecl()->captures()) { const VarDecl *var = CI.getVariable(); if (var->getType().isDestructedType() != QualType::DK_none) { getCurFunction()->setHasBranchProtectedScope(); break; } } } return Result; } ExprResult Sema::ActOnVAArg(SourceLocation BuiltinLoc, Expr *E, ParsedType Ty, SourceLocation RPLoc) { TypeSourceInfo *TInfo; GetTypeFromParser(Ty, &TInfo); return BuildVAArgExpr(BuiltinLoc, E, TInfo, RPLoc); } ExprResult Sema::BuildVAArgExpr(SourceLocation BuiltinLoc, Expr *E, TypeSourceInfo *TInfo, SourceLocation RPLoc) { Expr *OrigExpr = E; bool IsMS = false; // CUDA device code does not support varargs. if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) { if (const FunctionDecl *F = dyn_cast<FunctionDecl>(CurContext)) { CUDAFunctionTarget T = IdentifyCUDATarget(F); if (T == CFT_Global || T == CFT_Device || T == CFT_HostDevice) return ExprError(Diag(E->getLocStart(), diag::err_va_arg_in_device)); } } // It might be a __builtin_ms_va_list. (But don't ever mark a va_arg() // as Microsoft ABI on an actual Microsoft platform, where // __builtin_ms_va_list and __builtin_va_list are the same.) if (!E->isTypeDependent() && Context.getTargetInfo().hasBuiltinMSVaList() && Context.getTargetInfo().getBuiltinVaListKind() != TargetInfo::CharPtrBuiltinVaList) { QualType MSVaListType = Context.getBuiltinMSVaListType(); if (Context.hasSameType(MSVaListType, E->getType())) { if (CheckForModifiableLvalue(E, BuiltinLoc, *this)) return ExprError(); IsMS = true; } } // Get the va_list type. QualType VaListType = Context.getBuiltinVaListType(); if (!IsMS) { if (VaListType->isArrayType()) { // Deal with implicit array decay; for example, on x86-64, // va_list is an array, but it's supposed to decay to // a pointer for va_arg. VaListType = Context.getArrayDecayedType(VaListType); // Make sure the input expression also decays appropriately. ExprResult Result = UsualUnaryConversions(E); if (Result.isInvalid()) return ExprError(); E = Result.get(); } else if (VaListType->isRecordType() && getLangOpts().CPlusPlus) { // If va_list is a record type and we are compiling in C++ mode, // check the argument using reference binding. InitializedEntity Entity = InitializedEntity::InitializeParameter( Context, Context.getLValueReferenceType(VaListType), false); ExprResult Init = PerformCopyInitialization(Entity, SourceLocation(), E); if (Init.isInvalid()) return ExprError(); E = Init.getAs<Expr>(); } else { // Otherwise, the va_list argument must be an l-value because // it is modified by va_arg. if (!E->isTypeDependent() && CheckForModifiableLvalue(E, BuiltinLoc, *this)) return ExprError(); } } if (!IsMS && !E->isTypeDependent() && !Context.hasSameType(VaListType, E->getType())) return ExprError(Diag(E->getLocStart(), diag::err_first_argument_to_va_arg_not_of_type_va_list) << OrigExpr->getType() << E->getSourceRange()); if (!TInfo->getType()->isDependentType()) { if (RequireCompleteType(TInfo->getTypeLoc().getBeginLoc(), TInfo->getType(), diag::err_second_parameter_to_va_arg_incomplete, TInfo->getTypeLoc())) return ExprError(); if (RequireNonAbstractType(TInfo->getTypeLoc().getBeginLoc(), TInfo->getType(), diag::err_second_parameter_to_va_arg_abstract, TInfo->getTypeLoc())) return ExprError(); if (!TInfo->getType().isPODType(Context)) { Diag(TInfo->getTypeLoc().getBeginLoc(), TInfo->getType()->isObjCLifetimeType() ?
diag::warn_second_parameter_to_va_arg_ownership_qualified : diag::warn_second_parameter_to_va_arg_not_pod) << TInfo->getType() << TInfo->getTypeLoc().getSourceRange(); } // Check for va_arg where arguments of the given type will be promoted // (i.e. this va_arg is guaranteed to have undefined behavior). QualType PromoteType; if (TInfo->getType()->isPromotableIntegerType()) { PromoteType = Context.getPromotedIntegerType(TInfo->getType()); if (Context.typesAreCompatible(PromoteType, TInfo->getType())) PromoteType = QualType(); } if (TInfo->getType()->isSpecificBuiltinType(BuiltinType::Float)) PromoteType = Context.DoubleTy; if (!PromoteType.isNull()) DiagRuntimeBehavior(TInfo->getTypeLoc().getBeginLoc(), E, PDiag(diag::warn_second_parameter_to_va_arg_never_compatible) << TInfo->getType() << PromoteType << TInfo->getTypeLoc().getSourceRange()); } QualType T = TInfo->getType().getNonLValueExprType(Context); return new (Context) VAArgExpr(BuiltinLoc, E, TInfo, RPLoc, T, IsMS); } ExprResult Sema::ActOnGNUNullExpr(SourceLocation TokenLoc) { // The type of __null will be int or long, depending on the size of // pointers on the target. QualType Ty; unsigned pw = Context.getTargetInfo().getPointerWidth(0); if (pw == Context.getTargetInfo().getIntWidth()) Ty = Context.IntTy; else if (pw == Context.getTargetInfo().getLongWidth()) Ty = Context.LongTy; else if (pw == Context.getTargetInfo().getLongLongWidth()) Ty = Context.LongLongTy; else { llvm_unreachable("I don't know size of pointer!"); } return new (Context) GNUNullExpr(Ty, TokenLoc); } bool Sema::ConversionToObjCStringLiteralCheck(QualType DstType, Expr *&Exp, bool Diagnose) { if (!getLangOpts().ObjC1) return false; const ObjCObjectPointerType *PT = DstType->getAs<ObjCObjectPointerType>(); if (!PT) return false; if (!PT->isObjCIdType()) { // Check if the destination is the 'NSString' interface. const ObjCInterfaceDecl *ID = PT->getInterfaceDecl(); if (!ID || !ID->getIdentifier()->isStr("NSString")) return false; } // Ignore any parens, implicit casts (should only be // array-to-pointer decays), and not-so-opaque values. The last is // important for making this trigger for property assignments. Expr *SrcExpr = Exp->IgnoreParenImpCasts(); if (OpaqueValueExpr *OV = dyn_cast<OpaqueValueExpr>(SrcExpr)) if (OV->getSourceExpr()) SrcExpr = OV->getSourceExpr()->IgnoreParenImpCasts(); StringLiteral *SL = dyn_cast<StringLiteral>(SrcExpr); if (!SL || !SL->isAscii()) return false; if (Diagnose) { Diag(SL->getLocStart(), diag::err_missing_atsign_prefix) << FixItHint::CreateInsertion(SL->getLocStart(), "@"); Exp = BuildObjCStringLiteral(SL->getLocStart(), SL).get(); } return true; } static bool maybeDiagnoseAssignmentToFunction(Sema &S, QualType DstType, const Expr *SrcExpr) { if (!DstType->isFunctionPointerType() || !SrcExpr->getType()->isFunctionType()) return false; auto *DRE = dyn_cast<DeclRefExpr>(SrcExpr->IgnoreParenImpCasts()); if (!DRE) return false; auto *FD = dyn_cast<FunctionDecl>(DRE->getDecl()); if (!FD) return false; return !S.checkAddressOfFunctionIsAvailable(FD, /*Complain=*/true, SrcExpr->getLocStart()); } bool Sema::DiagnoseAssignmentResult(AssignConvertType ConvTy, SourceLocation Loc, QualType DstType, QualType SrcType, Expr *SrcExpr, AssignmentAction Action, bool *Complained) { if (Complained) *Complained = false; // Decode the result (notice that AST's are still created for extensions).
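// (Illustrative mapping: 'int *p = 3;' decodes to IntToPointer below and
// gets ext_typecheck_convert_int_pointer, plus a cast fixit when one can
// be synthesized.)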
bool CheckInferredResultType = false; bool isInvalid = false; unsigned DiagKind = 0; FixItHint Hint; ConversionFixItGenerator ConvHints; bool MayHaveConvFixit = false; bool MayHaveFunctionDiff = false; const ObjCInterfaceDecl *IFace = nullptr; const ObjCProtocolDecl *PDecl = nullptr; switch (ConvTy) { case Compatible: DiagnoseAssignmentEnum(DstType, SrcType, SrcExpr); return false; case PointerToInt: DiagKind = diag::ext_typecheck_convert_pointer_int; ConvHints.tryToFixConversion(SrcExpr, SrcType, DstType, *this); MayHaveConvFixit = true; break; case IntToPointer: DiagKind = diag::ext_typecheck_convert_int_pointer; ConvHints.tryToFixConversion(SrcExpr, SrcType, DstType, *this); MayHaveConvFixit = true; break; case IncompatiblePointer: if (Action == AA_Passing_CFAudited) DiagKind = diag::err_arc_typecheck_convert_incompatible_pointer; else if (SrcType->isFunctionPointerType() && DstType->isFunctionPointerType()) DiagKind = diag::ext_typecheck_convert_incompatible_function_pointer; else DiagKind = diag::ext_typecheck_convert_incompatible_pointer; CheckInferredResultType = DstType->isObjCObjectPointerType() && SrcType->isObjCObjectPointerType(); if (Hint.isNull() && !CheckInferredResultType) { ConvHints.tryToFixConversion(SrcExpr, SrcType, DstType, *this); } else if (CheckInferredResultType) { SrcType = SrcType.getUnqualifiedType(); DstType = DstType.getUnqualifiedType(); } MayHaveConvFixit = true; break; case IncompatiblePointerSign: DiagKind = diag::ext_typecheck_convert_incompatible_pointer_sign; break; case FunctionVoidPointer: DiagKind = diag::ext_typecheck_convert_pointer_void_func; break; case IncompatiblePointerDiscardsQualifiers: { // Perform array-to-pointer decay if necessary. if (SrcType->isArrayType()) SrcType = Context.getArrayDecayedType(SrcType); Qualifiers lhq = SrcType->getPointeeType().getQualifiers(); Qualifiers rhq = DstType->getPointeeType().getQualifiers(); if (lhq.getAddressSpace() != rhq.getAddressSpace()) { DiagKind = diag::err_typecheck_incompatible_address_space; break; } else if (lhq.getObjCLifetime() != rhq.getObjCLifetime()) { DiagKind = diag::err_typecheck_incompatible_ownership; break; } llvm_unreachable("unknown error case for discarding qualifiers!"); // fallthrough } case CompatiblePointerDiscardsQualifiers: // If the qualifiers lost were because we were applying the // (deprecated) C++ conversion from a string literal to a char* // (or wchar_t*), then there was no error (C++ 4.2p2). FIXME: // Ideally, this check would be performed in // checkPointerTypesForAssignment. However, that would require a // bit of refactoring (so that the second argument is an // expression, rather than a type), which should be done as part // of a larger effort to fix checkPointerTypesForAssignment for // C++ semantics. 
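// (Illustrative: in C++, 'char *s = "hi";' is the deprecated
// string-literal-to-char* conversion that the early return below exempts
// from the discarded-qualifiers diagnostic.)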
if (getLangOpts().CPlusPlus && IsStringLiteralToNonConstPointerConversion(SrcExpr, DstType)) return false; DiagKind = diag::ext_typecheck_convert_discards_qualifiers; break; case IncompatibleNestedPointerQualifiers: DiagKind = diag::ext_nested_pointer_qualifier_mismatch; break; case IntToBlockPointer: DiagKind = diag::err_int_to_block_pointer; break; case IncompatibleBlockPointer: DiagKind = diag::err_typecheck_convert_incompatible_block_pointer; break; case IncompatibleObjCQualifiedId: { if (SrcType->isObjCQualifiedIdType()) { const ObjCObjectPointerType *srcOPT = SrcType->getAs<ObjCObjectPointerType>(); for (auto *srcProto : srcOPT->quals()) { PDecl = srcProto; break; } if (const ObjCInterfaceType *IFaceT = DstType->getAs<ObjCObjectPointerType>()->getInterfaceType()) IFace = IFaceT->getDecl(); } else if (DstType->isObjCQualifiedIdType()) { const ObjCObjectPointerType *dstOPT = DstType->getAs<ObjCObjectPointerType>(); for (auto *dstProto : dstOPT->quals()) { PDecl = dstProto; break; } if (const ObjCInterfaceType *IFaceT = SrcType->getAs<ObjCObjectPointerType>()->getInterfaceType()) IFace = IFaceT->getDecl(); } DiagKind = diag::warn_incompatible_qualified_id; break; } case IncompatibleVectors: DiagKind = diag::warn_incompatible_vectors; break; case IncompatibleObjCWeakRef: DiagKind = diag::err_arc_weak_unavailable_assign; break; case Incompatible: if (maybeDiagnoseAssignmentToFunction(*this, DstType, SrcExpr)) { if (Complained) *Complained = true; return true; } DiagKind = diag::err_typecheck_convert_incompatible; ConvHints.tryToFixConversion(SrcExpr, SrcType, DstType, *this); MayHaveConvFixit = true; isInvalid = true; MayHaveFunctionDiff = true; break; } QualType FirstType, SecondType; switch (Action) { case AA_Assigning: case AA_Initializing: // The destination type comes first. FirstType = DstType; SecondType = SrcType; break; case AA_Returning: case AA_Passing: case AA_Passing_CFAudited: case AA_Converting: case AA_Sending: case AA_Casting: // The source type comes first. FirstType = SrcType; SecondType = DstType; break; } PartialDiagnostic FDiag = PDiag(DiagKind); if (Action == AA_Passing_CFAudited) FDiag << FirstType << SecondType << AA_Passing << SrcExpr->getSourceRange(); else FDiag << FirstType << SecondType << Action << SrcExpr->getSourceRange(); // If we can fix the conversion, suggest the FixIts.
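// (At most one of ConvHints and Hint is ever populated: the cases above
// either call ConvHints.tryToFixConversion or leave Hint alone, which is
// what the assert below enforces.)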
assert(ConvHints.isNull() || Hint.isNull()); if (!ConvHints.isNull()) { for (FixItHint &H : ConvHints.Hints) FDiag << H; } else { FDiag << Hint; } if (MayHaveConvFixit) { FDiag << (unsigned) (ConvHints.Kind); } if (MayHaveFunctionDiff) HandleFunctionTypeMismatch(FDiag, SecondType, FirstType); Diag(Loc, FDiag); if (DiagKind == diag::warn_incompatible_qualified_id && PDecl && IFace && !IFace->hasDefinition()) Diag(IFace->getLocation(), diag::note_incomplete_class_and_qualified_id) << IFace->getName() << PDecl->getName(); if (SecondType == Context.OverloadTy) NoteAllOverloadCandidates(OverloadExpr::find(SrcExpr).Expression, FirstType, /*TakingAddress=*/true); if (CheckInferredResultType) EmitRelatedResultTypeNote(SrcExpr); if (Action == AA_Returning && ConvTy == IncompatiblePointer) EmitRelatedResultTypeNoteForReturn(DstType); if (Complained) *Complained = true; return isInvalid; } ExprResult Sema::VerifyIntegerConstantExpression(Expr *E, llvm::APSInt *Result) { class SimpleICEDiagnoser : public VerifyICEDiagnoser { public: void diagnoseNotICE(Sema &S, SourceLocation Loc, SourceRange SR) override { S.Diag(Loc, diag::err_expr_not_ice) << S.LangOpts.CPlusPlus << SR; } } Diagnoser; return VerifyIntegerConstantExpression(E, Result, Diagnoser); } ExprResult Sema::VerifyIntegerConstantExpression(Expr *E, llvm::APSInt *Result, unsigned DiagID, bool AllowFold) { class IDDiagnoser : public VerifyICEDiagnoser { unsigned DiagID; public: IDDiagnoser(unsigned DiagID) : VerifyICEDiagnoser(DiagID == 0), DiagID(DiagID) { } void diagnoseNotICE(Sema &S, SourceLocation Loc, SourceRange SR) override { S.Diag(Loc, DiagID) << SR; } } Diagnoser(DiagID); return VerifyIntegerConstantExpression(E, Result, Diagnoser, AllowFold); } void Sema::VerifyICEDiagnoser::diagnoseFold(Sema &S, SourceLocation Loc, SourceRange SR) { S.Diag(Loc, diag::ext_expr_not_ice) << SR << S.LangOpts.CPlusPlus; } ExprResult Sema::VerifyIntegerConstantExpression(Expr *E, llvm::APSInt *Result, VerifyICEDiagnoser &Diagnoser, bool AllowFold) { SourceLocation DiagLoc = E->getLocStart(); if (getLangOpts().CPlusPlus11) { // C++11 [expr.const]p5: // If an expression of literal class type is used in a context where an // integral constant expression is required, then that class type shall // have a single non-explicit conversion function to an integral or // unscoped enumeration type ExprResult Converted; class CXX11ConvertDiagnoser : public ICEConvertDiagnoser { public: CXX11ConvertDiagnoser(bool Silent) : ICEConvertDiagnoser(/*AllowScopedEnumerations*/false, Silent, true) {} SemaDiagnosticBuilder diagnoseNotInt(Sema &S, SourceLocation Loc, QualType T) override { return S.Diag(Loc, diag::err_ice_not_integral) << T; } SemaDiagnosticBuilder diagnoseIncomplete( Sema &S, SourceLocation Loc, QualType T) override { return S.Diag(Loc, diag::err_ice_incomplete_type) << T; } SemaDiagnosticBuilder diagnoseExplicitConv( Sema &S, SourceLocation Loc, QualType T, QualType ConvTy) override { return S.Diag(Loc, diag::err_ice_explicit_conversion) << T << ConvTy; } SemaDiagnosticBuilder noteExplicitConv( Sema &S, CXXConversionDecl *Conv, QualType ConvTy) override { return S.Diag(Conv->getLocation(), diag::note_ice_conversion_here) << ConvTy->isEnumeralType() << ConvTy; } SemaDiagnosticBuilder diagnoseAmbiguous( Sema &S, SourceLocation Loc, QualType T) override { return S.Diag(Loc, diag::err_ice_ambiguous_conversion) << T; } SemaDiagnosticBuilder noteAmbiguous( Sema &S, CXXConversionDecl *Conv, QualType ConvTy) override { return S.Diag(Conv->getLocation(), 
diag::note_ice_conversion_here) << ConvTy->isEnumeralType() << ConvTy; } SemaDiagnosticBuilder diagnoseConversion( Sema &S, SourceLocation Loc, QualType T, QualType ConvTy) override { llvm_unreachable("conversion functions are permitted"); } } ConvertDiagnoser(Diagnoser.Suppress); Converted = PerformContextualImplicitConversion(DiagLoc, E, ConvertDiagnoser); if (Converted.isInvalid()) return Converted; E = Converted.get(); if (!E->getType()->isIntegralOrUnscopedEnumerationType()) return ExprError(); } else if (!E->getType()->isIntegralOrUnscopedEnumerationType()) { // An ICE must be of integral or unscoped enumeration type. if (!Diagnoser.Suppress) Diagnoser.diagnoseNotICE(*this, DiagLoc, E->getSourceRange()); return ExprError(); } // Circumvent ICE checking in C++11 to avoid evaluating the expression twice // in the non-ICE case. if (!getLangOpts().CPlusPlus11 && E->isIntegerConstantExpr(Context)) { if (Result) *Result = E->EvaluateKnownConstInt(Context); return E; } Expr::EvalResult EvalResult; SmallVector<PartialDiagnosticAt, 8> Notes; EvalResult.Diag = &Notes; // Try to evaluate the expression, and produce diagnostics explaining why it's // not a constant expression as a side-effect. bool Folded = E->EvaluateAsRValue(EvalResult, Context) && EvalResult.Val.isInt() && !EvalResult.HasSideEffects; // In C++11, we can rely on diagnostics being produced for any expression // which is not a constant expression. If no diagnostics were produced, then // this is a constant expression. if (Folded && getLangOpts().CPlusPlus11 && Notes.empty()) { if (Result) *Result = EvalResult.Val.getInt(); return E; } // If our only note is the usual "invalid subexpression" note, just point // the caret at its location rather than producing an essentially // redundant note. if (Notes.size() == 1 && Notes[0].second.getDiagID() == diag::note_invalid_subexpr_in_const_expr) { DiagLoc = Notes[0].first; Notes.clear(); } if (!Folded || !AllowFold) { if (!Diagnoser.Suppress) { Diagnoser.diagnoseNotICE(*this, DiagLoc, E->getSourceRange()); for (const PartialDiagnosticAt &Note : Notes) Diag(Note.first, Note.second); } return ExprError(); } Diagnoser.diagnoseFold(*this, DiagLoc, E->getSourceRange()); for (const PartialDiagnosticAt &Note : Notes) Diag(Note.first, Note.second); if (Result) *Result = EvalResult.Val.getInt(); return E; } namespace { // Handle the case where we conclude an expression which we speculatively // considered to be unevaluated is actually evaluated. class TransformToPE : public TreeTransform<TransformToPE> { typedef TreeTransform<TransformToPE> BaseTransform; public: TransformToPE(Sema &SemaRef) : BaseTransform(SemaRef) { } // Make sure we redo semantic analysis bool AlwaysRebuild() { return true; } // Make sure we handle LabelStmts correctly. // FIXME: This does the right thing, but maybe we need a more general // fix to TreeTransform? StmtResult TransformLabelStmt(LabelStmt *S) { S->getDecl()->setStmt(nullptr); return BaseTransform::TransformLabelStmt(S); } // We need to special-case DeclRefExprs referring to FieldDecls which // are not part of a member pointer formation; normal TreeTransforming // doesn't catch this case because of the way we represent them in the AST. // FIXME: This is a bit ugly; is it really the best way to handle this // case? // // Error on DeclRefExprs referring to FieldDecls.
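// (Sketch: a field reference that was fine while the expression was
// presumed unevaluated, e.g. inside a typeof operand, becomes
// err_invalid_non_static_member_use once we learn it is evaluated.)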
ExprResult TransformDeclRefExpr(DeclRefExpr *E) { if (isa<FieldDecl>(E->getDecl()) && !SemaRef.isUnevaluatedContext()) return SemaRef.Diag(E->getLocation(), diag::err_invalid_non_static_member_use) << E->getDecl() << E->getSourceRange(); return BaseTransform::TransformDeclRefExpr(E); } // Exception: filter out member pointer formation ExprResult TransformUnaryOperator(UnaryOperator *E) { if (E->getOpcode() == UO_AddrOf && E->getType()->isMemberPointerType()) return E; return BaseTransform::TransformUnaryOperator(E); } ExprResult TransformLambdaExpr(LambdaExpr *E) { // Lambdas never need to be transformed. return E; } }; } ExprResult Sema::TransformToPotentiallyEvaluated(Expr *E) { assert(isUnevaluatedContext() && "Should only transform unevaluated expressions"); ExprEvalContexts.back().Context = ExprEvalContexts[ExprEvalContexts.size()-2].Context; if (isUnevaluatedContext()) return E; return TransformToPE(*this).TransformExpr(E); } void Sema::PushExpressionEvaluationContext(ExpressionEvaluationContext NewContext, Decl *LambdaContextDecl, bool IsDecltype) { ExprEvalContexts.emplace_back(NewContext, ExprCleanupObjects.size(), Cleanup, LambdaContextDecl, IsDecltype); Cleanup.reset(); if (!MaybeODRUseExprs.empty()) std::swap(MaybeODRUseExprs, ExprEvalContexts.back().SavedMaybeODRUseExprs); } void Sema::PushExpressionEvaluationContext(ExpressionEvaluationContext NewContext, ReuseLambdaContextDecl_t, bool IsDecltype) { Decl *ClosureContextDecl = ExprEvalContexts.back().ManglingContextDecl; PushExpressionEvaluationContext(NewContext, ClosureContextDecl, IsDecltype); } void Sema::PopExpressionEvaluationContext() { ExpressionEvaluationContextRecord& Rec = ExprEvalContexts.back(); unsigned NumTypos = Rec.NumTypos; if (!Rec.Lambdas.empty()) { if (Rec.isUnevaluated() || Rec.isConstantEvaluated()) { unsigned D; if (Rec.isUnevaluated()) { // C++11 [expr.prim.lambda]p2: // A lambda-expression shall not appear in an unevaluated operand // (Clause 5). D = diag::err_lambda_unevaluated_operand; } else { // C++1y [expr.const]p2: // A conditional-expression e is a core constant expression unless the // evaluation of e, following the rules of the abstract machine, would // evaluate [...] a lambda-expression. D = diag::err_lambda_in_constant_expression; } // C++1z allows lambda expressions as core constant expressions. // FIXME: In C++1z, reinstate the restrictions on lambda expressions (CWG // 1607) from appearing within template-arguments and array-bounds that // are part of function-signatures. Be mindful that P0315 (Lambdas in // unevaluated contexts) might lift some of these restrictions in a // future version. if (!Rec.isConstantEvaluated() || !getLangOpts().CPlusPlus17) for (const auto *L : Rec.Lambdas) Diag(L->getLocStart(), D); } else { // Mark the capture expressions odr-used. This was deferred // during lambda expression creation. for (auto *Lambda : Rec.Lambdas) { for (auto *C : Lambda->capture_inits()) MarkDeclarationsReferencedInExpr(C); } } } // When we are coming out of an unevaluated context, clear out any // temporaries that we may have created as part of the evaluation of // the expression in that context: they aren't relevant because they // will never be constructed. if (Rec.isUnevaluated() || Rec.isConstantEvaluated()) { ExprCleanupObjects.erase(ExprCleanupObjects.begin() + Rec.NumCleanupObjects, ExprCleanupObjects.end()); Cleanup = Rec.ParentCleanup; CleanupVarDeclMarking(); std::swap(MaybeODRUseExprs, Rec.SavedMaybeODRUseExprs); // Otherwise, merge the contexts together.
} else { Cleanup.mergeFrom(Rec.ParentCleanup); MaybeODRUseExprs.insert(Rec.SavedMaybeODRUseExprs.begin(), Rec.SavedMaybeODRUseExprs.end()); } // Pop the current expression evaluation context off the stack. ExprEvalContexts.pop_back(); if (!ExprEvalContexts.empty()) ExprEvalContexts.back().NumTypos += NumTypos; else assert(NumTypos == 0 && "There are outstanding typos after popping the " "last ExpressionEvaluationContextRecord"); } void Sema::DiscardCleanupsInEvaluationContext() { ExprCleanupObjects.erase( ExprCleanupObjects.begin() + ExprEvalContexts.back().NumCleanupObjects, ExprCleanupObjects.end()); Cleanup.reset(); MaybeODRUseExprs.clear(); } ExprResult Sema::HandleExprEvaluationContextForTypeof(Expr *E) { if (!E->getType()->isVariablyModifiedType()) return E; return TransformToPotentiallyEvaluated(E); } /// Are we within a context in which some evaluation could be performed (be it /// constant evaluation or runtime evaluation)? Sadly, this notion is not quite /// captured by C++'s idea of an "unevaluated context". static bool isEvaluatableContext(Sema &SemaRef) { switch (SemaRef.ExprEvalContexts.back().Context) { case Sema::ExpressionEvaluationContext::Unevaluated: case Sema::ExpressionEvaluationContext::UnevaluatedAbstract: case Sema::ExpressionEvaluationContext::DiscardedStatement: // Expressions in this context are never evaluated. return false; case Sema::ExpressionEvaluationContext::UnevaluatedList: case Sema::ExpressionEvaluationContext::ConstantEvaluated: case Sema::ExpressionEvaluationContext::PotentiallyEvaluated: // Expressions in this context could be evaluated. return true; case Sema::ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed: // Referenced declarations will only be used if the construct in the // containing expression is used, at which point we'll be given another // turn to mark them. return false; } llvm_unreachable("Invalid context"); } /// Are we within a context in which references to resolved functions or to /// variables result in odr-use? static bool isOdrUseContext(Sema &SemaRef, bool SkipDependentUses = true) { // An expression in a template is not really an expression until it's been // instantiated, so it doesn't trigger odr-use. if (SkipDependentUses && SemaRef.CurContext->isDependentContext()) return false; switch (SemaRef.ExprEvalContexts.back().Context) { case Sema::ExpressionEvaluationContext::Unevaluated: case Sema::ExpressionEvaluationContext::UnevaluatedList: case Sema::ExpressionEvaluationContext::UnevaluatedAbstract: case Sema::ExpressionEvaluationContext::DiscardedStatement: return false; case Sema::ExpressionEvaluationContext::ConstantEvaluated: case Sema::ExpressionEvaluationContext::PotentiallyEvaluated: return true; case Sema::ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed: return false; } llvm_unreachable("Invalid context"); } static bool isImplicitlyDefinableConstexprFunction(FunctionDecl *Func) { CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(Func); return Func->isConstexpr() && (Func->isImplicitlyInstantiable() || (MD && !MD->isUserProvided())); } /// \brief Mark a function referenced, and check whether it is odr-used /// (C++ [basic.def.odr]p2, C99 6.9p3) void Sema::MarkFunctionReferenced(SourceLocation Loc, FunctionDecl *Func, bool MightBeOdrUse) { assert(Func && "No function?"); Func->setReferenced(); // C++11 [basic.def.odr]p3: // A function whose name appears as a potentially-evaluated expression is // odr-used if it is the unique lookup result or the selected member of a // set of overloaded functions [...].
// // We (incorrectly) mark overload resolution as an unevaluated context, so we // can just check that here. bool OdrUse = MightBeOdrUse && isOdrUseContext(*this); // Determine whether we require a function definition to exist, per // C++11 [temp.inst]p3: // Unless a function template specialization has been explicitly // instantiated or explicitly specialized, the function template // specialization is implicitly instantiated when the specialization is // referenced in a context that requires a function definition to exist. // // That is either when this is an odr-use, or when a usage of a constexpr // function occurs within an evaluatable context. bool NeedDefinition = OdrUse || (isEvaluatableContext(*this) && isImplicitlyDefinableConstexprFunction(Func)); // C++14 [temp.expl.spec]p6: // If a template [...] is explicitly specialized then that specialization // shall be declared before the first use of that specialization that would // cause an implicit instantiation to take place, in every translation unit // in which such a use occurs if (NeedDefinition && (Func->getTemplateSpecializationKind() != TSK_Undeclared || Func->getMemberSpecializationInfo())) checkSpecializationVisibility(Loc, Func); // C++14 [except.spec]p17: // An exception-specification is considered to be needed when: // - the function is odr-used or, if it appears in an unevaluated operand, // would be odr-used if the expression were potentially-evaluated; // // Note, we do this even if MightBeOdrUse is false. That indicates that the // function is a pure virtual function we're calling, and in that case the // function was selected by overload resolution and we need to resolve its // exception specification for a different reason. const FunctionProtoType *FPT = Func->getType()->getAs<FunctionProtoType>(); if (FPT && isUnresolvedExceptionSpec(FPT->getExceptionSpecType())) ResolveExceptionSpec(Loc, FPT); // If we don't need to mark the function as used, and we don't need to // try to provide a definition, there's nothing more to do. if ((Func->isUsed(/*CheckUsedAttr=*/false) || !OdrUse) && (!NeedDefinition || Func->getBody())) return; // Note that this declaration has been used.
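// Illustrative example (editor's sketch, not from the original source):
//   struct S { S() = default; };
//   S s;  // marking S::S() referenced here leads to
//         // DefineImplicitDefaultConstructor below, unless the
//         // constructor is trivial and can be skipped entirely.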
if (CXXConstructorDecl *Constructor = dyn_cast<CXXConstructorDecl>(Func)) { Constructor = cast<CXXConstructorDecl>(Constructor->getFirstDecl()); if (Constructor->isDefaulted() && !Constructor->isDeleted()) { if (Constructor->isDefaultConstructor()) { if (Constructor->isTrivial() && !Constructor->hasAttr<DLLExportAttr>()) return; DefineImplicitDefaultConstructor(Loc, Constructor); } else if (Constructor->isCopyConstructor()) { DefineImplicitCopyConstructor(Loc, Constructor); } else if (Constructor->isMoveConstructor()) { DefineImplicitMoveConstructor(Loc, Constructor); } } else if (Constructor->getInheritedConstructor()) { DefineInheritingConstructor(Loc, Constructor); } } else if (CXXDestructorDecl *Destructor = dyn_cast<CXXDestructorDecl>(Func)) { Destructor = cast<CXXDestructorDecl>(Destructor->getFirstDecl()); if (Destructor->isDefaulted() && !Destructor->isDeleted()) { if (Destructor->isTrivial() && !Destructor->hasAttr<DLLExportAttr>()) return; DefineImplicitDestructor(Loc, Destructor); } if (Destructor->isVirtual() && getLangOpts().AppleKext) MarkVTableUsed(Loc, Destructor->getParent()); } else if (CXXMethodDecl *MethodDecl = dyn_cast<CXXMethodDecl>(Func)) { if (MethodDecl->isOverloadedOperator() && MethodDecl->getOverloadedOperator() == OO_Equal) { MethodDecl = cast<CXXMethodDecl>(MethodDecl->getFirstDecl()); if (MethodDecl->isDefaulted() && !MethodDecl->isDeleted()) { if (MethodDecl->isCopyAssignmentOperator()) DefineImplicitCopyAssignment(Loc, MethodDecl); else if (MethodDecl->isMoveAssignmentOperator()) DefineImplicitMoveAssignment(Loc, MethodDecl); } } else if (isa<CXXConversionDecl>(MethodDecl) && MethodDecl->getParent()->isLambda()) { CXXConversionDecl *Conversion = cast<CXXConversionDecl>(MethodDecl->getFirstDecl()); if (Conversion->isLambdaToBlockPointerConversion()) DefineImplicitLambdaToBlockPointerConversion(Loc, Conversion); else DefineImplicitLambdaToFunctionPointerConversion(Loc, Conversion); } else if (MethodDecl->isVirtual() && getLangOpts().AppleKext) MarkVTableUsed(Loc, MethodDecl->getParent()); } // Recursive functions should be marked when used from another function. // FIXME: Is this really right? if (CurContext == Func) return; // Implicit instantiation of function templates and member functions of // class templates. if (Func->isImplicitlyInstantiable()) { TemplateSpecializationKind TSK = Func->getTemplateSpecializationKind(); SourceLocation PointOfInstantiation = Func->getPointOfInstantiation(); bool FirstInstantiation = PointOfInstantiation.isInvalid(); if (FirstInstantiation) { PointOfInstantiation = Loc; Func->setTemplateSpecializationKind(TSK, PointOfInstantiation); } else if (TSK != TSK_ImplicitInstantiation) { // Use the point of use as the point of instantiation, instead of the // point of explicit instantiation (which we track as the actual point of // instantiation). This gives better backtraces in diagnostics. PointOfInstantiation = Loc; } if (FirstInstantiation || TSK != TSK_ImplicitInstantiation || Func->isConstexpr()) { if (isa<CXXRecordDecl>(Func->getDeclContext()) && cast<CXXRecordDecl>(Func->getDeclContext())->isLocalClass() && CodeSynthesisContexts.size()) PendingLocalImplicitInstantiations.push_back( std::make_pair(Func, PointOfInstantiation)); else if (Func->isConstexpr()) // Do not defer instantiations of constexpr functions, to avoid the // expression evaluator needing to call back into Sema if it sees a // call to such a function. InstantiateFunctionDefinition(PointOfInstantiation, Func); else { Func->setInstantiationIsPending(true); PendingInstantiations.push_back(std::make_pair(Func, PointOfInstantiation)); // Notify the consumer that a function was implicitly instantiated.
Consumer.HandleCXXImplicitFunctionInstantiation(Func); } } } else { // Walk redefinitions, as some of them may be instantiable. for (auto i : Func->redecls()) { if (!i->isUsed(false) && i->isImplicitlyInstantiable()) MarkFunctionReferenced(Loc, i, OdrUse); } } if (!OdrUse) return; // Keep track of used but undefined functions. if (!Func->isDefined()) { if (mightHaveNonExternalLinkage(Func)) UndefinedButUsed.insert(std::make_pair(Func->getCanonicalDecl(), Loc)); else if (Func->getMostRecentDecl()->isInlined() && !LangOpts.GNUInline && !Func->getMostRecentDecl()->hasAttr<GNUInlineAttr>()) UndefinedButUsed.insert(std::make_pair(Func->getCanonicalDecl(), Loc)); else if (isExternalWithNoLinkageType(Func)) UndefinedButUsed.insert(std::make_pair(Func->getCanonicalDecl(), Loc)); } Func->markUsed(Context); } static void diagnoseUncapturableValueReference(Sema &S, SourceLocation loc, ValueDecl *var, DeclContext *DC) { DeclContext *VarDC = var->getDeclContext(); // If the parameter still belongs to the translation unit, then // we're actually just using one parameter in the declaration of // the next. if (isa<ParmVarDecl>(var) && isa<TranslationUnitDecl>(VarDC)) return; // For C code, don't diagnose about capture if we're not actually in code // right now; it's impossible to write a non-constant expression outside of // function context, so we'll get other (more useful) diagnostics later. // // For C++, things get a bit more nasty... it would be nice to suppress this // diagnostic for certain cases like using a local variable in an array bound // for a member of a local class, but the correct predicate is not obvious. if (!S.getLangOpts().CPlusPlus && !S.CurContext->isFunctionOrMethod()) return; unsigned ValueKind = isa<ParmVarDecl>(var) ? 1 : 0; unsigned ContextKind = 3; // unknown if (isa<CXXMethodDecl>(VarDC) && cast<CXXRecordDecl>(VarDC->getParent())->isLambda()) { ContextKind = 2; } else if (isa<FunctionDecl>(VarDC)) { ContextKind = 0; } else if (isa<BlockDecl>(VarDC)) { ContextKind = 1; } S.Diag(loc, diag::err_reference_to_local_in_enclosing_context) << var << ValueKind << ContextKind << VarDC; S.Diag(var->getLocation(), diag::note_entity_declared_at) << var; // FIXME: Add additional diagnostic info about class etc. which prevents // capture. } static bool isVariableAlreadyCapturedInScopeInfo(CapturingScopeInfo *CSI, VarDecl *Var, bool &SubCapturesAreNested, QualType &CaptureType, QualType &DeclRefType) { // Check whether we've already captured it. if (CSI->CaptureMap.count(Var)) { // If we found a capture, any subcaptures are nested. SubCapturesAreNested = true; // Retrieve the capture type for this variable. CaptureType = CSI->getCapture(Var).getCaptureType(); // Compute the type of an expression that refers to this variable. DeclRefType = CaptureType.getNonReferenceType(); // Similarly to mutable captures in lambda, all the OpenMP captures by copy // are mutable in the sense that user can change their value - they are // private instances of the captured declarations. const CapturingScopeInfo::Capture &Cap = CSI->getCapture(Var); if (Cap.isCopyCapture() && !(isa<LambdaScopeInfo>(CSI) && cast<LambdaScopeInfo>(CSI)->Mutable) && !(isa<CapturedRegionScopeInfo>(CSI) && cast<CapturedRegionScopeInfo>(CSI)->CapRegionKind == CR_OpenMP)) DeclRefType.addConst(); return true; } return false; } // Only block literals, captured statements, and lambda expressions can // capture; other scopes don't work.
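// For example (editor's sketch, not from the original source), a local
// class is not one of these capturing contexts, so the reference below is
// diagnosed via diagnoseUncapturableValueReference above:
//   void f() {
//     int n = 0;
//     struct Local { int g() { return n; } };  // error: reference to local
//   }                                          // variable in enclosing scope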
static DeclContext *getParentOfCapturingContextOrNull(DeclContext *DC, VarDecl *Var, SourceLocation Loc, const bool Diagnose, Sema &S) { if (isa<BlockDecl>(DC) || isa<CapturedDecl>(DC) || isLambdaCallOperator(DC)) return getLambdaAwareParentOfDeclContext(DC); else if (Var->hasLocalStorage()) { if (Diagnose) diagnoseUncapturableValueReference(S, Loc, Var, DC); } return nullptr; } // Certain capturing entities (lambdas, blocks etc.) are not allowed to capture // certain types of variables (unnamed, variably modified types etc.) // so check for eligibility. static bool isVariableCapturable(CapturingScopeInfo *CSI, VarDecl *Var, SourceLocation Loc, const bool Diagnose, Sema &S) { bool IsBlock = isa<BlockScopeInfo>(CSI); bool IsLambda = isa<LambdaScopeInfo>(CSI); // Lambdas are not allowed to capture unnamed variables // (e.g. anonymous unions). // FIXME: The C++11 rules don't actually state this explicitly, but I'm // assuming that's the intent. if (IsLambda && !Var->getDeclName()) { if (Diagnose) { S.Diag(Loc, diag::err_lambda_capture_anonymous_var); S.Diag(Var->getLocation(), diag::note_declared_at); } return false; } // Prohibit variably-modified types in blocks; they're difficult to deal with. if (Var->getType()->isVariablyModifiedType() && IsBlock) { if (Diagnose) { S.Diag(Loc, diag::err_ref_vm_type); S.Diag(Var->getLocation(), diag::note_previous_decl) << Var->getDeclName(); } return false; } // Prohibit structs with flexible array members too. // We cannot capture what is in the tail end of the struct. if (const RecordType *VTTy = Var->getType()->getAs<RecordType>()) { if (VTTy->getDecl()->hasFlexibleArrayMember()) { if (Diagnose) { if (IsBlock) S.Diag(Loc, diag::err_ref_flexarray_type); else S.Diag(Loc, diag::err_lambda_capture_flexarray_type) << Var->getDeclName(); S.Diag(Var->getLocation(), diag::note_previous_decl) << Var->getDeclName(); } return false; } } const bool HasBlocksAttr = Var->hasAttr<BlocksAttr>(); // Lambdas and captured statements are not allowed to capture __block // variables; they don't support the expected semantics. if (HasBlocksAttr && (IsLambda || isa<CapturedRegionScopeInfo>(CSI))) { if (Diagnose) { S.Diag(Loc, diag::err_capture_block_variable) << Var->getDeclName() << !IsLambda; S.Diag(Var->getLocation(), diag::note_previous_decl) << Var->getDeclName(); } return false; } // OpenCL v2.0 s6.12.5: Blocks cannot reference/capture other blocks if (S.getLangOpts().OpenCL && IsBlock && Var->getType()->isBlockPointerType()) { if (Diagnose) S.Diag(Loc, diag::err_opencl_block_ref_block); return false; } return true; } // Returns true if the capture by block was successful. static bool captureInBlock(BlockScopeInfo *BSI, VarDecl *Var, SourceLocation Loc, const bool BuildAndDiagnose, QualType &CaptureType, QualType &DeclRefType, const bool Nested, Sema &S) { Expr *CopyExpr = nullptr; bool ByRef = false; // Blocks are not allowed to capture arrays. if (CaptureType->isArrayType()) { if (BuildAndDiagnose) { S.Diag(Loc, diag::err_ref_array_type); S.Diag(Var->getLocation(), diag::note_previous_decl) << Var->getDeclName(); } return false; } // Forbid the block-capture of autoreleasing variables. if (CaptureType.getObjCLifetime() == Qualifiers::OCL_Autoreleasing) { if (BuildAndDiagnose) { S.Diag(Loc, diag::err_arc_autoreleasing_capture) << /*block*/ 0; S.Diag(Var->getLocation(), diag::note_previous_decl) << Var->getDeclName(); } return false; } // Warn about implicitly autoreleasing indirect parameters captured by blocks. if (const auto *PT = CaptureType->getAs<PointerType>()) { // This function finds out whether there is an AttributedType of kind // attr_objc_ownership in Ty.
The existence of AttributedType of kind // attr_objc_ownership implies __autoreleasing was explicitly specified // rather than being added implicitly by the compiler. auto IsObjCOwnershipAttributedType = [](QualType Ty) { while (const auto *AttrTy = Ty->getAs<AttributedType>()) { if (AttrTy->getAttrKind() == AttributedType::attr_objc_ownership) return true; // Peel off AttributedTypes that are not of kind objc_ownership. Ty = AttrTy->getModifiedType(); } return false; }; QualType PointeeTy = PT->getPointeeType(); if (PointeeTy->getAs<ObjCObjectPointerType>() && PointeeTy.getObjCLifetime() == Qualifiers::OCL_Autoreleasing && !IsObjCOwnershipAttributedType(PointeeTy)) { if (BuildAndDiagnose) { SourceLocation VarLoc = Var->getLocation(); S.Diag(Loc, diag::warn_block_capture_autoreleasing); { auto AddAutoreleaseNote = S.Diag(VarLoc, diag::note_declare_parameter_autoreleasing); // Provide a fix-it for the '__autoreleasing' keyword at the // appropriate location in the variable's type. if (const auto *TSI = Var->getTypeSourceInfo()) { PointerTypeLoc PTL = TSI->getTypeLoc().getAsAdjusted<PointerTypeLoc>(); if (PTL) { SourceLocation Loc = PTL.getPointeeLoc().getEndLoc(); Loc = Lexer::getLocForEndOfToken(Loc, 0, S.getSourceManager(), S.getLangOpts()); if (Loc.isValid()) { StringRef CharAtLoc = Lexer::getSourceText( CharSourceRange::getCharRange(Loc, Loc.getLocWithOffset(1)), S.getSourceManager(), S.getLangOpts()); AddAutoreleaseNote << FixItHint::CreateInsertion( Loc, CharAtLoc.empty() || !isWhitespace(CharAtLoc[0]) ? " __autoreleasing " : " __autoreleasing"); } } } } S.Diag(VarLoc, diag::note_declare_parameter_strong); } } } const bool HasBlocksAttr = Var->hasAttr<BlocksAttr>(); if (HasBlocksAttr || CaptureType->isReferenceType() || (S.getLangOpts().OpenMP && S.IsOpenMPCapturedDecl(Var))) { // Block capture by reference does not change the capture or // declaration reference types. ByRef = true; } else { // Block capture by copy introduces 'const'. CaptureType = CaptureType.getNonReferenceType().withConst(); DeclRefType = CaptureType; if (S.getLangOpts().CPlusPlus && BuildAndDiagnose) { if (const RecordType *Record = DeclRefType->getAs<RecordType>()) { // The capture logic needs the destructor, so make sure we mark it. // Usually this is unnecessary because most local variables have // their destructors marked at declaration time, but parameters are // an exception because it's technically only the call site that // actually requires the destructor. if (isa<ParmVarDecl>(Var)) S.FinalizeVarWithDestructor(Var, Record); // Enter a new evaluation context to insulate the copy // full-expression. EnterExpressionEvaluationContext scope( S, Sema::ExpressionEvaluationContext::PotentiallyEvaluated); // According to the blocks spec, the capture of a variable from // the stack requires a const copy constructor. This is not true // of the copy/move done to move a __block variable to the heap. Expr *DeclRef = new (S.Context) DeclRefExpr(Var, Nested, DeclRefType.withConst(), VK_LValue, Loc); ExprResult Result = S.PerformCopyInitialization( InitializedEntity::InitializeBlock(Var->getLocation(), CaptureType, false), Loc, DeclRef); // Build a full-expression copy expression if initialization // succeeded and used a non-trivial constructor. Recover from // errors by pretending that the copy isn't necessary. if (!Result.isInvalid() && !cast<CXXConstructExpr>(Result.get())->getConstructor() ->isTrivial()) { Result = S.MaybeCreateExprWithCleanups(Result); CopyExpr = Result.get(); } } } } // Actually capture the variable.
if (BuildAndDiagnose) BSI->addCapture(Var, HasBlocksAttr, ByRef, Nested, Loc, SourceLocation(), CaptureType, CopyExpr); return true; } /// \brief Capture the given variable in the captured region. static bool captureInCapturedRegion(CapturedRegionScopeInfo *RSI, VarDecl *Var, SourceLocation Loc, const bool BuildAndDiagnose, QualType &CaptureType, QualType &DeclRefType, const bool RefersToCapturedVariable, Sema &S) { // By default, capture variables by reference. bool ByRef = true; // Using an LValue reference type is consistent with Lambdas (see below). if (S.getLangOpts().OpenMP && RSI->CapRegionKind == CR_OpenMP) { if (S.IsOpenMPCapturedDecl(Var)) { bool HasConst = DeclRefType.isConstQualified(); DeclRefType = DeclRefType.getUnqualifiedType(); // Don't lose diagnostics about assignments to const. if (HasConst) DeclRefType.addConst(); } ByRef = S.IsOpenMPCapturedByRef(Var, RSI->OpenMPLevel); } if (ByRef) CaptureType = S.Context.getLValueReferenceType(DeclRefType); else CaptureType = DeclRefType; Expr *CopyExpr = nullptr; if (BuildAndDiagnose) { // The current implementation assumes that all variables are captured // by references. Since there is no capture by copy, no expression // evaluation will be needed. RecordDecl *RD = RSI->TheRecordDecl; FieldDecl *Field = FieldDecl::Create(S.Context, RD, Loc, Loc, nullptr, CaptureType, S.Context.getTrivialTypeSourceInfo(CaptureType, Loc), nullptr, false, ICIS_NoInit); Field->setImplicit(true); Field->setAccess(AS_private); RD->addDecl(Field); if (S.getLangOpts().OpenMP && RSI->CapRegionKind == CR_OpenMP) S.setOpenMPCaptureKind(Field, Var, RSI->OpenMPLevel); CopyExpr = new (S.Context) DeclRefExpr(Var, RefersToCapturedVariable, DeclRefType, VK_LValue, Loc); Var->setReferenced(true); Var->markUsed(S.Context); } // Actually capture the variable. if (BuildAndDiagnose) RSI->addCapture(Var, /*isBlock*/false, ByRef, RefersToCapturedVariable, Loc, SourceLocation(), CaptureType, CopyExpr); return true; } /// \brief Create a field within the lambda class for the variable /// being captured. static void addAsFieldToClosureType(Sema &S, LambdaScopeInfo *LSI, QualType FieldType, QualType DeclRefType, SourceLocation Loc, bool RefersToCapturedVariable) { CXXRecordDecl *Lambda = LSI->Lambda; // Build the non-static data member. FieldDecl *Field = FieldDecl::Create(S.Context, Lambda, Loc, Loc, nullptr, FieldType, S.Context.getTrivialTypeSourceInfo(FieldType, Loc), nullptr, false, ICIS_NoInit); Field->setImplicit(true); Field->setAccess(AS_private); Lambda->addDecl(Field); } /// \brief Capture the given variable in the lambda. static bool captureInLambda(LambdaScopeInfo *LSI, VarDecl *Var, SourceLocation Loc, const bool BuildAndDiagnose, QualType &CaptureType, QualType &DeclRefType, const bool RefersToCapturedVariable, const Sema::TryCaptureKind Kind, SourceLocation EllipsisLoc, const bool IsTopScope, Sema &S) { // Determine whether we are capturing by reference or by value. bool ByRef = false; if (IsTopScope && Kind != Sema::TryCapture_Implicit) { ByRef = (Kind == Sema::TryCapture_ExplicitByRef); } else { ByRef = (LSI->ImpCaptureStyle == LambdaScopeInfo::ImpCap_LambdaByref); } // Compute the type of the field that will capture this variable. if (ByRef) { // C++11 [expr.prim.lambda]p15: // An entity is captured by reference if it is implicitly or // explicitly captured but not captured by copy. It is // unspecified whether additional unnamed non-static data // members are declared in the closure type for entities // captured by reference. 
// // FIXME: It is not clear whether we want to build an lvalue reference // to the DeclRefType or to CaptureType.getNonReferenceType(). GCC appears // to do the former, while EDG does the latter. Core issue 1249 will // clarify, but for now we follow GCC because it's a more permissive and // easily defensible position. CaptureType = S.Context.getLValueReferenceType(DeclRefType); } else { // C++11 [expr.prim.lambda]p14: // For each entity captured by copy, an unnamed non-static // data member is declared in the closure type. The // declaration order of these members is unspecified. The type // of such a data member is the type of the corresponding // captured entity if the entity is not a reference to an // object, or the referenced type otherwise. [Note: If the // captured entity is a reference to a function, the // corresponding data member is also a reference to a // function. - end note ] if (const ReferenceType *RefType = CaptureType->getAs<ReferenceType>()) { if (!RefType->getPointeeType()->isFunctionType()) CaptureType = RefType->getPointeeType(); } // Forbid the lambda copy-capture of autoreleasing variables. if (CaptureType.getObjCLifetime() == Qualifiers::OCL_Autoreleasing) { if (BuildAndDiagnose) { S.Diag(Loc, diag::err_arc_autoreleasing_capture) << /*lambda*/ 1; S.Diag(Var->getLocation(), diag::note_previous_decl) << Var->getDeclName(); } return false; } // Make sure that by-copy captures are of a complete and non-abstract type. if (BuildAndDiagnose) { if (!CaptureType->isDependentType() && S.RequireCompleteType(Loc, CaptureType, diag::err_capture_of_incomplete_type, Var->getDeclName())) return false; if (S.RequireNonAbstractType(Loc, CaptureType, diag::err_capture_of_abstract_type)) return false; } } // Capture this variable in the lambda. if (BuildAndDiagnose) addAsFieldToClosureType(S, LSI, CaptureType, DeclRefType, Loc, RefersToCapturedVariable); // Compute the type of a reference to this captured variable. if (ByRef) DeclRefType = CaptureType.getNonReferenceType(); else { // C++ [expr.prim.lambda]p5: // The closure type for a lambda-expression has a public inline // function call operator [...]. This function call operator is // declared const (9.3.1) if and only if the lambda-expression's // parameter-declaration-clause is not followed by mutable. DeclRefType = CaptureType.getNonReferenceType(); if (!LSI->Mutable && !CaptureType->isReferenceType()) DeclRefType.addConst(); } // Add the capture. if (BuildAndDiagnose) LSI->addCapture(Var, /*IsBlock=*/false, ByRef, RefersToCapturedVariable, Loc, EllipsisLoc, CaptureType, /*CopyExpr=*/nullptr); return true; } bool Sema::tryCaptureVariable( VarDecl *Var, SourceLocation ExprLoc, TryCaptureKind Kind, SourceLocation EllipsisLoc, bool BuildAndDiagnose, QualType &CaptureType, QualType &DeclRefType, const unsigned *const FunctionScopeIndexToStopAt) { // An init-capture is notionally from the context surrounding its // declaration, but its parent DC is the lambda class. DeclContext *VarDC = Var->getDeclContext(); if (Var->isInitCapture()) VarDC = VarDC->getParent(); DeclContext *DC = CurContext; const unsigned MaxFunctionScopesIndex = FunctionScopeIndexToStopAt ?
*FunctionScopeIndexToStopAt : FunctionScopes.size() - 1; // We need to sync up the Declaration Context with the // FunctionScopeIndexToStopAt if (FunctionScopeIndexToStopAt) { unsigned FSIndex = FunctionScopes.size() - 1; while (FSIndex != MaxFunctionScopesIndex) { DC = getLambdaAwareParentOfDeclContext(DC); --FSIndex; } } // If the variable is declared in the current context, there is no need to // capture it. if (VarDC == DC) return true; // Capture global variables if it is required to use private copy of this // variable. bool IsGlobal = !Var->hasLocalStorage(); if (IsGlobal && !(LangOpts.OpenMP && IsOpenMPCapturedDecl(Var))) return true; Var = Var->getCanonicalDecl(); // Walk up the stack to determine whether we can capture the variable, // performing the "simple" checks that don't depend on type. We stop when // we've either hit the declared scope of the variable or find an existing // capture of that variable. We start from the innermost capturing-entity // (the DC) and ensure that all intervening capturing-entities // (blocks/lambdas etc.) between the innermost capturer and the variable's // DeclContext can either capture the variable or have already captured // the variable. CaptureType = Var->getType(); DeclRefType = CaptureType.getNonReferenceType(); bool Nested = false; bool Explicit = (Kind != TryCapture_Implicit); unsigned FunctionScopesIndex = MaxFunctionScopesIndex; do { // Only block literals, captured statements, and lambda expressions can // capture; other scopes don't work. DeclContext *ParentDC = getParentOfCapturingContextOrNull(DC, Var, ExprLoc, BuildAndDiagnose, *this); // We need to check for the parent *first* because, if we *have* // private-captured a global variable, we need to recursively capture it in // intermediate blocks, lambdas, etc. if (!ParentDC) { if (IsGlobal) { FunctionScopesIndex = MaxFunctionScopesIndex - 1; break; } return true; } FunctionScopeInfo *FSI = FunctionScopes[FunctionScopesIndex]; CapturingScopeInfo *CSI = cast<CapturingScopeInfo>(FSI); // Check whether we've already captured it. if (isVariableAlreadyCapturedInScopeInfo(CSI, Var, Nested, CaptureType, DeclRefType)) { CSI->getCapture(Var).markUsed(BuildAndDiagnose); break; } // If we are instantiating a generic lambda call operator body, // we do not want to capture new variables. What was captured // during either a lambda's transformation or initial parsing // should be used. if (isGenericLambdaCallOperatorSpecialization(DC)) { if (BuildAndDiagnose) { LambdaScopeInfo *LSI = cast<LambdaScopeInfo>(CSI); if (LSI->ImpCaptureStyle == CapturingScopeInfo::ImpCap_None) { Diag(ExprLoc, diag::err_lambda_impcap) << Var->getDeclName(); Diag(Var->getLocation(), diag::note_previous_decl) << Var->getDeclName(); Diag(LSI->Lambda->getLocStart(), diag::note_lambda_decl); } else diagnoseUncapturableValueReference(*this, ExprLoc, Var, DC); } return true; } // Certain capturing entities (lambdas, blocks etc.) are not allowed to capture // certain types of variables (unnamed, variably modified types etc.) // so check for eligibility. if (!isVariableCapturable(CSI, Var, ExprLoc, BuildAndDiagnose, *this)) return true; // Try to capture variable-length arrays types. if (Var->getType()->isVariablyModifiedType()) { // We're going to walk down into the type and look for VLA // expressions.
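// Illustrative case (editor's sketch, not from the original source):
//   void g(int n) {
//     int vla[n];
//     /* a capture of 'vla' here also needs the bound 'n' */
//   }
// the size expression 'n' is buried inside the variably-modified type,
// and captureVariablyModifiedType below walks the type to find it.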
QualType QTy = Var->getType(); if (ParmVarDecl *PVD = dyn_cast_or_null<ParmVarDecl>(Var)) QTy = PVD->getOriginalType(); captureVariablyModifiedType(Context, QTy, CSI); } if (getLangOpts().OpenMP) { if (auto *RSI = dyn_cast<CapturedRegionScopeInfo>(CSI)) { // OpenMP private variables should not be captured in outer scope, so // just break here. Similarly, global variables that are captured in a // target region should not be captured outside the scope of the region. if (RSI->CapRegionKind == CR_OpenMP) { bool IsOpenMPPrivateDecl = isOpenMPPrivateDecl(Var, RSI->OpenMPLevel); auto IsTargetCap = !IsOpenMPPrivateDecl && isOpenMPTargetCapturedDecl(Var, RSI->OpenMPLevel); // When we detect target captures we are looking from inside the // target region, therefore we need to propagate the capture from the // enclosing region. Therefore, the capture is not initially nested. if (IsTargetCap) adjustOpenMPTargetScopeIndex(FunctionScopesIndex, RSI->OpenMPLevel); if (IsTargetCap || IsOpenMPPrivateDecl) { Nested = !IsTargetCap; DeclRefType = DeclRefType.getUnqualifiedType(); CaptureType = Context.getLValueReferenceType(DeclRefType); break; } } } } if (CSI->ImpCaptureStyle == CapturingScopeInfo::ImpCap_None && !Explicit) { // No capture-default, and this is not an explicit capture, // so cannot capture this variable. if (BuildAndDiagnose) { Diag(ExprLoc, diag::err_lambda_impcap) << Var->getDeclName(); Diag(Var->getLocation(), diag::note_previous_decl) << Var->getDeclName(); if (cast<LambdaScopeInfo>(CSI)->Lambda) Diag(cast<LambdaScopeInfo>(CSI)->Lambda->getLocStart(), diag::note_lambda_decl); // FIXME: If we error out because an outer lambda cannot implicitly // capture a variable that an inner lambda explicitly captures, we // should have the inner lambda do the explicit capture - because // it makes for cleaner diagnostics later. This would purely be done // so that the diagnostic does not misleadingly claim that a variable // cannot be captured by a lambda implicitly even though it is captured // explicitly. Suggestion: // - create const bool VariableCaptureWasInitiallyExplicit = Explicit // at the function head // - cache the StartingDeclContext - this must be a lambda // - capture the variable via captureInLambda in the innermost lambda. } return true; } FunctionScopesIndex--; DC = ParentDC; Explicit = false; } while (!VarDC->Equals(DC)); // Walk back down the scope stack, (e.g. from outer lambda to inner lambda) // computing the type of the capture at each step, checking type-specific // requirements, and adding captures if requested. // If the variable had already been captured previously, we start capturing // at the lambda nested within that one.
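// For instance (editor's sketch, not from the original source):
//   void f() { int x; [&] { [&] { return x; }; }; }
// the loop below captures 'x' first in the outer lambda and then, with
// Nested set, in the inner one.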
for (unsigned I = ++FunctionScopesIndex, N = MaxFunctionScopesIndex + 1; I != N; ++I) { CapturingScopeInfo *CSI = cast<CapturingScopeInfo>(FunctionScopes[I]); if (BlockScopeInfo *BSI = dyn_cast<BlockScopeInfo>(CSI)) { if (!captureInBlock(BSI, Var, ExprLoc, BuildAndDiagnose, CaptureType, DeclRefType, Nested, *this)) return true; Nested = true; } else if (CapturedRegionScopeInfo *RSI = dyn_cast<CapturedRegionScopeInfo>(CSI)) { if (!captureInCapturedRegion(RSI, Var, ExprLoc, BuildAndDiagnose, CaptureType, DeclRefType, Nested, *this)) return true; Nested = true; } else { LambdaScopeInfo *LSI = cast<LambdaScopeInfo>(CSI); if (!captureInLambda(LSI, Var, ExprLoc, BuildAndDiagnose, CaptureType, DeclRefType, Nested, Kind, EllipsisLoc, /*IsTopScope*/I == N - 1, *this)) return true; Nested = true; } } return false; } bool Sema::tryCaptureVariable(VarDecl *Var, SourceLocation Loc, TryCaptureKind Kind, SourceLocation EllipsisLoc) { QualType CaptureType; QualType DeclRefType; return tryCaptureVariable(Var, Loc, Kind, EllipsisLoc, /*BuildAndDiagnose=*/true, CaptureType, DeclRefType, nullptr); } bool Sema::NeedToCaptureVariable(VarDecl *Var, SourceLocation Loc) { QualType CaptureType; QualType DeclRefType; return !tryCaptureVariable(Var, Loc, TryCapture_Implicit, SourceLocation(), /*BuildAndDiagnose=*/false, CaptureType, DeclRefType, nullptr); } QualType Sema::getCapturedDeclRefType(VarDecl *Var, SourceLocation Loc) { QualType CaptureType; QualType DeclRefType; // Determine whether we can capture this variable. if (tryCaptureVariable(Var, Loc, TryCapture_Implicit, SourceLocation(), /*BuildAndDiagnose=*/false, CaptureType, DeclRefType, nullptr)) return QualType(); return DeclRefType; } // If either the type of the variable or the initializer is dependent, // return false. Otherwise, determine whether the variable is a constant // expression. Use this if you need to know if a variable that might or // might not be dependent is truly a constant expression. static inline bool IsVariableNonDependentAndAConstantExpression(VarDecl *Var, ASTContext &Context) { if (Var->getType()->isDependentType()) return false; const VarDecl *DefVD = nullptr; Var->getAnyInitializer(DefVD); if (!DefVD) return false; EvaluatedStmt *Eval = DefVD->ensureEvaluatedStmt(); Expr *Init = cast<Expr>(Eval->Value); if (Init->isValueDependent()) return false; return IsVariableAConstantExpression(Var, Context); } void Sema::UpdateMarkingForLValueToRValue(Expr *E) { // Per C++11 [basic.def.odr], a variable is odr-used "unless it is // an object that satisfies the requirements for appearing in a // constant expression (5.19) and the lvalue-to-rvalue conversion (4.1) // is immediately applied." This function handles the lvalue-to-rvalue // conversion part. MaybeODRUseExprs.erase(E->IgnoreParens()); // If we are in a lambda, check if this DeclRefExpr or MemberExpr refers // to a variable that is a constant expression, and if so, identify it as // a reference to a variable that does not involve an odr-use of that // variable.
if (LambdaScopeInfo *LSI = getCurLambda()) { Expr *SansParensExpr = E->IgnoreParens(); VarDecl *Var = nullptr; if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(SansParensExpr)) Var = dyn_cast<VarDecl>(DRE->getFoundDecl()); else if (MemberExpr *ME = dyn_cast<MemberExpr>(SansParensExpr)) Var = dyn_cast<VarDecl>(ME->getMemberDecl()); if (Var && IsVariableNonDependentAndAConstantExpression(Var, Context)) LSI->markVariableExprAsNonODRUsed(SansParensExpr); } } ExprResult Sema::ActOnConstantExpression(ExprResult Res) { Res = CorrectDelayedTyposInExpr(Res); if (!Res.isUsable()) return Res; // If a constant-expression is a reference to a variable where we delay // deciding whether it is an odr-use, just assume we will apply the // lvalue-to-rvalue conversion. In the one case where this doesn't happen // (a non-type template argument), we have special handling anyway. UpdateMarkingForLValueToRValue(Res.get()); return Res; } void Sema::CleanupVarDeclMarking() { for (Expr *E : MaybeODRUseExprs) { VarDecl *Var; SourceLocation Loc; if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E)) { Var = cast<VarDecl>(DRE->getDecl()); Loc = DRE->getLocation(); } else if (MemberExpr *ME = dyn_cast<MemberExpr>(E)) { Var = cast<VarDecl>(ME->getMemberDecl()); Loc = ME->getMemberLoc(); } else { llvm_unreachable("Unexpected expression"); } MarkVarDeclODRUsed(Var, Loc, *this, /*MaxFunctionScopeIndex Pointer*/ nullptr); } MaybeODRUseExprs.clear(); } static void DoMarkVarDeclReferenced(Sema &SemaRef, SourceLocation Loc, VarDecl *Var, Expr *E) { assert((!E || isa<DeclRefExpr>(E) || isa<MemberExpr>(E)) && "Invalid Expr argument to DoMarkVarDeclReferenced"); Var->setReferenced(); TemplateSpecializationKind TSK = Var->getTemplateSpecializationKind(); bool OdrUseContext = isOdrUseContext(SemaRef); bool UsableInConstantExpr = Var->isUsableInConstantExpressions(SemaRef.Context); bool NeedDefinition = OdrUseContext || (isEvaluatableContext(SemaRef) && UsableInConstantExpr); VarTemplateSpecializationDecl *VarSpec = dyn_cast<VarTemplateSpecializationDecl>(Var); assert(!isa<VarTemplatePartialSpecializationDecl>(Var) && "Can't instantiate a partial template specialization."); // If this might be a member specialization of a static data member, check // the specialization is visible. We already did the checks for variable // template specializations when we created them. if (NeedDefinition && TSK != TSK_Undeclared && !isa<VarTemplateSpecializationDecl>(Var)) SemaRef.checkSpecializationVisibility(Loc, Var); // Perform implicit instantiation of static data members, static data member // templates of class templates, and variable template specializations. Delay // instantiations of variable templates, except for those that could be used // in a constant expression. if (NeedDefinition && isTemplateInstantiation(TSK)) { // Per C++17 [temp.explicit]p10, we may instantiate despite an explicit // instantiation declaration if a variable is usable in a constant // expression (among other cases). bool TryInstantiating = TSK == TSK_ImplicitInstantiation || (TSK == TSK_ExplicitInstantiationDeclaration && UsableInConstantExpr); if (TryInstantiating) { SourceLocation PointOfInstantiation = Var->getPointOfInstantiation(); bool FirstInstantiation = PointOfInstantiation.isInvalid(); if (FirstInstantiation) { PointOfInstantiation = Loc; Var->setTemplateSpecializationKind(TSK, PointOfInstantiation); } bool InstantiationDependent = false; bool IsNonDependent = VarSpec ? !TemplateSpecializationType::anyDependentTemplateArguments( VarSpec->getTemplateArgsInfo(), InstantiationDependent) : true; // Do not instantiate specializations that are still type-dependent.
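// Editor's sketch (not from the original source) of the distinction below:
//   template<typename T> constexpr T pi = T(3.141592653589793);
//   double d = pi<double>;  // usable in a constant expression, so the
//                           // definition is instantiated immediately
//                           // rather than deferred.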
if (IsNonDependent) { if (UsableInConstantExpr) { // Do not defer instantiations of variables that could be used in a // constant expression. SemaRef.InstantiateVariableDefinition(PointOfInstantiation, Var); } else if (FirstInstantiation || isa<VarTemplateSpecializationDecl>(Var)) { // FIXME: For a specialization of a variable template, we don't // distinguish between "declaration and type implicitly instantiated" // and "implicit instantiation of definition requested", so we have // no direct way to avoid enqueueing the pending instantiation // multiple times. SemaRef.PendingInstantiations .push_back(std::make_pair(Var, PointOfInstantiation)); } } } } // Per C++11 [basic.def.odr], a variable is odr-used "unless it satisfies // the requirements for appearing in a constant expression (5.19) and, if // it is an object, the lvalue-to-rvalue conversion (4.1) // is immediately applied." We check the first part here, and // Sema::UpdateMarkingForLValueToRValue deals with the second part. // Note that we use the C++11 definition everywhere because nothing in // C++03 depends on whether we get the C++03 version correct. The second // part does not apply to references, since they are not objects. if (OdrUseContext && E && IsVariableAConstantExpression(Var, SemaRef.Context)) { // A reference initialized by a constant expression can never be // odr-used, so simply ignore it. if (!Var->getType()->isReferenceType() || (SemaRef.LangOpts.OpenMP && SemaRef.IsOpenMPCapturedDecl(Var))) SemaRef.MaybeODRUseExprs.insert(E); } else if (OdrUseContext) { MarkVarDeclODRUsed(Var, Loc, SemaRef, /*MaxFunctionScopeIndex ptr*/ nullptr); } else if (isOdrUseContext(SemaRef, /*SkipDependentUses*/false)) { // If this is a dependent context, we don't need to mark variables as // odr-used, but we may still need to track them for lambda capture. // FIXME: Do we also need to do this inside dependent typeid expressions // (which are modeled as unevaluated at this point)? const bool RefersToEnclosingScope = (SemaRef.CurContext != Var->getDeclContext() && Var->getDeclContext()->isFunctionOrMethod() && Var->hasLocalStorage()); if (RefersToEnclosingScope) { LambdaScopeInfo *const LSI = SemaRef.getCurLambda(/*IgnoreNonLambdaCapturingScope=*/true); - if (LSI && !LSI->CallOperator->Encloses(Var->getDeclContext())) { + if (LSI && (!LSI->CallOperator || + !LSI->CallOperator->Encloses(Var->getDeclContext()))) { // If a variable could potentially be odr-used, defer marking it so // until we finish analyzing the full expression for any // lvalue-to-rvalue // or discarded value conversions that would obviate odr-use. // Add it to the list of potential captures that will be analyzed // later (ActOnFinishFullExpr) for eventual capture and odr-use marking // unless the variable is a reference that was initialized by a constant // expression (this will never need to be captured or odr-used). assert(E && "Capture variable should be used in an expression."); if (!Var->getType()->isReferenceType() || !IsVariableNonDependentAndAConstantExpression(Var, SemaRef.Context)) LSI->addPotentialCapture(E->IgnoreParens()); } } } } /// \brief Mark a variable referenced, and check whether it is odr-used /// (C++ [basic.def.odr]p2, C99 6.9p3). Note that this should not be /// used directly for normal expressions referring to VarDecl.
void Sema::MarkVariableReferenced(SourceLocation Loc, VarDecl *Var) { DoMarkVarDeclReferenced(*this, Loc, Var, nullptr); } static void MarkExprReferenced(Sema &SemaRef, SourceLocation Loc, Decl *D, Expr *E, bool MightBeOdrUse) { if (SemaRef.isInOpenMPDeclareTargetContext()) SemaRef.checkDeclIsAllowedInOpenMPTarget(E, D); if (VarDecl *Var = dyn_cast<VarDecl>(D)) { DoMarkVarDeclReferenced(SemaRef, Loc, Var, E); return; } SemaRef.MarkAnyDeclReferenced(Loc, D, MightBeOdrUse); // If this is a call to a method via a cast, also mark the method in the // derived class used in case codegen can devirtualize the call. const MemberExpr *ME = dyn_cast<MemberExpr>(E); if (!ME) return; CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(ME->getMemberDecl()); if (!MD) return; // Only attempt to devirtualize if this is truly a virtual call. bool IsVirtualCall = MD->isVirtual() && ME->performsVirtualDispatch(SemaRef.getLangOpts()); if (!IsVirtualCall) return; // If it's possible to devirtualize the call, mark the called function // referenced. CXXMethodDecl *DM = MD->getDevirtualizedMethod( ME->getBase(), SemaRef.getLangOpts().AppleKext); if (DM) SemaRef.MarkAnyDeclReferenced(Loc, DM, MightBeOdrUse); } /// \brief Perform reference-marking and odr-use handling for a DeclRefExpr. void Sema::MarkDeclRefReferenced(DeclRefExpr *E, const Expr *Base) { // TODO: update this with DR# once a defect report is filed. // C++11 defect. The address of a pure member should not be an ODR use, even // if it's a qualified reference. bool OdrUse = true; if (const CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(E->getDecl())) if (Method->isVirtual() && !Method->getDevirtualizedMethod(Base, getLangOpts().AppleKext)) OdrUse = false; MarkExprReferenced(*this, E->getLocation(), E->getDecl(), E, OdrUse); } /// \brief Perform reference-marking and odr-use handling for a MemberExpr. void Sema::MarkMemberReferenced(MemberExpr *E) { // C++11 [basic.def.odr]p2: // A non-overloaded function whose name appears as a potentially-evaluated // expression or a member of a set of candidate functions, if selected by // overload resolution when referred to from a potentially-evaluated // expression, is odr-used, unless it is a pure virtual function and its // name is not explicitly qualified. bool MightBeOdrUse = true; if (E->performsVirtualDispatch(getLangOpts())) { if (CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(E->getMemberDecl())) if (Method->isPure()) MightBeOdrUse = false; } SourceLocation Loc = E->getMemberLoc().isValid() ? E->getMemberLoc() : E->getLocStart(); MarkExprReferenced(*this, Loc, E->getMemberDecl(), E, MightBeOdrUse); } /// \brief Perform marking for a reference to an arbitrary declaration. It /// marks the declaration referenced, and performs odr-use checking for /// functions and variables. This method should not be used when building a /// normal expression which refers to a variable. void Sema::MarkAnyDeclReferenced(SourceLocation Loc, Decl *D, bool MightBeOdrUse) { if (MightBeOdrUse) { if (auto *VD = dyn_cast<VarDecl>(D)) { MarkVariableReferenced(Loc, VD); return; } } if (auto *FD = dyn_cast<FunctionDecl>(D)) { MarkFunctionReferenced(Loc, FD, MightBeOdrUse); return; } D->setReferenced(); } namespace { // Mark all of the declarations used by a type as referenced. // FIXME: Not fully implemented yet! We need to have a better understanding // of when we're entering a context we should not recurse into. // FIXME: This and EvaluatedExprMarker are more-or-less equivalent to // TreeTransforms rebuilding the type in a new context. Rather than // duplicating the TreeTransform logic, we should consider reusing it here.
// Currently that causes problems when rebuilding LambdaExprs. class MarkReferencedDecls : public RecursiveASTVisitor<MarkReferencedDecls> { Sema &S; SourceLocation Loc; public: typedef RecursiveASTVisitor<MarkReferencedDecls> Inherited; MarkReferencedDecls(Sema &S, SourceLocation Loc) : S(S), Loc(Loc) { } bool TraverseTemplateArgument(const TemplateArgument &Arg); }; } bool MarkReferencedDecls::TraverseTemplateArgument( const TemplateArgument &Arg) { { // A non-type template argument is a constant-evaluated context. EnterExpressionEvaluationContext Evaluated( S, Sema::ExpressionEvaluationContext::ConstantEvaluated); if (Arg.getKind() == TemplateArgument::Declaration) { if (Decl *D = Arg.getAsDecl()) S.MarkAnyDeclReferenced(Loc, D, true); } else if (Arg.getKind() == TemplateArgument::Expression) { S.MarkDeclarationsReferencedInExpr(Arg.getAsExpr(), false); } } return Inherited::TraverseTemplateArgument(Arg); } void Sema::MarkDeclarationsReferencedInType(SourceLocation Loc, QualType T) { MarkReferencedDecls Marker(*this, Loc); Marker.TraverseType(T); } namespace { /// \brief Helper class that marks all of the declarations referenced by /// potentially-evaluated subexpressions as "referenced". class EvaluatedExprMarker : public EvaluatedExprVisitor<EvaluatedExprMarker> { Sema &S; bool SkipLocalVariables; public: typedef EvaluatedExprVisitor<EvaluatedExprMarker> Inherited; EvaluatedExprMarker(Sema &S, bool SkipLocalVariables) : Inherited(S.Context), S(S), SkipLocalVariables(SkipLocalVariables) { } void VisitDeclRefExpr(DeclRefExpr *E) { // If we were asked not to visit local variables, don't. if (SkipLocalVariables) { if (VarDecl *VD = dyn_cast<VarDecl>(E->getDecl())) if (VD->hasLocalStorage()) return; } S.MarkDeclRefReferenced(E); } void VisitMemberExpr(MemberExpr *E) { S.MarkMemberReferenced(E); Inherited::VisitMemberExpr(E); } void VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *E) { S.MarkFunctionReferenced(E->getLocStart(), const_cast<CXXDestructorDecl *>(E->getTemporary()->getDestructor())); Visit(E->getSubExpr()); } void VisitCXXNewExpr(CXXNewExpr *E) { if (E->getOperatorNew()) S.MarkFunctionReferenced(E->getLocStart(), E->getOperatorNew()); if (E->getOperatorDelete()) S.MarkFunctionReferenced(E->getLocStart(), E->getOperatorDelete()); Inherited::VisitCXXNewExpr(E); } void VisitCXXDeleteExpr(CXXDeleteExpr *E) { if (E->getOperatorDelete()) S.MarkFunctionReferenced(E->getLocStart(), E->getOperatorDelete()); QualType Destroyed = S.Context.getBaseElementType(E->getDestroyedType()); if (const RecordType *DestroyedRec = Destroyed->getAs<RecordType>()) { CXXRecordDecl *Record = cast<CXXRecordDecl>(DestroyedRec->getDecl()); S.MarkFunctionReferenced(E->getLocStart(), S.LookupDestructor(Record)); } Inherited::VisitCXXDeleteExpr(E); } void VisitCXXConstructExpr(CXXConstructExpr *E) { S.MarkFunctionReferenced(E->getLocStart(), E->getConstructor()); Inherited::VisitCXXConstructExpr(E); } void VisitCXXDefaultArgExpr(CXXDefaultArgExpr *E) { Visit(E->getExpr()); } void VisitImplicitCastExpr(ImplicitCastExpr *E) { Inherited::VisitImplicitCastExpr(E); if (E->getCastKind() == CK_LValueToRValue) S.UpdateMarkingForLValueToRValue(E->getSubExpr()); } }; } /// \brief Mark any declarations that appear within this expression or any /// potentially-evaluated subexpressions as "referenced". /// /// \param SkipLocalVariables If true, don't mark local variables as /// 'referenced'. void Sema::MarkDeclarationsReferencedInExpr(Expr *E, bool SkipLocalVariables) { EvaluatedExprMarker(*this, SkipLocalVariables).Visit(E); } /// \brief Emit a diagnostic that describes an effect on the run-time behavior /// of the program being compiled.
/// /// This routine emits the given diagnostic when the code currently being /// type-checked is "potentially evaluated", meaning that there is a /// possibility that the code will actually be executable. Code in sizeof() /// expressions, code used only during overload resolution, etc., are not /// potentially evaluated. This routine will suppress such diagnostics or, /// in the absolutely nutty case of potentially potentially evaluated /// expressions (C++ typeid), queue the diagnostic to potentially emit it /// later. /// /// This routine should be used for all diagnostics that describe the run-time /// behavior of a program, such as passing a non-POD value through an ellipsis. /// Failure to do so will likely result in spurious diagnostics or failures /// during overload resolution or within sizeof/alignof/typeof/typeid. bool Sema::DiagRuntimeBehavior(SourceLocation Loc, const Stmt *Statement, const PartialDiagnostic &PD) { switch (ExprEvalContexts.back().Context) { case ExpressionEvaluationContext::Unevaluated: case ExpressionEvaluationContext::UnevaluatedList: case ExpressionEvaluationContext::UnevaluatedAbstract: case ExpressionEvaluationContext::DiscardedStatement: // The argument will never be evaluated, so don't complain. break; case ExpressionEvaluationContext::ConstantEvaluated: // Relevant diagnostics should be produced by constant evaluation. break; case ExpressionEvaluationContext::PotentiallyEvaluated: case ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed: if (Statement && getCurFunctionOrMethodDecl()) { FunctionScopes.back()->PossiblyUnreachableDiags. push_back(sema::PossiblyUnreachableDiag(PD, Loc, Statement)); return true; } // The initializer of a constexpr variable or of the first declaration of // a static data member is not syntactically a constant-evaluated context, // but is nonetheless always required to be a constant expression, so we // can skip diagnosing. // FIXME: Using the mangling context here is a hack. if (auto *VD = dyn_cast_or_null<VarDecl>( ExprEvalContexts.back().ManglingContextDecl)) { if (VD->isConstexpr() || (VD->isStaticDataMember() && VD->isFirstDecl() && !VD->isInline())) break; // FIXME: For any other kind of variable, we should build a CFG for its // initializer and check whether the context in question is reachable. } Diag(Loc, PD); return true; } return false; } bool Sema::CheckCallReturnType(QualType ReturnType, SourceLocation Loc, CallExpr *CE, FunctionDecl *FD) { if (ReturnType->isVoidType() || !ReturnType->isIncompleteType()) return false; // If we're inside a decltype's expression, don't check for a valid return // type or construct temporaries until we know whether this is the last call. if (ExprEvalContexts.back().IsDecltype) { ExprEvalContexts.back().DelayedDecltypeCalls.push_back(CE); return false; } class CallReturnIncompleteDiagnoser : public TypeDiagnoser { FunctionDecl *FD; CallExpr *CE; public: CallReturnIncompleteDiagnoser(FunctionDecl *FD, CallExpr *CE) : FD(FD), CE(CE) { } void diagnose(Sema &S, SourceLocation Loc, QualType T) override { if (!FD) { S.Diag(Loc, diag::err_call_incomplete_return) << T << CE->getSourceRange(); return; } S.Diag(Loc, diag::err_call_function_incomplete_return) << CE->getSourceRange() << FD->getDeclName() << T; S.Diag(FD->getLocation(), diag::note_entity_declared_at) << FD->getDeclName(); } } Diagnoser(FD, CE); if (RequireCompleteType(Loc, ReturnType, Diagnoser)) return true; return false; } // Diagnose the s/=/==/ and s/\|=/!=/ typos.
Note that adding parentheses // will prevent this condition from triggering, which is what we want. void Sema::DiagnoseAssignmentAsCondition(Expr *E) { SourceLocation Loc; unsigned diagnostic = diag::warn_condition_is_assignment; bool IsOrAssign = false; if (BinaryOperator *Op = dyn_cast<BinaryOperator>(E)) { if (Op->getOpcode() != BO_Assign && Op->getOpcode() != BO_OrAssign) return; IsOrAssign = Op->getOpcode() == BO_OrAssign; // Greylist some idioms by putting them into a warning subcategory. if (ObjCMessageExpr *ME = dyn_cast<ObjCMessageExpr>(Op->getRHS()->IgnoreParenCasts())) { Selector Sel = ME->getSelector(); // self = [<foo> init...] if (isSelfExpr(Op->getLHS()) && ME->getMethodFamily() == OMF_init) diagnostic = diag::warn_condition_is_idiomatic_assignment; // <foo> = [<bar> nextObject] else if (Sel.isUnarySelector() && Sel.getNameForSlot(0) == "nextObject") diagnostic = diag::warn_condition_is_idiomatic_assignment; } Loc = Op->getOperatorLoc(); } else if (CXXOperatorCallExpr *Op = dyn_cast<CXXOperatorCallExpr>(E)) { if (Op->getOperator() != OO_Equal && Op->getOperator() != OO_PipeEqual) return; IsOrAssign = Op->getOperator() == OO_PipeEqual; Loc = Op->getOperatorLoc(); } else if (PseudoObjectExpr *POE = dyn_cast<PseudoObjectExpr>(E)) return DiagnoseAssignmentAsCondition(POE->getSyntacticForm()); else { // Not an assignment. return; } Diag(Loc, diagnostic) << E->getSourceRange(); SourceLocation Open = E->getLocStart(); SourceLocation Close = getLocForEndOfToken(E->getSourceRange().getEnd()); Diag(Loc, diag::note_condition_assign_silence) << FixItHint::CreateInsertion(Open, "(") << FixItHint::CreateInsertion(Close, ")"); if (IsOrAssign) Diag(Loc, diag::note_condition_or_assign_to_comparison) << FixItHint::CreateReplacement(Loc, "!="); else Diag(Loc, diag::note_condition_assign_to_comparison) << FixItHint::CreateReplacement(Loc, "=="); } /// \brief Redundant parentheses over an equality comparison can indicate /// that the user intended an assignment used as condition. void Sema::DiagnoseEqualityWithExtraParens(ParenExpr *ParenE) { // Don't warn if the parens came from a macro. SourceLocation parenLoc = ParenE->getLocStart(); if (parenLoc.isInvalid() || parenLoc.isMacroID()) return; // Don't warn for dependent expressions.
  if (ParenE->isTypeDependent())
    return;

  Expr *E = ParenE->IgnoreParens();

  if (BinaryOperator *opE = dyn_cast<BinaryOperator>(E))
    if (opE->getOpcode() == BO_EQ &&
        opE->getLHS()->IgnoreParenImpCasts()->isModifiableLvalue(Context)
                                                           == Expr::MLV_Valid) {
      SourceLocation Loc = opE->getOperatorLoc();

      Diag(Loc, diag::warn_equality_with_extra_parens) << E->getSourceRange();
      SourceRange ParenERange = ParenE->getSourceRange();
      Diag(Loc, diag::note_equality_comparison_silence)
        << FixItHint::CreateRemoval(ParenERange.getBegin())
        << FixItHint::CreateRemoval(ParenERange.getEnd());
      Diag(Loc, diag::note_equality_comparison_to_assign)
        << FixItHint::CreateReplacement(Loc, "=");
    }
}

ExprResult Sema::CheckBooleanCondition(SourceLocation Loc, Expr *E,
                                       bool IsConstexpr) {
  DiagnoseAssignmentAsCondition(E);
  if (ParenExpr *parenE = dyn_cast<ParenExpr>(E))
    DiagnoseEqualityWithExtraParens(parenE);

  ExprResult result = CheckPlaceholderExpr(E);
  if (result.isInvalid()) return ExprError();
  E = result.get();

  if (!E->isTypeDependent()) {
    if (getLangOpts().CPlusPlus)
      return CheckCXXBooleanCondition(E, IsConstexpr); // C++ 6.4p4

    ExprResult ERes = DefaultFunctionArrayLvalueConversion(E);
    if (ERes.isInvalid())
      return ExprError();
    E = ERes.get();

    QualType T = E->getType();
    if (!T->isScalarType()) { // C99 6.8.4.1p1
      Diag(Loc, diag::err_typecheck_statement_requires_scalar)
        << T << E->getSourceRange();
      return ExprError();
    }
    CheckBoolLikeConversion(E, Loc);
  }

  return E;
}

Sema::ConditionResult Sema::ActOnCondition(Scope *S, SourceLocation Loc,
                                           Expr *SubExpr, ConditionKind CK) {
  // Empty conditions are valid in for-statements.
  if (!SubExpr)
    return ConditionResult();

  ExprResult Cond;
  switch (CK) {
  case ConditionKind::Boolean:
    Cond = CheckBooleanCondition(Loc, SubExpr);
    break;

  case ConditionKind::ConstexprIf:
    Cond = CheckBooleanCondition(Loc, SubExpr, true);
    break;

  case ConditionKind::Switch:
    Cond = CheckSwitchCondition(Loc, SubExpr);
    break;
  }
  if (Cond.isInvalid())
    return ConditionError();

  // FIXME: FullExprArg doesn't have an invalid bit, so check nullness instead.
  FullExprArg FullExpr = MakeFullExpr(Cond.get(), Loc);
  if (!FullExpr.get())
    return ConditionError();

  return ConditionResult(*this, nullptr, FullExpr,
                         CK == ConditionKind::ConstexprIf);
}

namespace {
  /// A visitor for rebuilding a call to an __unknown_any expression
  /// to have an appropriate type.
  struct RebuildUnknownAnyFunction
    : StmtVisitor<RebuildUnknownAnyFunction, ExprResult> {

    Sema &S;

    RebuildUnknownAnyFunction(Sema &S) : S(S) {}

    ExprResult VisitStmt(Stmt *S) {
      llvm_unreachable("unexpected statement!");
    }

    ExprResult VisitExpr(Expr *E) {
      S.Diag(E->getExprLoc(), diag::err_unsupported_unknown_any_call)
        << E->getSourceRange();
      return ExprError();
    }

    /// Rebuild an expression which simply semantically wraps another
    /// expression which it shares the type and value kind of.
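    /// (Added note, not from the original source: the "sugar" nodes handled
    /// this way are ParenExpr, i.e. "(e)", and GNU "__extension__ e", both
    /// of which forward the type and value kind of their sub-expression
    /// unchanged.)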
    template <class T> ExprResult rebuildSugarExpr(T *E) {
      ExprResult SubResult = Visit(E->getSubExpr());
      if (SubResult.isInvalid()) return ExprError();

      Expr *SubExpr = SubResult.get();
      E->setSubExpr(SubExpr);
      E->setType(SubExpr->getType());
      E->setValueKind(SubExpr->getValueKind());
      assert(E->getObjectKind() == OK_Ordinary);
      return E;
    }

    ExprResult VisitParenExpr(ParenExpr *E) {
      return rebuildSugarExpr(E);
    }

    ExprResult VisitUnaryExtension(UnaryOperator *E) {
      return rebuildSugarExpr(E);
    }

    ExprResult VisitUnaryAddrOf(UnaryOperator *E) {
      ExprResult SubResult = Visit(E->getSubExpr());
      if (SubResult.isInvalid()) return ExprError();

      Expr *SubExpr = SubResult.get();
      E->setSubExpr(SubExpr);
      E->setType(S.Context.getPointerType(SubExpr->getType()));
      assert(E->getValueKind() == VK_RValue);
      assert(E->getObjectKind() == OK_Ordinary);
      return E;
    }

    ExprResult resolveDecl(Expr *E, ValueDecl *VD) {
      if (!isa<FunctionDecl>(VD)) return VisitExpr(E);

      E->setType(VD->getType());

      assert(E->getValueKind() == VK_RValue);
      if (S.getLangOpts().CPlusPlus &&
          !(isa<CXXMethodDecl>(VD) &&
            cast<CXXMethodDecl>(VD)->isInstance()))
        E->setValueKind(VK_LValue);

      return E;
    }

    ExprResult VisitMemberExpr(MemberExpr *E) {
      return resolveDecl(E, E->getMemberDecl());
    }

    ExprResult VisitDeclRefExpr(DeclRefExpr *E) {
      return resolveDecl(E, E->getDecl());
    }
  };
}

/// Given a function expression of unknown-any type, try to rebuild it
/// to have a function type.
static ExprResult rebuildUnknownAnyFunction(Sema &S, Expr *FunctionExpr) {
  ExprResult Result = RebuildUnknownAnyFunction(S).Visit(FunctionExpr);
  if (Result.isInvalid()) return ExprError();
  return S.DefaultFunctionArrayConversion(Result.get());
}

namespace {
  /// A visitor for rebuilding an expression of type __unknown_anytype
  /// into one which resolves the type directly on the referring
  /// expression. Strict preservation of the original source
  /// structure is not a goal.
  struct RebuildUnknownAnyExpr
    : StmtVisitor<RebuildUnknownAnyExpr, ExprResult> {

    Sema &S;

    /// The current destination type.
    QualType DestType;

    RebuildUnknownAnyExpr(Sema &S, QualType CastType)
      : S(S), DestType(CastType) {}

    ExprResult VisitStmt(Stmt *S) {
      llvm_unreachable("unexpected statement!");
    }

    ExprResult VisitExpr(Expr *E) {
      S.Diag(E->getExprLoc(), diag::err_unsupported_unknown_any_expr)
        << E->getSourceRange();
      return ExprError();
    }

    ExprResult VisitCallExpr(CallExpr *E);
    ExprResult VisitObjCMessageExpr(ObjCMessageExpr *E);

    /// Rebuild an expression which simply semantically wraps another
    /// expression which it shares the type and value kind of.
    template <class T> ExprResult rebuildSugarExpr(T *E) {
      ExprResult SubResult = Visit(E->getSubExpr());
      if (SubResult.isInvalid()) return ExprError();
      Expr *SubExpr = SubResult.get();
      E->setSubExpr(SubExpr);
      E->setType(SubExpr->getType());
      E->setValueKind(SubExpr->getValueKind());
      assert(E->getObjectKind() == OK_Ordinary);
      return E;
    }

    ExprResult VisitParenExpr(ParenExpr *E) {
      return rebuildSugarExpr(E);
    }

    ExprResult VisitUnaryExtension(UnaryOperator *E) {
      return rebuildSugarExpr(E);
    }

    ExprResult VisitUnaryAddrOf(UnaryOperator *E) {
      const PointerType *Ptr = DestType->getAs<PointerType>();
      if (!Ptr) {
        S.Diag(E->getOperatorLoc(), diag::err_unknown_any_addrof)
          << E->getSourceRange();
        return ExprError();
      }

      if (isa<CallExpr>(E->getSubExpr())) {
        S.Diag(E->getOperatorLoc(), diag::err_unknown_any_addrof_call)
          << E->getSourceRange();
        return ExprError();
      }

      assert(E->getValueKind() == VK_RValue);
      assert(E->getObjectKind() == OK_Ordinary);
      E->setType(DestType);

      // Build the sub-expression as if it were an object of the pointee type.
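      // Illustrative example (added commentary; "unknown_fn" is a
      // hypothetical name): for a debugger-style cast such as
      // "(void (*)(int))&unknown_fn", DestType is "void (*)(int)" at this
      // point, so "unknown_fn" is rebuilt below against the pointee type
      // "void (int)".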
      DestType = Ptr->getPointeeType();
      ExprResult SubResult = Visit(E->getSubExpr());
      if (SubResult.isInvalid()) return ExprError();
      E->setSubExpr(SubResult.get());
      return E;
    }

    ExprResult VisitImplicitCastExpr(ImplicitCastExpr *E);

    ExprResult resolveDecl(Expr *E, ValueDecl *VD);

    ExprResult VisitMemberExpr(MemberExpr *E) {
      return resolveDecl(E, E->getMemberDecl());
    }

    ExprResult VisitDeclRefExpr(DeclRefExpr *E) {
      return resolveDecl(E, E->getDecl());
    }
  };
}

/// Rebuilds a call expression which yielded __unknown_anytype.
ExprResult RebuildUnknownAnyExpr::VisitCallExpr(CallExpr *E) {
  Expr *CalleeExpr = E->getCallee();

  enum FnKind {
    FK_MemberFunction,
    FK_FunctionPointer,
    FK_BlockPointer
  };

  FnKind Kind;
  QualType CalleeType = CalleeExpr->getType();
  if (CalleeType == S.Context.BoundMemberTy) {
    assert(isa<CXXMemberCallExpr>(E) || isa<CXXOperatorCallExpr>(E));
    Kind = FK_MemberFunction;
    CalleeType = Expr::findBoundMemberType(CalleeExpr);
  } else if (const PointerType *Ptr = CalleeType->getAs<PointerType>()) {
    CalleeType = Ptr->getPointeeType();
    Kind = FK_FunctionPointer;
  } else {
    CalleeType = CalleeType->castAs<BlockPointerType>()->getPointeeType();
    Kind = FK_BlockPointer;
  }
  const FunctionType *FnType = CalleeType->castAs<FunctionType>();

  // Verify that this is a legal result type of a function.
  if (DestType->isArrayType() || DestType->isFunctionType()) {
    unsigned diagID = diag::err_func_returning_array_function;
    if (Kind == FK_BlockPointer)
      diagID = diag::err_block_returning_array_function;

    S.Diag(E->getExprLoc(), diagID)
      << DestType->isFunctionType() << DestType;
    return ExprError();
  }

  // Otherwise, go ahead and set DestType as the call's result.
  E->setType(DestType.getNonLValueExprType(S.Context));
  E->setValueKind(Expr::getValueKindForType(DestType));
  assert(E->getObjectKind() == OK_Ordinary);

  // Rebuild the function type, replacing the result type with DestType.
  const FunctionProtoType *Proto = dyn_cast<FunctionProtoType>(FnType);
  if (Proto) {
    // __unknown_anytype(...) is a special case used by the debugger when
    // it has no idea what a function's signature is.
    //
    // We want to build this call essentially under the K&R
    // unprototyped rules, but making a FunctionNoProtoType in C++
    // would foul up all sorts of assumptions. However, we cannot
    // simply pass all arguments as variadic arguments, nor can we
    // portably just call the function under a non-variadic type; see
    // the comment on IR-gen's TargetInfo::isNoProtoCallVariadic.
    // However, it turns out that in practice it is generally safe to
    // call a function declared as "A foo(B,C,D);" under the prototype
    // "A foo(B,C,D,...);". The only known exception is with the
    // Windows ABI, where any variadic function is implicitly cdecl
    // regardless of its normal CC. Therefore we change the parameter
    // types to match the types of the arguments.
    //
    // This is a hack, but it is far superior to moving the
    // corresponding target-specific code from IR-gen to Sema/AST.
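    // Illustrative sketch (added, not part of the original comment; "foo"
    // is hypothetical): if the debugger declares
    //   __unknown_anytype foo(...);
    // and the user evaluates "(double)foo(1, 2.0f)", the code below rebuilds
    // the callee type as "double (int, float, ...)", taking the parameter
    // types from the actual arguments.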
    ArrayRef<QualType> ParamTypes = Proto->getParamTypes();
    SmallVector<QualType, 8> ArgTypes;
    if (ParamTypes.empty() && Proto->isVariadic()) { // the special case
      ArgTypes.reserve(E->getNumArgs());
      for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
        Expr *Arg = E->getArg(i);
        QualType ArgType = Arg->getType();
        if (E->isLValue()) {
          ArgType = S.Context.getLValueReferenceType(ArgType);
        } else if (E->isXValue()) {
          ArgType = S.Context.getRValueReferenceType(ArgType);
        }
        ArgTypes.push_back(ArgType);
      }
      ParamTypes = ArgTypes;
    }
    DestType = S.Context.getFunctionType(DestType, ParamTypes,
                                         Proto->getExtProtoInfo());
  } else {
    DestType = S.Context.getFunctionNoProtoType(DestType,
                                                FnType->getExtInfo());
  }

  // Rebuild the appropriate pointer-to-function type.
  switch (Kind) {
  case FK_MemberFunction:
    // Nothing to do.
    break;

  case FK_FunctionPointer:
    DestType = S.Context.getPointerType(DestType);
    break;

  case FK_BlockPointer:
    DestType = S.Context.getBlockPointerType(DestType);
    break;
  }

  // Finally, we can recurse.
  ExprResult CalleeResult = Visit(CalleeExpr);
  if (!CalleeResult.isUsable()) return ExprError();
  E->setCallee(CalleeResult.get());

  // Bind a temporary if necessary.
  return S.MaybeBindToTemporary(E);
}

ExprResult RebuildUnknownAnyExpr::VisitObjCMessageExpr(ObjCMessageExpr *E) {
  // Verify that this is a legal result type of a call.
  if (DestType->isArrayType() || DestType->isFunctionType()) {
    S.Diag(E->getExprLoc(), diag::err_func_returning_array_function)
      << DestType->isFunctionType() << DestType;
    return ExprError();
  }

  // Rewrite the method result type if available.
  if (ObjCMethodDecl *Method = E->getMethodDecl()) {
    assert(Method->getReturnType() == S.Context.UnknownAnyTy);
    Method->setReturnType(DestType);
  }

  // Change the type of the message.
  E->setType(DestType.getNonReferenceType());
  E->setValueKind(Expr::getValueKindForType(DestType));

  return S.MaybeBindToTemporary(E);
}

ExprResult RebuildUnknownAnyExpr::VisitImplicitCastExpr(ImplicitCastExpr *E) {
  // The only case we should ever see here is a function-to-pointer decay.
  if (E->getCastKind() == CK_FunctionToPointerDecay) {
    assert(E->getValueKind() == VK_RValue);
    assert(E->getObjectKind() == OK_Ordinary);

    E->setType(DestType);

    // Rebuild the sub-expression as the pointee (function) type.
    DestType = DestType->castAs<PointerType>()->getPointeeType();

    ExprResult Result = Visit(E->getSubExpr());
    if (!Result.isUsable()) return ExprError();

    E->setSubExpr(Result.get());
    return E;
  } else if (E->getCastKind() == CK_LValueToRValue) {
    assert(E->getValueKind() == VK_RValue);
    assert(E->getObjectKind() == OK_Ordinary);

    assert(isa<FunctionType>(E->getType()));

    E->setType(DestType);

    // The sub-expression has to be an lvalue reference, so rebuild it as such.
    DestType = S.Context.getLValueReferenceType(DestType);

    ExprResult Result = Visit(E->getSubExpr());
    if (!Result.isUsable()) return ExprError();

    E->setSubExpr(Result.get());
    return E;
  } else {
    llvm_unreachable("Unhandled cast type!");
  }
}

ExprResult RebuildUnknownAnyExpr::resolveDecl(Expr *E, ValueDecl *VD) {
  ExprValueKind ValueKind = VK_LValue;
  QualType Type = DestType;

  // We know how to make this work for certain kinds of decls:

  //  - functions
  if (FunctionDecl *FD = dyn_cast<FunctionDecl>(VD)) {
    if (const PointerType *Ptr = Type->getAs<PointerType>()) {
      DestType = Ptr->getPointeeType();
      ExprResult Result = resolveDecl(E, VD);
      if (Result.isInvalid()) return ExprError();
      return S.ImpCastExprToType(Result.get(), Type,
                                 CK_FunctionToPointerDecay, VK_RValue);
    }

    if (!Type->isFunctionType()) {
      S.Diag(E->getExprLoc(), diag::err_unknown_any_function)
        << VD << E->getSourceRange();
      return ExprError();
    }
    if (const FunctionProtoType *FT = Type->getAs<FunctionProtoType>()) {
      // We must match the FunctionDecl's type to the hack introduced in
      // RebuildUnknownAnyExpr::VisitCallExpr to vararg functions of unknown
      // type. See the lengthy commentary in that routine.
      QualType FDT = FD->getType();
      const FunctionType *FnType = FDT->castAs<FunctionType>();
      const FunctionProtoType *Proto =
          dyn_cast_or_null<FunctionProtoType>(FnType);
      DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E);
      if (DRE && Proto && Proto->getParamTypes().empty() &&
          Proto->isVariadic()) {
        SourceLocation Loc = FD->getLocation();
        FunctionDecl *NewFD = FunctionDecl::Create(FD->getASTContext(),
                                                   FD->getDeclContext(),
                                                   Loc, Loc,
                                                   FD->getNameInfo().getName(),
                                                   DestType,
                                                   FD->getTypeSourceInfo(),
                                                   SC_None,
                                                   false/*isInlineSpecified*/,
                                                   FD->hasPrototype(),
                                                   false/*isConstexprSpecified*/);

        if (FD->getQualifier())
          NewFD->setQualifierInfo(FD->getQualifierLoc());

        SmallVector<ParmVarDecl*, 16> Params;
        for (const auto &AI : FT->param_types()) {
          ParmVarDecl *Param =
            S.BuildParmVarDeclForTypedef(FD, Loc, AI);
          Param->setScopeInfo(0, Params.size());
          Params.push_back(Param);
        }
        NewFD->setParams(Params);
        DRE->setDecl(NewFD);
        VD = DRE->getDecl();
      }
    }

    if (CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(FD))
      if (MD->isInstance()) {
        ValueKind = VK_RValue;
        Type = S.Context.BoundMemberTy;
      }

    // Function references aren't l-values in C.
    if (!S.getLangOpts().CPlusPlus)
      ValueKind = VK_RValue;

  //  - variables
  } else if (isa<VarDecl>(VD)) {
    if (const ReferenceType *RefTy = Type->getAs<ReferenceType>()) {
      Type = RefTy->getPointeeType();
    } else if (Type->isFunctionType()) {
      S.Diag(E->getExprLoc(), diag::err_unknown_any_var_function_type)
        << VD << E->getSourceRange();
      return ExprError();
    }

  //  - nothing else
  } else {
    S.Diag(E->getExprLoc(), diag::err_unsupported_unknown_any_decl)
      << VD << E->getSourceRange();
    return ExprError();
  }

  // Modifying the declaration like this is friendly to IR-gen but
  // also really dangerous.
  VD->setType(DestType);
  E->setType(Type);
  E->setValueKind(ValueKind);
  return E;
}

/// Check a cast of an unknown-any type. We intentionally only
/// trigger this for C-style casts.
ExprResult Sema::checkUnknownAnyCast(SourceRange TypeRange, QualType CastType,
                                     Expr *CastExpr, CastKind &CastKind,
                                     ExprValueKind &VK, CXXCastPath &Path) {
  // The type we're casting to must be either void or complete.
  if (!CastType->isVoidType() &&
      RequireCompleteType(TypeRange.getBegin(), CastType,
                          diag::err_typecheck_cast_to_incomplete))
    return ExprError();

  // Rewrite the casted expression from scratch.
  ExprResult result = RebuildUnknownAnyExpr(*this, CastType).Visit(CastExpr);
  if (!result.isUsable()) return ExprError();

  CastExpr = result.get();
  VK = CastExpr->getValueKind();
  CastKind = CK_NoOp;

  return CastExpr;
}

ExprResult Sema::forceUnknownAnyToType(Expr *E, QualType ToType) {
  return RebuildUnknownAnyExpr(*this, ToType).Visit(E);
}

ExprResult Sema::checkUnknownAnyArg(SourceLocation callLoc,
                                    Expr *arg, QualType &paramType) {
  // If the syntactic form of the argument is not an explicit cast of
  // any sort, just do default argument promotion.
  ExplicitCastExpr *castArg = dyn_cast<ExplicitCastExpr>(arg->IgnoreParens());
  if (!castArg) {
    ExprResult result = DefaultArgumentPromotion(arg);
    if (result.isInvalid()) return ExprError();
    paramType = result.get()->getType();
    return result;
  }

  // Otherwise, use the type that was written in the explicit cast.
  assert(!arg->hasPlaceholderType());
  paramType = castArg->getTypeAsWritten();

  // Copy-initialize a parameter of that type.
  InitializedEntity entity =
    InitializedEntity::InitializeParameter(Context, paramType,
                                           /*consumed*/ false);
  return PerformCopyInitialization(entity, callLoc, arg);
}

static ExprResult diagnoseUnknownAnyExpr(Sema &S, Expr *E) {
  Expr *orig = E;
  unsigned diagID = diag::err_uncasted_use_of_unknown_any;
  while (true) {
    E = E->IgnoreParenImpCasts();
    if (CallExpr *call = dyn_cast<CallExpr>(E)) {
      E = call->getCallee();
      diagID = diag::err_uncasted_call_of_unknown_any;
    } else {
      break;
    }
  }

  SourceLocation loc;
  NamedDecl *d;
  if (DeclRefExpr *ref = dyn_cast<DeclRefExpr>(E)) {
    loc = ref->getLocation();
    d = ref->getDecl();
  } else if (MemberExpr *mem = dyn_cast<MemberExpr>(E)) {
    loc = mem->getMemberLoc();
    d = mem->getMemberDecl();
  } else if (ObjCMessageExpr *msg = dyn_cast<ObjCMessageExpr>(E)) {
    diagID = diag::err_uncasted_call_of_unknown_any;
    loc = msg->getSelectorStartLoc();
    d = msg->getMethodDecl();
    if (!d) {
      S.Diag(loc, diag::err_uncasted_send_to_unknown_any_method)
        << static_cast<unsigned>(msg->isClassMessage()) << msg->getSelector()
        << orig->getSourceRange();
      return ExprError();
    }
  } else {
    S.Diag(E->getExprLoc(), diag::err_unsupported_unknown_any_expr)
      << E->getSourceRange();
    return ExprError();
  }

  S.Diag(loc, diagID) << d << orig->getSourceRange();

  // Never recoverable.
  return ExprError();
}

/// Check for operands with placeholder types and complain if found.
/// Returns ExprError() if there was an error and no recovery was possible.
ExprResult Sema::CheckPlaceholderExpr(Expr *E) {
  if (!getLangOpts().CPlusPlus) {
    // C cannot handle TypoExpr nodes on either side of a binop because it
    // doesn't handle dependent types properly, so make sure any TypoExprs have
    // been dealt with before checking the operands.
    ExprResult Result = CorrectDelayedTyposInExpr(E);
    if (!Result.isUsable()) return ExprError();
    E = Result.get();
  }

  const BuiltinType *placeholderType = E->getType()->getAsPlaceholderType();
  if (!placeholderType) return E;

  switch (placeholderType->getKind()) {

  // Overloaded expressions.
  case BuiltinType::Overload: {
    // Try to resolve a single function template specialization.
    // This is obligatory.
    ExprResult Result = E;
    if (ResolveAndFixSingleFunctionTemplateSpecialization(Result, false))
      return Result;

    // No guarantees that ResolveAndFixSingleFunctionTemplateSpecialization
    // leaves Result unchanged on failure.
    Result = E;
    if (resolveAndFixAddressOfOnlyViableOverloadCandidate(Result))
      return Result;

    // If that failed, try to recover with a call.
    tryToRecoverWithCall(Result, PDiag(diag::err_ovl_unresolvable),
                         /*complain*/ true);
    return Result;
  }

  // Bound member functions.
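  // Illustrative example (added commentary): a member function named
  // without a call, e.g. "obj.size" or "p->~T" with no "()", reaches this
  // case with BoundMember type; (pseudo-)destructors get a more specific
  // diagnostic below before call-based recovery is attempted.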
  case BuiltinType::BoundMember: {
    ExprResult result = E;
    const Expr *BME = E->IgnoreParens();
    PartialDiagnostic PD = PDiag(diag::err_bound_member_function);
    // Try to give a nicer diagnostic if it is a bound member that we recognize.
    if (isa<CXXPseudoDestructorExpr>(BME)) {
      PD = PDiag(diag::err_dtor_expr_without_call) << /*pseudo-destructor*/ 1;
    } else if (const auto *ME = dyn_cast<MemberExpr>(BME)) {
      if (ME->getMemberNameInfo().getName().getNameKind() ==
          DeclarationName::CXXDestructorName)
        PD = PDiag(diag::err_dtor_expr_without_call) << /*destructor*/ 0;
    }
    tryToRecoverWithCall(result, PD,
                         /*complain*/ true);
    return result;
  }

  // ARC unbridged casts.
  case BuiltinType::ARCUnbridgedCast: {
    Expr *realCast = stripARCUnbridgedCast(E);
    diagnoseARCUnbridgedCast(realCast);
    return realCast;
  }

  // Expressions of unknown type.
  case BuiltinType::UnknownAny:
    return diagnoseUnknownAnyExpr(*this, E);

  // Pseudo-objects.
  case BuiltinType::PseudoObject:
    return checkPseudoObjectRValue(E);

  case BuiltinType::BuiltinFn: {
    // Accept __noop without parens by implicitly converting it to a call expr.
    auto *DRE = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts());
    if (DRE) {
      auto *FD = cast<FunctionDecl>(DRE->getDecl());
      if (FD->getBuiltinID() == Builtin::BI__noop) {
        E = ImpCastExprToType(E, Context.getPointerType(FD->getType()),
                              CK_BuiltinFnToFnPtr).get();
        return new (Context) CallExpr(Context, E, None, Context.IntTy,
                                      VK_RValue, SourceLocation());
      }
    }
    Diag(E->getLocStart(), diag::err_builtin_fn_use);
    return ExprError();
  }

  // Expressions of unknown type.
  case BuiltinType::OMPArraySection:
    Diag(E->getLocStart(), diag::err_omp_array_section_use);
    return ExprError();

  // Everything else should be impossible.
#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
  case BuiltinType::Id:
#include "clang/Basic/OpenCLImageTypes.def"
#define BUILTIN_TYPE(Id, SingletonId) case BuiltinType::Id:
#define PLACEHOLDER_TYPE(Id, SingletonId)
#include "clang/AST/BuiltinTypes.def"
    break;
  }

  llvm_unreachable("invalid placeholder type!");
}

bool Sema::CheckCaseExpression(Expr *E) {
  if (E->isTypeDependent())
    return true;
  if (E->isValueDependent() || E->isIntegerConstantExpr(Context))
    return E->getType()->isIntegralOrEnumerationType();
  return false;
}

/// ActOnObjCBoolLiteral - Parse {__objc_yes,__objc_no} literals.
ExprResult
Sema::ActOnObjCBoolLiteral(SourceLocation OpLoc, tok::TokenKind Kind) {
  assert((Kind == tok::kw___objc_yes || Kind == tok::kw___objc_no) &&
         "Unknown Objective-C Boolean value!");
  QualType BoolT = Context.ObjCBuiltinBoolTy;
  if (!Context.getBOOLDecl()) {
    LookupResult Result(*this, &Context.Idents.get("BOOL"), OpLoc,
                        Sema::LookupOrdinaryName);
    if (LookupName(Result, getCurScope()) && Result.isSingleResult()) {
      NamedDecl *ND = Result.getFoundDecl();
      if (TypedefDecl *TD = dyn_cast<TypedefDecl>(ND))
        Context.setBOOLDecl(TD);
    }
  }
  if (Context.getBOOLDecl())
    BoolT = Context.getBOOLType();
  return new (Context) ObjCBoolLiteralExpr(Kind == tok::kw___objc_yes, BoolT,
                                           OpLoc);
}

ExprResult Sema::ActOnObjCAvailabilityCheckExpr(
    llvm::ArrayRef<AvailabilitySpec> AvailSpecs, SourceLocation AtLoc,
    SourceLocation RParen) {

  StringRef Platform = getASTContext().getTargetInfo().getPlatformName();

  auto Spec = std::find_if(AvailSpecs.begin(), AvailSpecs.end(),
                           [&](const AvailabilitySpec &Spec) {
                             return Spec.getPlatform() == Platform;
                           });

  VersionTuple Version;
  if (Spec != AvailSpecs.end())
    Version = Spec->getVersion();

  // The use of `@available` in the enclosing function should be analyzed to
  // warn when it's used inappropriately (i.e. not if(@available)).
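  // Illustrative example (added commentary): the supported pattern is
  //   if (@available(macOS 10.12, *)) { /* guarded code */ }
  // whereas other uses, e.g. "BOOL b = @available(iOS 11, *);", are what
  // that analysis is meant to flag.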
  if (getCurFunctionOrMethodDecl())
    getEnclosingFunction()->HasPotentialAvailabilityViolations = true;
  else if (getCurBlock() || getCurLambda())
    getCurFunction()->HasPotentialAvailabilityViolations = true;

  return new (Context)
      ObjCAvailabilityCheckExpr(Version, AtLoc, RParen, Context.BoolTy);
}
Index: head/contrib/llvm/tools/clang
===================================================================
--- head/contrib/llvm/tools/clang	(revision 329982)
+++ head/contrib/llvm/tools/clang	(revision 329983)

Property changes on: head/contrib/llvm/tools/clang
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
Merged /vendor/clang/dist-release_60:r329406-329940
Index: head/contrib/llvm/tools/lld/ELF/Driver.cpp
===================================================================
--- head/contrib/llvm/tools/lld/ELF/Driver.cpp	(revision 329982)
+++ head/contrib/llvm/tools/lld/ELF/Driver.cpp	(revision 329983)
@@ -1,1128 +1,1133 @@
//===- Driver.cpp ---------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// The driver drives the entire linking process. It is responsible for
// parsing command line options and doing whatever it is instructed to do.
//
// One notable thing in the LLD's driver when compared to other linkers is
// that the LLD's driver is agnostic on the host operating system.
// Other linkers usually have implicit default values (such as a dynamic
// linker path or library paths) for each host OS.
//
// I don't think implicit default values are useful because they are
// usually explicitly specified by the compiler driver. They can even
// be harmful when you are doing cross-linking. Therefore, in LLD, we
// simply trust the compiler driver to pass all required options and
// don't try to make effort on our side.
//
//===----------------------------------------------------------------------===//

#include "Driver.h"
#include "Config.h"
#include "Filesystem.h"
#include "ICF.h"
#include "InputFiles.h"
#include "InputSection.h"
#include "LinkerScript.h"
#include "OutputSections.h"
#include "ScriptParser.h"
#include "Strings.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "Writer.h"
#include "lld/Common/Args.h"
#include "lld/Common/Driver.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Memory.h"
#include "lld/Common/Threads.h"
#include "lld/Common/Version.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/TarWriter.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdlib>
#include <utility>

using namespace llvm;
using namespace llvm::ELF;
using namespace llvm::object;
using namespace llvm::sys;

using namespace lld;
using namespace lld::elf;

Configuration *elf::Config;
LinkerDriver *elf::Driver;

static void setConfigs();

bool elf::link(ArrayRef<const char *> Args, bool CanExitEarly,
               raw_ostream &Error) {
  errorHandler().LogName = Args[0];
  errorHandler().ErrorLimitExceededMsg =
      "too many errors emitted, stopping now (use "
      "-error-limit=0 to see all errors)";
  errorHandler().ErrorOS = &Error;
  errorHandler().ColorDiagnostics = Error.has_colors();
  InputSections.clear();
  OutputSections.clear();
  Tar = nullptr;
  BinaryFiles.clear();
  BitcodeFiles.clear();
  ObjectFiles.clear();
  SharedFiles.clear();

  Config = make<Configuration>();
  Driver = make<LinkerDriver>();
  Script = make<LinkerScript>();
  Symtab = make<SymbolTable>();
  Config->Argv = {Args.begin(), Args.end()};

  Driver->main(Args, CanExitEarly);

  // Exit immediately if we don't need to return to the caller.
  // This saves time because the overhead of calling destructors
  // for all globally-allocated objects is not negligible.
  if (Config->ExitEarly)
    exitLld(errorCount() ? 1 : 0);

  freeArena();
  return !errorCount();
}

// Parses a linker -m option.
static std::tuple<ELFKind, uint16_t, uint8_t> parseEmulation(StringRef Emul) {
  uint8_t OSABI = 0;
  StringRef S = Emul;
  if (S.endswith("_fbsd")) {
    S = S.drop_back(5);
    OSABI = ELFOSABI_FREEBSD;
  }

  std::pair<ELFKind, uint16_t> Ret =
      StringSwitch<std::pair<ELFKind, uint16_t>>(S)
          .Cases("aarch64elf", "aarch64linux", {ELF64LEKind, EM_AARCH64})
          .Cases("armelf", "armelf_linux_eabi", {ELF32LEKind, EM_ARM})
          .Case("elf32_x86_64", {ELF32LEKind, EM_X86_64})
          .Cases("elf32btsmip", "elf32btsmipn32", {ELF32BEKind, EM_MIPS})
          .Cases("elf32ltsmip", "elf32ltsmipn32", {ELF32LEKind, EM_MIPS})
          .Case("elf32ppc", {ELF32BEKind, EM_PPC})
          .Case("elf64btsmip", {ELF64BEKind, EM_MIPS})
          .Case("elf64ltsmip", {ELF64LEKind, EM_MIPS})
          .Case("elf64ppc", {ELF64BEKind, EM_PPC64})
          .Cases("elf_amd64", "elf_x86_64", {ELF64LEKind, EM_X86_64})
          .Case("elf_i386", {ELF32LEKind, EM_386})
          .Case("elf_iamcu", {ELF32LEKind, EM_IAMCU})
          .Default({ELFNoneKind, EM_NONE});

  if (Ret.first == ELFNoneKind)
    error("unknown emulation: " + Emul);
  return std::make_tuple(Ret.first, Ret.second, OSABI);
}

// Returns slices of MB by parsing MB as an archive file.
// Each slice consists of a member file in the archive.
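// (Added note, not from the original source; the file names are
// hypothetical: for an archive created with, say, "ar rcs libfoo.a a.o b.o",
// the returned vector holds one (MemoryBufferRef, member-offset) pair for
// a.o and one for b.o.)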
std::vector<std::pair<MemoryBufferRef, uint64_t>> static getArchiveMembers(
    MemoryBufferRef MB) {
  std::unique_ptr<Archive> File =
      CHECK(Archive::create(MB),
            MB.getBufferIdentifier() + ": failed to parse archive");

  std::vector<std::pair<MemoryBufferRef, uint64_t>> V;
  Error Err = Error::success();
  bool AddToTar = File->isThin() && Tar;
  for (const ErrorOr<Archive::Child> &COrErr : File->children(Err)) {
    Archive::Child C =
        CHECK(COrErr, MB.getBufferIdentifier() +
                          ": could not get the child of the archive");
    MemoryBufferRef MBRef =
        CHECK(C.getMemoryBufferRef(),
              MB.getBufferIdentifier() +
                  ": could not get the buffer for a child of the archive");
    if (AddToTar)
      Tar->append(relativeToRoot(check(C.getFullName())), MBRef.getBuffer());
    V.push_back(std::make_pair(MBRef, C.getChildOffset()));
  }
  if (Err)
    fatal(MB.getBufferIdentifier() + ": Archive::children failed: " +
          toString(std::move(Err)));

  // Take ownership of memory buffers created for members of thin archives.
  for (std::unique_ptr<MemoryBuffer> &MB : File->takeThinBuffers())
    make<std::unique_ptr<MemoryBuffer>>(std::move(MB));

  return V;
}

// Opens a file and creates a file object. Path has to be resolved already.
void LinkerDriver::addFile(StringRef Path, bool WithLOption) {
  using namespace sys::fs;

  Optional<MemoryBufferRef> Buffer = readFile(Path);
  if (!Buffer.hasValue())
    return;
  MemoryBufferRef MBRef = *Buffer;

  if (InBinary) {
    Files.push_back(make<BinaryFile>(MBRef));
    return;
  }

  switch (identify_magic(MBRef.getBuffer())) {
  case file_magic::unknown:
    readLinkerScript(MBRef);
    return;
  case file_magic::archive: {
    // Handle -whole-archive.
    if (InWholeArchive) {
      for (const auto &P : getArchiveMembers(MBRef))
        Files.push_back(createObjectFile(P.first, Path, P.second));
      return;
    }

    std::unique_ptr<Archive> File =
        CHECK(Archive::create(MBRef), Path + ": failed to parse archive");

    // If an archive file has no symbol table, it is likely that a user
    // is attempting LTO and using a default ar command that doesn't
    // understand the LLVM bitcode file. It is a pretty common error, so
    // we'll handle it as if it had a symbol table.
    if (!File->isEmpty() && !File->hasSymbolTable()) {
      for (const auto &P : getArchiveMembers(MBRef))
        Files.push_back(make<LazyObjFile>(P.first, Path, P.second));
      return;
    }

    // Handle the regular case.
    Files.push_back(make<ArchiveFile>(std::move(File)));
    return;
  }
  case file_magic::elf_shared_object:
    if (Config->Relocatable) {
      error("attempted static link of dynamic object " + Path);
      return;
    }

    // DSOs usually have DT_SONAME tags in their ELF headers, and the
    // sonames are used to identify DSOs. But if they are missing,
    // they are identified by filenames. We don't know whether the new
    // file has a DT_SONAME or not because we haven't parsed it yet.
    // Here, we set the default soname for the file because we might
    // need it later.
    //
    // If a file was specified by -lfoo, the directory part is not
    // significant, as a user did not specify it. This behavior is
    // compatible with GNU.
    Files.push_back(
        createSharedFile(MBRef, WithLOption ? path::filename(Path) : Path));
    return;
  default:
    if (InLib)
      Files.push_back(make<LazyObjFile>(MBRef, "", 0));
    else
      Files.push_back(createObjectFile(MBRef));
  }
}

// Add a given library by searching it from input search paths.
void LinkerDriver::addLibrary(StringRef Name) {
  if (Optional<std::string> Path = searchLibrary(Name))
    addFile(*Path, /*WithLOption=*/true);
  else
    error("unable to find library -l" + Name);
}

// This function is called on startup. We need this for LTO since
// LTO calls LLVM functions to compile bitcode files to native code.
// Technically this can be delayed until we read bitcode files, but
// we don't bother to do it lazily because the initialization is fast.
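// (Added note; the command lines are illustrative assumptions, not from the
// original source: a link such as "ld.lld main.o libbitcode.a -o out" may
// reach the LTO code generator, and extra backend flags can be forwarded
// via -mllvm, e.g. "-mllvm -time-passes".)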
static void initLLVM(opt::InputArgList &Args) {
  InitializeAllTargets();
  InitializeAllTargetMCs();
  InitializeAllAsmPrinters();
  InitializeAllAsmParsers();

  // Parse and evaluate -mllvm options.
  std::vector<const char *> V;
  V.push_back("lld (LLVM option parsing)");
  for (auto *Arg : Args.filtered(OPT_mllvm))
    V.push_back(Arg->getValue());
  cl::ParseCommandLineOptions(V.size(), V.data());
}

// Some command line options or some combinations of them are not allowed.
// This function checks for such errors.
static void checkOptions(opt::InputArgList &Args) {
  // The MIPS ABI as of 2016 does not support the GNU-style symbol lookup
  // table, which is a relatively new feature.
  if (Config->EMachine == EM_MIPS && Config->GnuHash)
    error("the .gnu.hash section is not compatible with the MIPS target.");

  if (Config->FixCortexA53Errata843419 && Config->EMachine != EM_AARCH64)
    error("--fix-cortex-a53-843419 is only supported on AArch64 targets.");

  if (Config->Pie && Config->Shared)
    error("-shared and -pie may not be used together");

  if (!Config->Shared && !Config->FilterList.empty())
    error("-F may not be used without -shared");

  if (!Config->Shared && !Config->AuxiliaryList.empty())
    error("-f may not be used without -shared");

  if (!Config->Relocatable && !Config->DefineCommon)
    error("-no-define-common not supported in non relocatable output");

  if (Config->Relocatable) {
    if (Config->Shared)
      error("-r and -shared may not be used together");
    if (Config->GcSections)
      error("-r and --gc-sections may not be used together");
    if (Config->ICF)
      error("-r and --icf may not be used together");
    if (Config->Pie)
      error("-r and -pie may not be used together");
  }
}

static const char *getReproduceOption(opt::InputArgList &Args) {
  if (auto *Arg = Args.getLastArg(OPT_reproduce))
    return Arg->getValue();
  return getenv("LLD_REPRODUCE");
}

static bool hasZOption(opt::InputArgList &Args, StringRef Key) {
  for (auto *Arg : Args.filtered(OPT_z))
    if (Key == Arg->getValue())
      return true;
  return false;
}

void LinkerDriver::main(ArrayRef<const char *> ArgsArr, bool CanExitEarly) {
  ELFOptTable Parser;
  opt::InputArgList Args = Parser.parse(ArgsArr.slice(1));

  // Interpret this flag early because error() depends on it.
  errorHandler().ErrorLimit = args::getInteger(Args, OPT_error_limit, 20);

  // Handle -help
  if (Args.hasArg(OPT_help)) {
    printHelp(ArgsArr[0]);
    return;
  }

  // Handle -v or -version.
  //
  // A note about the "compatible with GNU linkers" message: this is a hack
  // for scripts generated by GNU Libtool 2.4.6 (released in February 2014
  // and still the newest version in March 2017) or earlier to recognize LLD
  // as a GNU-compatible linker. As long as the output for the -v option
  // contains "GNU" or "with BFD", they recognize us as GNU-compatible.
  //
  // This is a somewhat ugly hack, but in reality, we had no choice other
  // than doing this. Considering the very long release cycle of Libtool,
  // it is not easy to improve it to recognize LLD as a GNU-compatible
  // linker in a timely manner. Even if we could, there are still a lot of
  // "configure" scripts out there that were generated by old versions of
  // Libtool. We cannot convince every software developer to migrate to
  // the latest version and re-generate scripts. So we have this hack.
  if (Args.hasArg(OPT_v) || Args.hasArg(OPT_version))
    message(getLLDVersion() + " (compatible with GNU linkers)");

  // The behavior of -v or --version is a bit strange, but this is
  // needed for compatibility with GNU linkers.
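  // Illustrative behavior (added commentary): "ld.lld -v" with no input
  // files prints the version string and exits via the early returns below,
  // while "ld.lld -v foo.o" prints it and then performs the link.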
  if (Args.hasArg(OPT_v) && !Args.hasArg(OPT_INPUT))
    return;
  if (Args.hasArg(OPT_version))
    return;

  Config->ExitEarly = CanExitEarly && !Args.hasArg(OPT_full_shutdown);
  errorHandler().ExitEarly = Config->ExitEarly;

  if (const char *Path = getReproduceOption(Args)) {
    // Note that --reproduce is a debug option so you can ignore it
    // if you are trying to understand the whole picture of the code.
    Expected<std::unique_ptr<TarWriter>> ErrOrWriter =
        TarWriter::create(Path, path::stem(Path));
    if (ErrOrWriter) {
      Tar = ErrOrWriter->get();
      Tar->append("response.txt", createResponseFile(Args));
      Tar->append("version.txt", getLLDVersion() + "\n");
      make<std::unique_ptr<TarWriter>>(std::move(*ErrOrWriter));
    } else {
      error(Twine("--reproduce: failed to open ") + Path + ": " +
            toString(ErrOrWriter.takeError()));
    }
  }

  readConfigs(Args);
  initLLVM(Args);
  createFiles(Args);
  inferMachineType();
  setConfigs();
  checkOptions(Args);
  if (errorCount())
    return;

  switch (Config->EKind) {
  case ELF32LEKind:
    link<ELF32LE>(Args);
    return;
  case ELF32BEKind:
    link<ELF32BE>(Args);
    return;
  case ELF64LEKind:
    link<ELF64LE>(Args);
    return;
  case ELF64BEKind:
    link<ELF64BE>(Args);
    return;
  default:
    llvm_unreachable("unknown Config->EKind");
  }
}

static std::string getRpath(opt::InputArgList &Args) {
  std::vector<StringRef> V = args::getStrings(Args, OPT_rpath);
  return llvm::join(V.begin(), V.end(), ":");
}

// Determines what we should do if there are remaining unresolved
// symbols after the name resolution.
static UnresolvedPolicy getUnresolvedSymbolPolicy(opt::InputArgList &Args) {
  if (Args.hasArg(OPT_relocatable))
    return UnresolvedPolicy::IgnoreAll;

  UnresolvedPolicy ErrorOrWarn = Args.hasFlag(OPT_error_unresolved_symbols,
                                              OPT_warn_unresolved_symbols, true)
                                     ? UnresolvedPolicy::ReportError
                                     : UnresolvedPolicy::Warn;

  // Process the last of -unresolved-symbols, -no-undefined or -z defs.
  for (auto *Arg : llvm::reverse(Args)) {
    switch (Arg->getOption().getID()) {
    case OPT_unresolved_symbols: {
      StringRef S = Arg->getValue();
      if (S == "ignore-all" || S == "ignore-in-object-files")
        return UnresolvedPolicy::Ignore;
      if (S == "ignore-in-shared-libs" || S == "report-all")
        return ErrorOrWarn;
      error("unknown --unresolved-symbols value: " + S);
      continue;
    }
    case OPT_no_undefined:
      return ErrorOrWarn;
    case OPT_z:
      if (StringRef(Arg->getValue()) == "defs")
        return ErrorOrWarn;
      continue;
    }
  }

  // -shared implies -unresolved-symbols=ignore-all because missing
  // symbols are likely to be resolved at runtime using other DSOs.
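  // Illustrative example (added commentary; file names are hypothetical):
  // "ld.lld -shared plugin.o -o plugin.so" does not complain about symbols
  // the plugin expects to find in its host program at runtime, matching the
  // ignore-all policy applied here.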
  if (Config->Shared)
    return UnresolvedPolicy::Ignore;
  return ErrorOrWarn;
}

static Target2Policy getTarget2(opt::InputArgList &Args) {
  StringRef S = Args.getLastArgValue(OPT_target2, "got-rel");
  if (S == "rel")
    return Target2Policy::Rel;
  if (S == "abs")
    return Target2Policy::Abs;
  if (S == "got-rel")
    return Target2Policy::GotRel;
  error("unknown --target2 option: " + S);
  return Target2Policy::GotRel;
}

static bool isOutputFormatBinary(opt::InputArgList &Args) {
  if (auto *Arg = Args.getLastArg(OPT_oformat)) {
    StringRef S = Arg->getValue();
    if (S == "binary")
      return true;
    error("unknown --oformat value: " + S);
  }
  return false;
}

static DiscardPolicy getDiscard(opt::InputArgList &Args) {
  if (Args.hasArg(OPT_relocatable))
    return DiscardPolicy::None;

  auto *Arg =
      Args.getLastArg(OPT_discard_all, OPT_discard_locals, OPT_discard_none);
  if (!Arg)
    return DiscardPolicy::Default;
  if (Arg->getOption().getID() == OPT_discard_all)
    return DiscardPolicy::All;
  if (Arg->getOption().getID() == OPT_discard_locals)
    return DiscardPolicy::Locals;
  return DiscardPolicy::None;
}

static StringRef getDynamicLinker(opt::InputArgList &Args) {
  auto *Arg = Args.getLastArg(OPT_dynamic_linker, OPT_no_dynamic_linker);
  if (!Arg || Arg->getOption().getID() == OPT_no_dynamic_linker)
    return "";
  return Arg->getValue();
}

static StripPolicy getStrip(opt::InputArgList &Args) {
  if (Args.hasArg(OPT_relocatable))
    return StripPolicy::None;

  auto *Arg = Args.getLastArg(OPT_strip_all, OPT_strip_debug);
  if (!Arg)
    return StripPolicy::None;
  if (Arg->getOption().getID() == OPT_strip_all)
    return StripPolicy::All;
  return StripPolicy::Debug;
}

static uint64_t parseSectionAddress(StringRef S, const opt::Arg &Arg) {
  uint64_t VA = 0;
  if (S.startswith("0x"))
    S = S.drop_front(2);
  if (!to_integer(S, VA, 16))
    error("invalid argument: " + toString(Arg));
  return VA;
}

static StringMap<uint64_t> getSectionStartMap(opt::InputArgList &Args) {
  StringMap<uint64_t> Ret;
  for (auto *Arg : Args.filtered(OPT_section_start)) {
    StringRef Name;
    StringRef Addr;
    std::tie(Name, Addr) = StringRef(Arg->getValue()).split('=');
    Ret[Name] = parseSectionAddress(Addr, *Arg);
  }

  if (auto *Arg = Args.getLastArg(OPT_Ttext))
    Ret[".text"] = parseSectionAddress(Arg->getValue(), *Arg);
  if (auto *Arg = Args.getLastArg(OPT_Tdata))
    Ret[".data"] = parseSectionAddress(Arg->getValue(), *Arg);
  if (auto *Arg = Args.getLastArg(OPT_Tbss))
    Ret[".bss"] = parseSectionAddress(Arg->getValue(), *Arg);
  return Ret;
}

static SortSectionPolicy getSortSection(opt::InputArgList &Args) {
  StringRef S = Args.getLastArgValue(OPT_sort_section);
  if (S == "alignment")
    return SortSectionPolicy::Alignment;
  if (S == "name")
    return SortSectionPolicy::Name;
  if (!S.empty())
    error("unknown --sort-section rule: " + S);
  return SortSectionPolicy::Default;
}

static OrphanHandlingPolicy getOrphanHandling(opt::InputArgList &Args) {
  StringRef S = Args.getLastArgValue(OPT_orphan_handling, "place");
  if (S == "warn")
    return OrphanHandlingPolicy::Warn;
  if (S == "error")
    return OrphanHandlingPolicy::Error;
  if (S != "place")
    error("unknown --orphan-handling mode: " + S);
  return OrphanHandlingPolicy::Place;
}

// Parse --build-id or --build-id=<style>