Index: head/contrib/compiler-rt =================================================================== --- head/contrib/compiler-rt (revision 329982) +++ head/contrib/compiler-rt (revision 329983) Property changes on: head/contrib/compiler-rt ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /vendor/compiler-rt/dist-release_60:r329406-329940 Index: head/contrib/libc++ =================================================================== --- head/contrib/libc++ (revision 329982) +++ head/contrib/libc++ (revision 329983) Property changes on: head/contrib/libc++ ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /vendor/libc++/dist-release_60:r329406-329940 Index: head/contrib/llvm/include/llvm/Bitcode/LLVMBitCodes.h =================================================================== --- head/contrib/llvm/include/llvm/Bitcode/LLVMBitCodes.h (revision 329982) +++ head/contrib/llvm/include/llvm/Bitcode/LLVMBitCodes.h (revision 329983) @@ -1,585 +1,599 @@ //===- LLVMBitCodes.h - Enum values for the LLVM bitcode format -*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This header defines Bitcode enum values for LLVM IR bitcode files. // // The enum values defined in this file should be considered permanent. If // new features are added, they should have values added at the end of the // respective lists. // //===----------------------------------------------------------------------===// #ifndef LLVM_BITCODE_LLVMBITCODES_H #define LLVM_BITCODE_LLVMBITCODES_H #include "llvm/Bitcode/BitCodes.h" namespace llvm { namespace bitc { // The only top-level block types are MODULE, IDENTIFICATION, STRTAB and SYMTAB. enum BlockIDs { // Blocks MODULE_BLOCK_ID = FIRST_APPLICATION_BLOCKID, // Module sub-block id's. PARAMATTR_BLOCK_ID, PARAMATTR_GROUP_BLOCK_ID, CONSTANTS_BLOCK_ID, FUNCTION_BLOCK_ID, // Block intended to contains information on the bitcode versioning. // Can be used to provide better error messages when we fail to parse a // bitcode file. IDENTIFICATION_BLOCK_ID, VALUE_SYMTAB_BLOCK_ID, METADATA_BLOCK_ID, METADATA_ATTACHMENT_ID, TYPE_BLOCK_ID_NEW, USELIST_BLOCK_ID, MODULE_STRTAB_BLOCK_ID, GLOBALVAL_SUMMARY_BLOCK_ID, OPERAND_BUNDLE_TAGS_BLOCK_ID, METADATA_KIND_BLOCK_ID, STRTAB_BLOCK_ID, FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID, SYMTAB_BLOCK_ID, SYNC_SCOPE_NAMES_BLOCK_ID, }; /// Identification block contains a string that describes the producer details, /// and an epoch that defines the auto-upgrade capability. enum IdentificationCodes { IDENTIFICATION_CODE_STRING = 1, // IDENTIFICATION: [strchr x N] IDENTIFICATION_CODE_EPOCH = 2, // EPOCH: [epoch#] }; /// The epoch that defines the auto-upgrade compatibility for the bitcode. /// /// LLVM guarantees in a major release that a minor release can read bitcode /// generated by previous minor releases. We translate this by making the reader /// accepting only bitcode with the same epoch, except for the X.0 release which /// also accepts N-1. enum { BITCODE_CURRENT_EPOCH = 0 }; /// MODULE blocks have a number of optional fields and subblocks. 
enum ModuleCodes { MODULE_CODE_VERSION = 1, // VERSION: [version#] MODULE_CODE_TRIPLE = 2, // TRIPLE: [strchr x N] MODULE_CODE_DATALAYOUT = 3, // DATALAYOUT: [strchr x N] MODULE_CODE_ASM = 4, // ASM: [strchr x N] MODULE_CODE_SECTIONNAME = 5, // SECTIONNAME: [strchr x N] // FIXME: Remove DEPLIB in 4.0. MODULE_CODE_DEPLIB = 6, // DEPLIB: [strchr x N] // GLOBALVAR: [pointer type, isconst, initid, // linkage, alignment, section, visibility, threadlocal] MODULE_CODE_GLOBALVAR = 7, // FUNCTION: [type, callingconv, isproto, linkage, paramattrs, alignment, // section, visibility, gc, unnamed_addr] MODULE_CODE_FUNCTION = 8, // ALIAS: [alias type, aliasee val#, linkage, visibility] MODULE_CODE_ALIAS_OLD = 9, MODULE_CODE_GCNAME = 11, // GCNAME: [strchr x N] MODULE_CODE_COMDAT = 12, // COMDAT: [selection_kind, name] MODULE_CODE_VSTOFFSET = 13, // VSTOFFSET: [offset] // ALIAS: [alias value type, addrspace, aliasee val#, linkage, visibility] MODULE_CODE_ALIAS = 14, MODULE_CODE_METADATA_VALUES_UNUSED = 15, // SOURCE_FILENAME: [namechar x N] MODULE_CODE_SOURCE_FILENAME = 16, // HASH: [5*i32] MODULE_CODE_HASH = 17, // IFUNC: [ifunc value type, addrspace, resolver val#, linkage, visibility] MODULE_CODE_IFUNC = 18, }; /// PARAMATTR blocks have code for defining a parameter attribute set. enum AttributeCodes { // FIXME: Remove `PARAMATTR_CODE_ENTRY_OLD' in 4.0 PARAMATTR_CODE_ENTRY_OLD = 1, // ENTRY: [paramidx0, attr0, // paramidx1, attr1...] PARAMATTR_CODE_ENTRY = 2, // ENTRY: [attrgrp0, attrgrp1, ...] PARAMATTR_GRP_CODE_ENTRY = 3 // ENTRY: [grpid, idx, attr0, attr1, ...] }; /// TYPE blocks have codes for each type primitive they use. enum TypeCodes { TYPE_CODE_NUMENTRY = 1, // NUMENTRY: [numentries] // Type Codes TYPE_CODE_VOID = 2, // VOID TYPE_CODE_FLOAT = 3, // FLOAT TYPE_CODE_DOUBLE = 4, // DOUBLE TYPE_CODE_LABEL = 5, // LABEL TYPE_CODE_OPAQUE = 6, // OPAQUE TYPE_CODE_INTEGER = 7, // INTEGER: [width] TYPE_CODE_POINTER = 8, // POINTER: [pointee type] TYPE_CODE_FUNCTION_OLD = 9, // FUNCTION: [vararg, attrid, retty, // paramty x N] TYPE_CODE_HALF = 10, // HALF TYPE_CODE_ARRAY = 11, // ARRAY: [numelts, eltty] TYPE_CODE_VECTOR = 12, // VECTOR: [numelts, eltty] // These are not with the other floating point types because they're // a late addition, and putting them in the right place breaks // binary compatibility. TYPE_CODE_X86_FP80 = 13, // X86 LONG DOUBLE TYPE_CODE_FP128 = 14, // LONG DOUBLE (112 bit mantissa) TYPE_CODE_PPC_FP128 = 15, // PPC LONG DOUBLE (2 doubles) TYPE_CODE_METADATA = 16, // METADATA TYPE_CODE_X86_MMX = 17, // X86 MMX TYPE_CODE_STRUCT_ANON = 18, // STRUCT_ANON: [ispacked, eltty x N] TYPE_CODE_STRUCT_NAME = 19, // STRUCT_NAME: [strchr x N] TYPE_CODE_STRUCT_NAMED = 20, // STRUCT_NAMED: [ispacked, eltty x N] TYPE_CODE_FUNCTION = 21, // FUNCTION: [vararg, retty, paramty x N] TYPE_CODE_TOKEN = 22 // TOKEN }; enum OperandBundleTagCode { OPERAND_BUNDLE_TAG = 1, // TAG: [strchr x N] }; enum SyncScopeNameCode { SYNC_SCOPE_NAME = 1, }; // Value symbol table codes. enum ValueSymtabCodes { VST_CODE_ENTRY = 1, // VST_ENTRY: [valueid, namechar x N] VST_CODE_BBENTRY = 2, // VST_BBENTRY: [bbid, namechar x N] VST_CODE_FNENTRY = 3, // VST_FNENTRY: [valueid, offset, namechar x N] // VST_COMBINED_ENTRY: [valueid, refguid] VST_CODE_COMBINED_ENTRY = 5 }; // The module path symbol table only has one code (MST_CODE_ENTRY). 
enum ModulePathSymtabCodes { MST_CODE_ENTRY = 1, // MST_ENTRY: [modid, namechar x N] MST_CODE_HASH = 2, // MST_HASH: [5*i32] }; // The summary section uses different codes in the per-module // and combined index cases. enum GlobalValueSummarySymtabCodes { // PERMODULE: [valueid, flags, instcount, numrefs, numrefs x valueid, // n x (valueid)] FS_PERMODULE = 1, // PERMODULE_PROFILE: [valueid, flags, instcount, numrefs, // numrefs x valueid, // n x (valueid, hotness)] FS_PERMODULE_PROFILE = 2, // PERMODULE_GLOBALVAR_INIT_REFS: [valueid, flags, n x valueid] FS_PERMODULE_GLOBALVAR_INIT_REFS = 3, // COMBINED: [valueid, modid, flags, instcount, numrefs, numrefs x valueid, // n x (valueid)] FS_COMBINED = 4, // COMBINED_PROFILE: [valueid, modid, flags, instcount, numrefs, // numrefs x valueid, // n x (valueid, hotness)] FS_COMBINED_PROFILE = 5, // COMBINED_GLOBALVAR_INIT_REFS: [valueid, modid, flags, n x valueid] FS_COMBINED_GLOBALVAR_INIT_REFS = 6, // ALIAS: [valueid, flags, valueid] FS_ALIAS = 7, // COMBINED_ALIAS: [valueid, modid, flags, valueid] FS_COMBINED_ALIAS = 8, // COMBINED_ORIGINAL_NAME: [original_name_hash] FS_COMBINED_ORIGINAL_NAME = 9, // VERSION of the summary, bumped when adding flags for instance. FS_VERSION = 10, // The list of llvm.type.test type identifiers used by the following function // that are used other than by an llvm.assume. // [n x typeid] FS_TYPE_TESTS = 11, // The list of virtual calls made by this function using // llvm.assume(llvm.type.test) intrinsics that do not have all constant // integer arguments. // [n x (typeid, offset)] FS_TYPE_TEST_ASSUME_VCALLS = 12, // The list of virtual calls made by this function using // llvm.type.checked.load intrinsics that do not have all constant integer // arguments. // [n x (typeid, offset)] FS_TYPE_CHECKED_LOAD_VCALLS = 13, // Identifies a virtual call made by this function using an // llvm.assume(llvm.type.test) intrinsic with all constant integer arguments. // [typeid, offset, n x arg] FS_TYPE_TEST_ASSUME_CONST_VCALL = 14, // Identifies a virtual call made by this function using an // llvm.type.checked.load intrinsic with all constant integer arguments. // [typeid, offset, n x arg] FS_TYPE_CHECKED_LOAD_CONST_VCALL = 15, // Assigns a GUID to a value ID. This normally appears only in combined // summaries, but it can also appear in per-module summaries for PGO data. // [valueid, guid] FS_VALUE_GUID = 16, // The list of local functions with CFI jump tables. Function names are // strings in strtab. // [n * name] FS_CFI_FUNCTION_DEFS = 17, // The list of external functions with CFI jump tables. Function names are // strings in strtab. // [n * name] FS_CFI_FUNCTION_DECLS = 18, }; enum MetadataCodes { METADATA_STRING_OLD = 1, // MDSTRING: [values] METADATA_VALUE = 2, // VALUE: [type num, value num] METADATA_NODE = 3, // NODE: [n x md num] METADATA_NAME = 4, // STRING: [values] METADATA_DISTINCT_NODE = 5, // DISTINCT_NODE: [n x md num] METADATA_KIND = 6, // [n x [id, name]] METADATA_LOCATION = 7, // [distinct, line, col, scope, inlined-at?] 
METADATA_OLD_NODE = 8, // OLD_NODE: [n x (type num, value num)] METADATA_OLD_FN_NODE = 9, // OLD_FN_NODE: [n x (type num, value num)] METADATA_NAMED_NODE = 10, // NAMED_NODE: [n x mdnodes] METADATA_ATTACHMENT = 11, // [m x [value, [n x [id, mdnode]]] METADATA_GENERIC_DEBUG = 12, // [distinct, tag, vers, header, n x md num] METADATA_SUBRANGE = 13, // [distinct, count, lo] METADATA_ENUMERATOR = 14, // [distinct, value, name] METADATA_BASIC_TYPE = 15, // [distinct, tag, name, size, align, enc] METADATA_FILE = 16, // [distinct, filename, directory, checksumkind, checksum] METADATA_DERIVED_TYPE = 17, // [distinct, ...] METADATA_COMPOSITE_TYPE = 18, // [distinct, ...] METADATA_SUBROUTINE_TYPE = 19, // [distinct, flags, types, cc] METADATA_COMPILE_UNIT = 20, // [distinct, ...] METADATA_SUBPROGRAM = 21, // [distinct, ...] METADATA_LEXICAL_BLOCK = 22, // [distinct, scope, file, line, column] METADATA_LEXICAL_BLOCK_FILE = 23, //[distinct, scope, file, discriminator] METADATA_NAMESPACE = 24, // [distinct, scope, file, name, line, exportSymbols] METADATA_TEMPLATE_TYPE = 25, // [distinct, scope, name, type, ...] METADATA_TEMPLATE_VALUE = 26, // [distinct, scope, name, type, value, ...] METADATA_GLOBAL_VAR = 27, // [distinct, ...] METADATA_LOCAL_VAR = 28, // [distinct, ...] METADATA_EXPRESSION = 29, // [distinct, n x element] METADATA_OBJC_PROPERTY = 30, // [distinct, name, file, line, ...] METADATA_IMPORTED_ENTITY = 31, // [distinct, tag, scope, entity, line, name] METADATA_MODULE = 32, // [distinct, scope, name, ...] METADATA_MACRO = 33, // [distinct, macinfo, line, name, value] METADATA_MACRO_FILE = 34, // [distinct, macinfo, line, file, ...] METADATA_STRINGS = 35, // [count, offset] blob([lengths][chars]) METADATA_GLOBAL_DECL_ATTACHMENT = 36, // [valueid, n x [id, mdnode]] METADATA_GLOBAL_VAR_EXPR = 37, // [distinct, var, expr] METADATA_INDEX_OFFSET = 38, // [offset] METADATA_INDEX = 39, // [bitpos] }; // The constants block (CONSTANTS_BLOCK_ID) describes emission for each // constant and maintains an implicit current type value. 
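(Editorial aside, not part of the diff: the comment above describes how records in the CONSTANTS_BLOCK_ID block rely on an implicit "current type" that is set by CST_CODE_SETTYPE and applies to the records that follow it. The self-contained sketch below illustrates that convention with a hypothetical flattened record stream; the Record struct and decodeConstants are illustrative names, and only the code values 1 and 4 are taken from the ConstantsCodes enum that follows.)

    // Hypothetical, self-contained illustration; not LLVM's BitcodeReader.
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    namespace {
    constexpr unsigned CST_CODE_SETTYPE = 1; // value taken from ConstantsCodes below
    constexpr unsigned CST_CODE_INTEGER = 4; // value taken from ConstantsCodes below

    struct Record {          // hypothetical flattened constant record
      unsigned Code;
      uint64_t Operand;
    };

    void decodeConstants(const std::vector<Record> &Records) {
      unsigned CurTypeID = 0; // the "implicit current type"
      for (const Record &R : Records) {
        if (R.Code == CST_CODE_SETTYPE)
          CurTypeID = static_cast<unsigned>(R.Operand); // applies to later records
        else if (R.Code == CST_CODE_INTEGER)
          std::printf("integer constant %llu of type #%u\n",
                      (unsigned long long)R.Operand, CurTypeID);
      }
    }
    } // namespace

    int main() {
      decodeConstants({{CST_CODE_SETTYPE, 3}, {CST_CODE_INTEGER, 42},
                       {CST_CODE_INTEGER, 7}, {CST_CODE_SETTYPE, 5},
                       {CST_CODE_INTEGER, 1}});
    }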
enum ConstantsCodes { CST_CODE_SETTYPE = 1, // SETTYPE: [typeid] CST_CODE_NULL = 2, // NULL CST_CODE_UNDEF = 3, // UNDEF CST_CODE_INTEGER = 4, // INTEGER: [intval] CST_CODE_WIDE_INTEGER = 5, // WIDE_INTEGER: [n x intval] CST_CODE_FLOAT = 6, // FLOAT: [fpval] CST_CODE_AGGREGATE = 7, // AGGREGATE: [n x value number] CST_CODE_STRING = 8, // STRING: [values] CST_CODE_CSTRING = 9, // CSTRING: [values] CST_CODE_CE_BINOP = 10, // CE_BINOP: [opcode, opval, opval] CST_CODE_CE_CAST = 11, // CE_CAST: [opcode, opty, opval] CST_CODE_CE_GEP = 12, // CE_GEP: [n x operands] CST_CODE_CE_SELECT = 13, // CE_SELECT: [opval, opval, opval] CST_CODE_CE_EXTRACTELT = 14, // CE_EXTRACTELT: [opty, opval, opval] CST_CODE_CE_INSERTELT = 15, // CE_INSERTELT: [opval, opval, opval] CST_CODE_CE_SHUFFLEVEC = 16, // CE_SHUFFLEVEC: [opval, opval, opval] CST_CODE_CE_CMP = 17, // CE_CMP: [opty, opval, opval, pred] CST_CODE_INLINEASM_OLD = 18, // INLINEASM: [sideeffect|alignstack, // asmstr,conststr] CST_CODE_CE_SHUFVEC_EX = 19, // SHUFVEC_EX: [opty, opval, opval, opval] CST_CODE_CE_INBOUNDS_GEP = 20, // INBOUNDS_GEP: [n x operands] CST_CODE_BLOCKADDRESS = 21, // CST_CODE_BLOCKADDRESS [fnty, fnval, bb#] CST_CODE_DATA = 22, // DATA: [n x elements] CST_CODE_INLINEASM = 23, // INLINEASM: [sideeffect|alignstack| // asmdialect,asmstr,conststr] CST_CODE_CE_GEP_WITH_INRANGE_INDEX = 24, // [opty, flags, n x operands] }; /// CastOpcodes - These are values used in the bitcode files to encode which /// cast a CST_CODE_CE_CAST or a XXX refers to. The values of these enums /// have no fixed relation to the LLVM IR enum values. Changing these will /// break compatibility with old files. enum CastOpcodes { CAST_TRUNC = 0, CAST_ZEXT = 1, CAST_SEXT = 2, CAST_FPTOUI = 3, CAST_FPTOSI = 4, CAST_UITOFP = 5, CAST_SITOFP = 6, CAST_FPTRUNC = 7, CAST_FPEXT = 8, CAST_PTRTOINT = 9, CAST_INTTOPTR = 10, CAST_BITCAST = 11, CAST_ADDRSPACECAST = 12 }; /// BinaryOpcodes - These are values used in the bitcode files to encode which /// binop a CST_CODE_CE_BINOP or a XXX refers to. The values of these enums /// have no fixed relation to the LLVM IR enum values. Changing these will /// break compatibility with old files. enum BinaryOpcodes { BINOP_ADD = 0, BINOP_SUB = 1, BINOP_MUL = 2, BINOP_UDIV = 3, BINOP_SDIV = 4, // overloaded for FP BINOP_UREM = 5, BINOP_SREM = 6, // overloaded for FP BINOP_SHL = 7, BINOP_LSHR = 8, BINOP_ASHR = 9, BINOP_AND = 10, BINOP_OR = 11, BINOP_XOR = 12 }; /// These are values used in the bitcode files to encode AtomicRMW operations. /// The values of these enums have no fixed relation to the LLVM IR enum /// values. Changing these will break compatibility with old files. enum RMWOperations { RMW_XCHG = 0, RMW_ADD = 1, RMW_SUB = 2, RMW_AND = 3, RMW_NAND = 4, RMW_OR = 5, RMW_XOR = 6, RMW_MAX = 7, RMW_MIN = 8, RMW_UMAX = 9, RMW_UMIN = 10 }; /// OverflowingBinaryOperatorOptionalFlags - Flags for serializing /// OverflowingBinaryOperator's SubclassOptionalData contents. enum OverflowingBinaryOperatorOptionalFlags { OBO_NO_UNSIGNED_WRAP = 0, OBO_NO_SIGNED_WRAP = 1 }; +/// FastMath Flags +/// This is a fixed layout derived from the bitcode emitted by LLVM 5.0 +/// intended to decouple the in-memory representation from the serialization. 
+enum FastMathMap { + UnsafeAlgebra = (1 << 0), // Legacy + NoNaNs = (1 << 1), + NoInfs = (1 << 2), + NoSignedZeros = (1 << 3), + AllowReciprocal = (1 << 4), + AllowContract = (1 << 5), + ApproxFunc = (1 << 6), + AllowReassoc = (1 << 7) +}; + /// PossiblyExactOperatorOptionalFlags - Flags for serializing /// PossiblyExactOperator's SubclassOptionalData contents. enum PossiblyExactOperatorOptionalFlags { PEO_EXACT = 0 }; /// Encoded AtomicOrdering values. enum AtomicOrderingCodes { ORDERING_NOTATOMIC = 0, ORDERING_UNORDERED = 1, ORDERING_MONOTONIC = 2, ORDERING_ACQUIRE = 3, ORDERING_RELEASE = 4, ORDERING_ACQREL = 5, ORDERING_SEQCST = 6 }; /// Markers and flags for call instruction. enum CallMarkersFlags { CALL_TAIL = 0, CALL_CCONV = 1, CALL_MUSTTAIL = 14, CALL_EXPLICIT_TYPE = 15, CALL_NOTAIL = 16, CALL_FMF = 17 // Call has optional fast-math-flags. }; // The function body block (FUNCTION_BLOCK_ID) describes function bodies. It // can contain a constant block (CONSTANTS_BLOCK_ID). enum FunctionCodes { FUNC_CODE_DECLAREBLOCKS = 1, // DECLAREBLOCKS: [n] FUNC_CODE_INST_BINOP = 2, // BINOP: [opcode, ty, opval, opval] FUNC_CODE_INST_CAST = 3, // CAST: [opcode, ty, opty, opval] FUNC_CODE_INST_GEP_OLD = 4, // GEP: [n x operands] FUNC_CODE_INST_SELECT = 5, // SELECT: [ty, opval, opval, opval] FUNC_CODE_INST_EXTRACTELT = 6, // EXTRACTELT: [opty, opval, opval] FUNC_CODE_INST_INSERTELT = 7, // INSERTELT: [ty, opval, opval, opval] FUNC_CODE_INST_SHUFFLEVEC = 8, // SHUFFLEVEC: [ty, opval, opval, opval] FUNC_CODE_INST_CMP = 9, // CMP: [opty, opval, opval, pred] FUNC_CODE_INST_RET = 10, // RET: [opty,opval] FUNC_CODE_INST_BR = 11, // BR: [bb#, bb#, cond] or [bb#] FUNC_CODE_INST_SWITCH = 12, // SWITCH: [opty, op0, op1, ...] FUNC_CODE_INST_INVOKE = 13, // INVOKE: [attr, fnty, op0,op1, ...] // 14 is unused. FUNC_CODE_INST_UNREACHABLE = 15, // UNREACHABLE FUNC_CODE_INST_PHI = 16, // PHI: [ty, val0,bb0, ...] // 17 is unused. // 18 is unused. FUNC_CODE_INST_ALLOCA = 19, // ALLOCA: [instty, opty, op, align] FUNC_CODE_INST_LOAD = 20, // LOAD: [opty, op, align, vol] // 21 is unused. // 22 is unused. FUNC_CODE_INST_VAARG = 23, // VAARG: [valistty, valist, instty] // This store code encodes the pointer type, rather than the value type // this is so information only available in the pointer type (e.g. address // spaces) is retained. FUNC_CODE_INST_STORE_OLD = 24, // STORE: [ptrty,ptr,val, align, vol] // 25 is unused. FUNC_CODE_INST_EXTRACTVAL = 26, // EXTRACTVAL: [n x operands] FUNC_CODE_INST_INSERTVAL = 27, // INSERTVAL: [n x operands] // fcmp/icmp returning Int1TY or vector of Int1Ty. Same as CMP, exists to // support legacy vicmp/vfcmp instructions. FUNC_CODE_INST_CMP2 = 28, // CMP2: [opty, opval, opval, pred] // new select on i1 or [N x i1] FUNC_CODE_INST_VSELECT = 29, // VSELECT: [ty,opval,opval,predty,pred] FUNC_CODE_INST_INBOUNDS_GEP_OLD = 30, // INBOUNDS_GEP: [n x operands] FUNC_CODE_INST_INDIRECTBR = 31, // INDIRECTBR: [opty, op0, op1, ...] // 32 is unused. FUNC_CODE_DEBUG_LOC_AGAIN = 33, // DEBUG_LOC_AGAIN FUNC_CODE_INST_CALL = 34, // CALL: [attr, cc, fnty, fnid, args...] 
FUNC_CODE_DEBUG_LOC = 35, // DEBUG_LOC: [Line,Col,ScopeVal, IAVal] FUNC_CODE_INST_FENCE = 36, // FENCE: [ordering, synchscope] FUNC_CODE_INST_CMPXCHG_OLD = 37, // CMPXCHG: [ptrty,ptr,cmp,new, align, vol, // ordering, synchscope] FUNC_CODE_INST_ATOMICRMW = 38, // ATOMICRMW: [ptrty,ptr,val, operation, // align, vol, // ordering, synchscope] FUNC_CODE_INST_RESUME = 39, // RESUME: [opval] FUNC_CODE_INST_LANDINGPAD_OLD = 40, // LANDINGPAD: [ty,val,val,num,id0,val0...] FUNC_CODE_INST_LOADATOMIC = 41, // LOAD: [opty, op, align, vol, // ordering, synchscope] FUNC_CODE_INST_STOREATOMIC_OLD = 42, // STORE: [ptrty,ptr,val, align, vol // ordering, synchscope] FUNC_CODE_INST_GEP = 43, // GEP: [inbounds, n x operands] FUNC_CODE_INST_STORE = 44, // STORE: [ptrty,ptr,valty,val, align, vol] FUNC_CODE_INST_STOREATOMIC = 45, // STORE: [ptrty,ptr,val, align, vol FUNC_CODE_INST_CMPXCHG = 46, // CMPXCHG: [ptrty,ptr,valty,cmp,new, align, // vol,ordering,synchscope] FUNC_CODE_INST_LANDINGPAD = 47, // LANDINGPAD: [ty,val,num,id0,val0...] FUNC_CODE_INST_CLEANUPRET = 48, // CLEANUPRET: [val] or [val,bb#] FUNC_CODE_INST_CATCHRET = 49, // CATCHRET: [val,bb#] FUNC_CODE_INST_CATCHPAD = 50, // CATCHPAD: [bb#,bb#,num,args...] FUNC_CODE_INST_CLEANUPPAD = 51, // CLEANUPPAD: [num,args...] FUNC_CODE_INST_CATCHSWITCH = 52, // CATCHSWITCH: [num,args...] or [num,args...,bb] // 53 is unused. // 54 is unused. FUNC_CODE_OPERAND_BUNDLE = 55, // OPERAND_BUNDLE: [tag#, value...] }; enum UseListCodes { USELIST_CODE_DEFAULT = 1, // DEFAULT: [index..., value-id] USELIST_CODE_BB = 2 // BB: [index..., bb-id] }; enum AttributeKindCodes { // = 0 is unused ATTR_KIND_ALIGNMENT = 1, ATTR_KIND_ALWAYS_INLINE = 2, ATTR_KIND_BY_VAL = 3, ATTR_KIND_INLINE_HINT = 4, ATTR_KIND_IN_REG = 5, ATTR_KIND_MIN_SIZE = 6, ATTR_KIND_NAKED = 7, ATTR_KIND_NEST = 8, ATTR_KIND_NO_ALIAS = 9, ATTR_KIND_NO_BUILTIN = 10, ATTR_KIND_NO_CAPTURE = 11, ATTR_KIND_NO_DUPLICATE = 12, ATTR_KIND_NO_IMPLICIT_FLOAT = 13, ATTR_KIND_NO_INLINE = 14, ATTR_KIND_NON_LAZY_BIND = 15, ATTR_KIND_NO_RED_ZONE = 16, ATTR_KIND_NO_RETURN = 17, ATTR_KIND_NO_UNWIND = 18, ATTR_KIND_OPTIMIZE_FOR_SIZE = 19, ATTR_KIND_READ_NONE = 20, ATTR_KIND_READ_ONLY = 21, ATTR_KIND_RETURNED = 22, ATTR_KIND_RETURNS_TWICE = 23, ATTR_KIND_S_EXT = 24, ATTR_KIND_STACK_ALIGNMENT = 25, ATTR_KIND_STACK_PROTECT = 26, ATTR_KIND_STACK_PROTECT_REQ = 27, ATTR_KIND_STACK_PROTECT_STRONG = 28, ATTR_KIND_STRUCT_RET = 29, ATTR_KIND_SANITIZE_ADDRESS = 30, ATTR_KIND_SANITIZE_THREAD = 31, ATTR_KIND_SANITIZE_MEMORY = 32, ATTR_KIND_UW_TABLE = 33, ATTR_KIND_Z_EXT = 34, ATTR_KIND_BUILTIN = 35, ATTR_KIND_COLD = 36, ATTR_KIND_OPTIMIZE_NONE = 37, ATTR_KIND_IN_ALLOCA = 38, ATTR_KIND_NON_NULL = 39, ATTR_KIND_JUMP_TABLE = 40, ATTR_KIND_DEREFERENCEABLE = 41, ATTR_KIND_DEREFERENCEABLE_OR_NULL = 42, ATTR_KIND_CONVERGENT = 43, ATTR_KIND_SAFESTACK = 44, ATTR_KIND_ARGMEMONLY = 45, ATTR_KIND_SWIFT_SELF = 46, ATTR_KIND_SWIFT_ERROR = 47, ATTR_KIND_NO_RECURSE = 48, ATTR_KIND_INACCESSIBLEMEM_ONLY = 49, ATTR_KIND_INACCESSIBLEMEM_OR_ARGMEMONLY = 50, ATTR_KIND_ALLOC_SIZE = 51, ATTR_KIND_WRITEONLY = 52, ATTR_KIND_SPECULATABLE = 53, ATTR_KIND_STRICT_FP = 54, ATTR_KIND_SANITIZE_HWADDRESS = 55, }; enum ComdatSelectionKindCodes { COMDAT_SELECTION_KIND_ANY = 1, COMDAT_SELECTION_KIND_EXACT_MATCH = 2, COMDAT_SELECTION_KIND_LARGEST = 3, COMDAT_SELECTION_KIND_NO_DUPLICATES = 4, COMDAT_SELECTION_KIND_SAME_SIZE = 5, }; enum StrtabCodes { STRTAB_BLOB = 1, }; enum SymtabCodes { SYMTAB_BLOB = 1, }; } // End bitc namespace } // End llvm namespace #endif Index: 
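(Editorial aside, not part of the diff: the FastMathMap enum merged into LLVMBitCodes.h above pins down the on-disk bit layout for fast-math flags, decoupled from the in-memory FastMathFlags encoding. A minimal sketch of decoding such a bitfield follows; DecodedFMF and decodeFastMathBits are hypothetical stand-ins for llvm::FastMathFlags and the bitcode reader, and treating the legacy UnsafeAlgebra bit as implying all individual flags is an assumption about how old bitcode would be auto-upgraded.)

    // Hypothetical decoder for the serialized fast-math bits; the shift values
    // mirror the FastMathMap enum in the hunk above.
    #include <cstdint>

    struct DecodedFMF {            // stand-in for llvm::FastMathFlags
      bool NoNaNs = false, NoInfs = false, NoSignedZeros = false;
      bool AllowReciprocal = false, AllowContract = false;
      bool ApproxFunc = false, AllowReassoc = false;
    };

    DecodedFMF decodeFastMathBits(uint64_t Bits) {
      DecodedFMF F;
      const bool Legacy = Bits & (1 << 0);   // UnsafeAlgebra (legacy); assumed to imply all flags
      F.NoNaNs          = Legacy || (Bits & (1 << 1));
      F.NoInfs          = Legacy || (Bits & (1 << 2));
      F.NoSignedZeros   = Legacy || (Bits & (1 << 3));
      F.AllowReciprocal = Legacy || (Bits & (1 << 4));
      F.AllowContract   = Legacy || (Bits & (1 << 5));
      F.ApproxFunc      = Legacy || (Bits & (1 << 6));
      F.AllowReassoc    = Legacy || (Bits & (1 << 7));
      return F;
    }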
head/contrib/llvm/include/llvm/MC/MCAsmMacro.h =================================================================== --- head/contrib/llvm/include/llvm/MC/MCAsmMacro.h (revision 329982) +++ head/contrib/llvm/include/llvm/MC/MCAsmMacro.h (revision 329983) @@ -1,38 +1,38 @@ //===- MCAsmMacro.h - Assembly Macros ---------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #ifndef LLVM_MC_MCASMMACRO_H #define LLVM_MC_MCASMMACRO_H #include "llvm/MC/MCParser/MCAsmLexer.h" namespace llvm { struct MCAsmMacroParameter { StringRef Name; std::vector<AsmToken> Value; bool Required = false; bool Vararg = false; MCAsmMacroParameter() = default; }; typedef std::vector<MCAsmMacroParameter> MCAsmMacroParameters; struct MCAsmMacro { StringRef Name; StringRef Body; MCAsmMacroParameters Parameters; public: MCAsmMacro(StringRef N, StringRef B, MCAsmMacroParameters P) : Name(N), Body(B), Parameters(std::move(P)) {} }; -}; // namespace llvm +} // namespace llvm #endif
Index: head/contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h =================================================================== --- head/contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h (revision 329982) +++ head/contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h (revision 329983) @@ -1,575 +1,568 @@ //===- llvm/Transforms/Utils/LoopUtils.h - Loop utilities -------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file defines some loop transformation utilities. // //===----------------------------------------------------------------------===// #ifndef LLVM_TRANSFORMS_UTILS_LOOPUTILS_H #define LLVM_TRANSFORMS_UTILS_LOOPUTILS_H #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/DemandedBits.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Operator.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Casting.h" namespace llvm { class AliasSet; class AliasSetTracker; class BasicBlock; class DataLayout; class Loop; class LoopInfo; class OptimizationRemarkEmitter; class PredicatedScalarEvolution; class PredIteratorCache; class ScalarEvolution; class SCEV; class TargetLibraryInfo; class TargetTransformInfo; /// \brief Captures loop safety information. /// It keeps track of whether the loop and its header may throw an exception. struct LoopSafetyInfo { bool MayThrow = false; // The current loop contains an instruction which // may throw. bool HeaderMayThrow = false; // Same as previous, but specific to loop header // Used to update funclet bundle operands. DenseMap<BasicBlock *, ColorVector> BlockColors; LoopSafetyInfo() = default; }; /// The RecurrenceDescriptor is used to identify recurrence variables in a /// loop. Reduction is a special case of recurrence that has uses of the /// recurrence variable outside the loop. The method isReductionPHI identifies /// reductions that are basic recurrences.
/// /// Basic recurrences are defined as the summation, product, OR, AND, XOR, min, /// or max of a set of terms. For example: for(i=0; i &CI) : StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK), UnsafeAlgebraInst(UAI), RecurrenceType(RT), IsSigned(Signed) { CastInsts.insert(CI.begin(), CI.end()); } /// This POD struct holds information about a potential recurrence operation. class InstDesc { public: InstDesc(bool IsRecur, Instruction *I, Instruction *UAI = nullptr) : IsRecurrence(IsRecur), PatternLastInst(I), MinMaxKind(MRK_Invalid), UnsafeAlgebraInst(UAI) {} InstDesc(Instruction *I, MinMaxRecurrenceKind K, Instruction *UAI = nullptr) : IsRecurrence(true), PatternLastInst(I), MinMaxKind(K), UnsafeAlgebraInst(UAI) {} bool isRecurrence() { return IsRecurrence; } bool hasUnsafeAlgebra() { return UnsafeAlgebraInst != nullptr; } Instruction *getUnsafeAlgebraInst() { return UnsafeAlgebraInst; } MinMaxRecurrenceKind getMinMaxKind() { return MinMaxKind; } Instruction *getPatternInst() { return PatternLastInst; } private: // Is this instruction a recurrence candidate. bool IsRecurrence; // The last instruction in a min/max pattern (select of the select(icmp()) // pattern), or the current recurrence instruction otherwise. Instruction *PatternLastInst; // If this is a min/max pattern the comparison predicate. MinMaxRecurrenceKind MinMaxKind; // Recurrence has unsafe algebra. Instruction *UnsafeAlgebraInst; }; /// Returns a struct describing if the instruction 'I' can be a recurrence /// variable of type 'Kind'. If the recurrence is a min/max pattern of /// select(icmp()) this function advances the instruction pointer 'I' from the /// compare instruction to the select instruction and stores this pointer in /// 'PatternLastInst' member of the returned struct. static InstDesc isRecurrenceInstr(Instruction *I, RecurrenceKind Kind, InstDesc &Prev, bool HasFunNoNaNAttr); /// Returns true if instruction I has multiple uses in Insts static bool hasMultipleUsesOf(Instruction *I, SmallPtrSetImpl &Insts); /// Returns true if all uses of the instruction I is within the Set. static bool areAllUsesIn(Instruction *I, SmallPtrSetImpl &Set); /// Returns a struct describing if the instruction if the instruction is a /// Select(ICmp(X, Y), X, Y) instruction pattern corresponding to a min(X, Y) /// or max(X, Y). static InstDesc isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev); /// Returns identity corresponding to the RecurrenceKind. static Constant *getRecurrenceIdentity(RecurrenceKind K, Type *Tp); /// Returns the opcode of binary operation corresponding to the /// RecurrenceKind. static unsigned getRecurrenceBinOp(RecurrenceKind Kind); /// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind. static Value *createMinMaxOp(IRBuilder<> &Builder, MinMaxRecurrenceKind RK, Value *Left, Value *Right); /// Returns true if Phi is a reduction of type Kind and adds it to the - /// RecurrenceDescriptor. + /// RecurrenceDescriptor. If either \p DB is non-null or \p AC and \p DT are + /// non-null, the minimal bit width needed to compute the reduction will be + /// computed. static bool AddReductionVar(PHINode *Phi, RecurrenceKind Kind, Loop *TheLoop, bool HasFunNoNaNAttr, - RecurrenceDescriptor &RedDes); + RecurrenceDescriptor &RedDes, + DemandedBits *DB = nullptr, + AssumptionCache *AC = nullptr, + DominatorTree *DT = nullptr); - /// Returns true if Phi is a reduction in TheLoop. The RecurrenceDescriptor is - /// returned in RedDes. + /// Returns true if Phi is a reduction in TheLoop. 
The RecurrenceDescriptor + /// is returned in RedDes. If either \p DB is non-null or \p AC and \p DT are + /// non-null, the minimal bit width needed to compute the reduction will be + /// computed. static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, - RecurrenceDescriptor &RedDes); + RecurrenceDescriptor &RedDes, + DemandedBits *DB = nullptr, + AssumptionCache *AC = nullptr, + DominatorTree *DT = nullptr); /// Returns true if Phi is a first-order recurrence. A first-order recurrence /// is a non-reduction recurrence relation in which the value of the /// recurrence in the current loop iteration equals a value defined in the /// previous iteration. \p SinkAfter includes pairs of instructions where the /// first will be rescheduled to appear after the second if/when the loop is /// vectorized. It may be augmented with additional pairs if needed in order /// to handle Phi as a first-order recurrence. static bool isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop, DenseMap &SinkAfter, DominatorTree *DT); RecurrenceKind getRecurrenceKind() { return Kind; } MinMaxRecurrenceKind getMinMaxRecurrenceKind() { return MinMaxKind; } TrackingVH getRecurrenceStartValue() { return StartValue; } Instruction *getLoopExitInstr() { return LoopExitInstr; } /// Returns true if the recurrence has unsafe algebra which requires a relaxed /// floating-point model. bool hasUnsafeAlgebra() { return UnsafeAlgebraInst != nullptr; } /// Returns first unsafe algebra instruction in the PHI node's use-chain. Instruction *getUnsafeAlgebraInst() { return UnsafeAlgebraInst; } /// Returns true if the recurrence kind is an integer kind. static bool isIntegerRecurrenceKind(RecurrenceKind Kind); /// Returns true if the recurrence kind is a floating point kind. static bool isFloatingPointRecurrenceKind(RecurrenceKind Kind); /// Returns true if the recurrence kind is an arithmetic kind. static bool isArithmeticRecurrenceKind(RecurrenceKind Kind); - - /// Determines if Phi may have been type-promoted. If Phi has a single user - /// that ANDs the Phi with a type mask, return the user. RT is updated to - /// account for the narrower bit width represented by the mask, and the AND - /// instruction is added to CI. - static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT, - SmallPtrSetImpl &Visited, - SmallPtrSetImpl &CI); - - /// Returns true if all the source operands of a recurrence are either - /// SExtInsts or ZExtInsts. This function is intended to be used with - /// lookThroughAnd to determine if the recurrence has been type-promoted. The - /// source operands are added to CI, and IsSigned is updated to indicate if - /// all source operands are SExtInsts. - static bool getSourceExtensionKind(Instruction *Start, Instruction *Exit, - Type *RT, bool &IsSigned, - SmallPtrSetImpl &Visited, - SmallPtrSetImpl &CI); /// Returns the type of the recurrence. This type can be narrower than the /// actual type of the Phi if the recurrence has been type-promoted. Type *getRecurrenceType() { return RecurrenceType; } /// Returns a reference to the instructions used for type-promoting the /// recurrence. SmallPtrSet &getCastInsts() { return CastInsts; } /// Returns true if all source operands of the recurrence are SExtInsts. bool isSigned() { return IsSigned; } private: // The starting value of the recurrence. // It does not have to be zero! TrackingVH StartValue; // The instruction who's value is used outside the loop. Instruction *LoopExitInstr = nullptr; // The kind of the recurrence. 
RecurrenceKind Kind = RK_NoRecurrence; // If this a min/max recurrence the kind of recurrence. MinMaxRecurrenceKind MinMaxKind = MRK_Invalid; // First occurrence of unasfe algebra in the PHI's use-chain. Instruction *UnsafeAlgebraInst = nullptr; // The type of the recurrence. Type *RecurrenceType = nullptr; // True if all source operands of the recurrence are SExtInsts. bool IsSigned = false; // Instructions used for type-promoting the recurrence. SmallPtrSet CastInsts; }; /// A struct for saving information about induction variables. class InductionDescriptor { public: /// This enum represents the kinds of inductions that we support. enum InductionKind { IK_NoInduction, ///< Not an induction variable. IK_IntInduction, ///< Integer induction variable. Step = C. IK_PtrInduction, ///< Pointer induction var. Step = C / sizeof(elem). IK_FpInduction ///< Floating point induction variable. }; public: /// Default constructor - creates an invalid induction. InductionDescriptor() = default; /// Get the consecutive direction. Returns: /// 0 - unknown or non-consecutive. /// 1 - consecutive and increasing. /// -1 - consecutive and decreasing. int getConsecutiveDirection() const; /// Compute the transformed value of Index at offset StartValue using step /// StepValue. /// For integer induction, returns StartValue + Index * StepValue. /// For pointer induction, returns StartValue[Index * StepValue]. /// FIXME: The newly created binary instructions should contain nsw/nuw /// flags, which can be found from the original scalar operations. Value *transform(IRBuilder<> &B, Value *Index, ScalarEvolution *SE, const DataLayout& DL) const; Value *getStartValue() const { return StartValue; } InductionKind getKind() const { return IK; } const SCEV *getStep() const { return Step; } ConstantInt *getConstIntStepValue() const; /// Returns true if \p Phi is an induction in the loop \p L. If \p Phi is an /// induction, the induction descriptor \p D will contain the data describing /// this induction. If by some other means the caller has a better SCEV /// expression for \p Phi than the one returned by the ScalarEvolution /// analysis, it can be passed through \p Expr. If the def-use chain /// associated with the phi includes casts (that we know we can ignore /// under proper runtime checks), they are passed through \p CastsToIgnore. static bool isInductionPHI(PHINode *Phi, const Loop* L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr = nullptr, SmallVectorImpl *CastsToIgnore = nullptr); /// Returns true if \p Phi is a floating point induction in the loop \p L. /// If \p Phi is an induction, the induction descriptor \p D will contain /// the data describing this induction. static bool isFPInductionPHI(PHINode *Phi, const Loop* L, ScalarEvolution *SE, InductionDescriptor &D); /// Returns true if \p Phi is a loop \p L induction, in the context associated /// with the run-time predicate of PSE. If \p Assume is true, this can add /// further SCEV predicates to \p PSE in order to prove that \p Phi is an /// induction. /// If \p Phi is an induction, \p D will contain the data describing this /// induction. static bool isInductionPHI(PHINode *Phi, const Loop* L, PredicatedScalarEvolution &PSE, InductionDescriptor &D, bool Assume = false); /// Returns true if the induction type is FP and the binary operator does /// not have the "fast-math" property. Such operation requires a relaxed FP /// mode. 
bool hasUnsafeAlgebra() { return InductionBinOp && !cast(InductionBinOp)->isFast(); } /// Returns induction operator that does not have "fast-math" property /// and requires FP unsafe mode. Instruction *getUnsafeAlgebraInst() { if (!InductionBinOp || cast(InductionBinOp)->isFast()) return nullptr; return InductionBinOp; } /// Returns binary opcode of the induction operator. Instruction::BinaryOps getInductionOpcode() const { return InductionBinOp ? InductionBinOp->getOpcode() : Instruction::BinaryOpsEnd; } /// Returns a reference to the type cast instructions in the induction /// update chain, that are redundant when guarded with a runtime /// SCEV overflow check. const SmallVectorImpl &getCastInsts() const { return RedundantCasts; } private: /// Private constructor - used by \c isInductionPHI. InductionDescriptor(Value *Start, InductionKind K, const SCEV *Step, BinaryOperator *InductionBinOp = nullptr, SmallVectorImpl *Casts = nullptr); /// Start value. TrackingVH StartValue; /// Induction kind. InductionKind IK = IK_NoInduction; /// Step value. const SCEV *Step = nullptr; // Instruction that advances induction variable. BinaryOperator *InductionBinOp = nullptr; // Instructions used for type-casts of the induction variable, // that are redundant when guarded with a runtime SCEV overflow check. SmallVector RedundantCasts; }; BasicBlock *InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA); /// Ensure that all exit blocks of the loop are dedicated exits. /// /// For any loop exit block with non-loop predecessors, we split the loop /// predecessors to use a dedicated loop exit block. We update the dominator /// tree and loop info if provided, and will preserve LCSSA if requested. bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA); /// Ensures LCSSA form for every instruction from the Worklist in the scope of /// innermost containing loop. /// /// For the given instruction which have uses outside of the loop, an LCSSA PHI /// node is inserted and the uses outside the loop are rewritten to use this /// node. /// /// LoopInfo and DominatorTree are required and, since the routine makes no /// changes to CFG, preserved. /// /// Returns true if any modifications are made. bool formLCSSAForInstructions(SmallVectorImpl &Worklist, DominatorTree &DT, LoopInfo &LI); /// \brief Put loop into LCSSA form. /// /// Looks at all instructions in the loop which have uses outside of the /// current loop. For each, an LCSSA PHI node is inserted and the uses outside /// the loop are rewritten to use this node. /// /// LoopInfo and DominatorTree are required and preserved. /// /// If ScalarEvolution is passed in, it will be preserved. /// /// Returns true if any modifications are made to the loop. bool formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE); /// \brief Put a loop nest into LCSSA form. /// /// This recursively forms LCSSA for a loop nest. /// /// LoopInfo and DominatorTree are required and preserved. /// /// If ScalarEvolution is passed in, it will be preserved. /// /// Returns true if any modifications are made to the loop. bool formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE); /// \brief Walk the specified region of the CFG (defined by all blocks /// dominated by the specified block, and that are in the current loop) in /// reverse depth first order w.r.t the DominatorTree. 
This allows us to visit /// uses before definitions, allowing us to sink a loop body in one pass without /// iteration. Takes DomTreeNode, AliasAnalysis, LoopInfo, DominatorTree, /// DataLayout, TargetLibraryInfo, Loop, AliasSet information for all /// instructions of the loop and loop safety information as /// arguments. Diagnostics is emitted via \p ORE. It returns changed status. bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, TargetLibraryInfo *, TargetTransformInfo *, Loop *, AliasSetTracker *, LoopSafetyInfo *, OptimizationRemarkEmitter *ORE); /// \brief Walk the specified region of the CFG (defined by all blocks /// dominated by the specified block, and that are in the current loop) in depth /// first order w.r.t the DominatorTree. This allows us to visit definitions /// before uses, allowing us to hoist a loop body in one pass without iteration. /// Takes DomTreeNode, AliasAnalysis, LoopInfo, DominatorTree, DataLayout, /// TargetLibraryInfo, Loop, AliasSet information for all instructions of the /// loop and loop safety information as arguments. Diagnostics is emitted via \p /// ORE. It returns changed status. bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, TargetLibraryInfo *, Loop *, AliasSetTracker *, LoopSafetyInfo *, OptimizationRemarkEmitter *ORE); /// This function deletes dead loops. The caller of this function needs to /// guarantee that the loop is infact dead. /// The function requires a bunch or prerequisites to be present: /// - The loop needs to be in LCSSA form /// - The loop needs to have a Preheader /// - A unique dedicated exit block must exist /// /// This also updates the relevant analysis information in \p DT, \p SE, and \p /// LI if pointers to those are provided. /// It also updates the loop PM if an updater struct is provided. void deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, LoopInfo *LI); /// \brief Try to promote memory values to scalars by sinking stores out of /// the loop and moving loads to before the loop. We do this by looping over /// the stores in the loop, looking for stores to Must pointers which are /// loop invariant. It takes a set of must-alias values, Loop exit blocks /// vector, loop exit blocks insertion point vector, PredIteratorCache, /// LoopInfo, DominatorTree, Loop, AliasSet information for all instructions /// of the loop and loop safety information as arguments. /// Diagnostics is emitted via \p ORE. It returns changed status. bool promoteLoopAccessesToScalars(const SmallSetVector &, SmallVectorImpl &, SmallVectorImpl &, PredIteratorCache &, LoopInfo *, DominatorTree *, const TargetLibraryInfo *, Loop *, AliasSetTracker *, LoopSafetyInfo *, OptimizationRemarkEmitter *); /// Does a BFS from a given node to all of its children inside a given loop. /// The returned vector of nodes includes the starting point. SmallVector collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop); /// \brief Computes safety information for a loop /// checks loop body & header for the possibility of may throw /// exception, it takes LoopSafetyInfo and loop as argument. /// Updates safety information in LoopSafetyInfo argument. void computeLoopSafetyInfo(LoopSafetyInfo *, Loop *); /// Returns true if the instruction in a loop is guaranteed to execute at least /// once. bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo); /// \brief Returns the instructions that use values defined in the loop. 
SmallVector findDefsUsedOutsideOfLoop(Loop *L); /// \brief Find string metadata for loop /// /// If it has a value (e.g. {"llvm.distribute", 1} return the value as an /// operand or null otherwise. If the string metadata is not found return /// Optional's not-a-value. Optional findStringMetadataForLoop(Loop *TheLoop, StringRef Name); /// \brief Set input string into loop metadata by keeping other values intact. void addStringMetadataToLoop(Loop *TheLoop, const char *MDString, unsigned V = 0); /// \brief Get a loop's estimated trip count based on branch weight metadata. /// Returns 0 when the count is estimated to be 0, or None when a meaningful /// estimate can not be made. Optional getLoopEstimatedTripCount(Loop *L); /// Helper to consistently add the set of standard passes to a loop pass's \c /// AnalysisUsage. /// /// All loop passes should call this as part of implementing their \c /// getAnalysisUsage. void getLoopAnalysisUsage(AnalysisUsage &AU); /// Returns true if the hoister and sinker can handle this instruction. /// If SafetyInfo is null, we are checking for sinking instructions from /// preheader to loop body (no speculation). /// If SafetyInfo is not null, we are checking for hoisting/sinking /// instructions from loop body to preheader/exit. Check if the instruction /// can execute speculatively. /// If \p ORE is set use it to emit optimization remarks. bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, Loop *CurLoop, AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE = nullptr); /// Generates a vector reduction using shufflevectors to reduce the value. Value *getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op, RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind = RecurrenceDescriptor::MRK_Invalid, ArrayRef RedOps = ArrayRef()); /// Create a target reduction of the given vector. The reduction operation /// is described by the \p Opcode parameter. min/max reductions require /// additional information supplied in \p Flags. /// The target is queried to determine if intrinsics or shuffle sequences are /// required to implement the reduction. Value * createSimpleTargetReduction(IRBuilder<> &B, const TargetTransformInfo *TTI, unsigned Opcode, Value *Src, TargetTransformInfo::ReductionFlags Flags = TargetTransformInfo::ReductionFlags(), ArrayRef RedOps = ArrayRef()); /// Create a generic target reduction using a recurrence descriptor \p Desc /// The target is queried to determine if intrinsics or shuffle sequences are /// required to implement the reduction. Value *createTargetReduction(IRBuilder<> &B, const TargetTransformInfo *TTI, RecurrenceDescriptor &Desc, Value *Src, bool NoNaN = false); /// Get the intersection (logical and) of all of the potential IR flags /// of each scalar operation (VL) that will be converted into a vector (I). /// If OpValue is non-null, we only consider operations similar to OpValue /// when intersecting. /// Flag set: NSW, NUW, exact, and all of fast-math. 
void propagateIRFlags(Value *I, ArrayRef VL, Value *OpValue = nullptr); } // end namespace llvm #endif // LLVM_TRANSFORMS_UTILS_LOOPUTILS_H Index: head/contrib/llvm/lib/Analysis/ScalarEvolution.cpp =================================================================== --- head/contrib/llvm/lib/Analysis/ScalarEvolution.cpp (revision 329982) +++ head/contrib/llvm/lib/Analysis/ScalarEvolution.cpp (revision 329983) @@ -1,11781 +1,11788 @@ //===- ScalarEvolution.cpp - Scalar Evolution Analysis --------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file contains the implementation of the scalar evolution analysis // engine, which is used primarily to analyze expressions involving induction // variables in loops. // // There are several aspects to this library. First is the representation of // scalar expressions, which are represented as subclasses of the SCEV class. // These classes are used to represent certain types of subexpressions that we // can handle. We only create one SCEV of a particular shape, so // pointer-comparisons for equality are legal. // // One important aspect of the SCEV objects is that they are never cyclic, even // if there is a cycle in the dataflow for an expression (ie, a PHI node). If // the PHI node is one of the idioms that we can represent (e.g., a polynomial // recurrence) then we represent it directly as a recurrence node, otherwise we // represent it as a SCEVUnknown node. // // In addition to being able to represent expressions of various types, we also // have folders that are used to build the *canonical* representation for a // particular expression. These folders are capable of using a variety of // rewrite rules to simplify the expressions. // // Once the folders are defined, we can implement the more interesting // higher-level code, such as the code that recognizes PHI nodes of various // types, computes the execution count of a loop, etc. // // TODO: We should use these routines and value representations to implement // dependence analysis! // //===----------------------------------------------------------------------===// // // There are several good references for the techniques used in this analysis. // // Chains of recurrences -- a method to expedite the evaluation // of closed-form functions // Olaf Bachmann, Paul S. Wang, Eugene V. Zima // // On computational properties of chains of recurrences // Eugene V. Zima // // Symbolic Evaluation of Chains of Recurrences for Loop Optimization // Robert A. van Engelen // // Efficient Symbolic Analysis for Optimizing Compilers // Robert A. 
van Engelen // // Using the chains of recurrences algebra for data dependence testing and // induction variable substitution // MS Thesis, Johnie Birch // //===----------------------------------------------------------------------===// #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/EquivalenceClasses.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/Sequence.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Argument.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constant.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/SaveAndRestore.h" #include "llvm/Support/raw_ostream.h" #include #include #include #include #include #include #include #include #include #include #include using namespace llvm; #define DEBUG_TYPE "scalar-evolution" STATISTIC(NumArrayLenItCounts, "Number of trip counts computed with array length"); STATISTIC(NumTripCountsComputed, "Number of loops with predictable loop counts"); STATISTIC(NumTripCountsNotComputed, "Number of loops without predictable loop counts"); STATISTIC(NumBruteForceTripCountsComputed, "Number of loops with trip counts computed by force"); static cl::opt MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden, cl::desc("Maximum number of iterations SCEV will " "symbolically execute a constant " "derived loop"), cl::init(100)); // FIXME: Enable this with EXPENSIVE_CHECKS when the test suite is clean. 
static cl::opt VerifySCEV( "verify-scev", cl::Hidden, cl::desc("Verify ScalarEvolution's backedge taken counts (slow)")); static cl::opt VerifySCEVMap("verify-scev-maps", cl::Hidden, cl::desc("Verify no dangling value in ScalarEvolution's " "ExprValueMap (slow)")); static cl::opt MulOpsInlineThreshold( "scev-mulops-inline-threshold", cl::Hidden, cl::desc("Threshold for inlining multiplication operands into a SCEV"), cl::init(32)); static cl::opt AddOpsInlineThreshold( "scev-addops-inline-threshold", cl::Hidden, cl::desc("Threshold for inlining addition operands into a SCEV"), cl::init(500)); static cl::opt MaxSCEVCompareDepth( "scalar-evolution-max-scev-compare-depth", cl::Hidden, cl::desc("Maximum depth of recursive SCEV complexity comparisons"), cl::init(32)); static cl::opt MaxSCEVOperationsImplicationDepth( "scalar-evolution-max-scev-operations-implication-depth", cl::Hidden, cl::desc("Maximum depth of recursive SCEV operations implication analysis"), cl::init(2)); static cl::opt MaxValueCompareDepth( "scalar-evolution-max-value-compare-depth", cl::Hidden, cl::desc("Maximum depth of recursive value complexity comparisons"), cl::init(2)); static cl::opt MaxArithDepth("scalar-evolution-max-arith-depth", cl::Hidden, cl::desc("Maximum depth of recursive arithmetics"), cl::init(32)); static cl::opt MaxConstantEvolvingDepth( "scalar-evolution-max-constant-evolving-depth", cl::Hidden, cl::desc("Maximum depth of recursive constant evolving"), cl::init(32)); static cl::opt MaxExtDepth("scalar-evolution-max-ext-depth", cl::Hidden, cl::desc("Maximum depth of recursive SExt/ZExt"), cl::init(8)); static cl::opt MaxAddRecSize("scalar-evolution-max-add-rec-size", cl::Hidden, cl::desc("Max coefficients in AddRec during evolving"), cl::init(16)); +static cl::opt VersionUnknown( + "scev-version-unknown", cl::Hidden, + cl::desc("Use predicated scalar evolution to version SCEVUnknowns"), + cl::init(false)); + //===----------------------------------------------------------------------===// // SCEV class definitions //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // Implementation of the SCEV class. 
// #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void SCEV::dump() const { print(dbgs()); dbgs() << '\n'; } #endif void SCEV::print(raw_ostream &OS) const { switch (static_cast(getSCEVType())) { case scConstant: cast(this)->getValue()->printAsOperand(OS, false); return; case scTruncate: { const SCEVTruncateExpr *Trunc = cast(this); const SCEV *Op = Trunc->getOperand(); OS << "(trunc " << *Op->getType() << " " << *Op << " to " << *Trunc->getType() << ")"; return; } case scZeroExtend: { const SCEVZeroExtendExpr *ZExt = cast(this); const SCEV *Op = ZExt->getOperand(); OS << "(zext " << *Op->getType() << " " << *Op << " to " << *ZExt->getType() << ")"; return; } case scSignExtend: { const SCEVSignExtendExpr *SExt = cast(this); const SCEV *Op = SExt->getOperand(); OS << "(sext " << *Op->getType() << " " << *Op << " to " << *SExt->getType() << ")"; return; } case scAddRecExpr: { const SCEVAddRecExpr *AR = cast(this); OS << "{" << *AR->getOperand(0); for (unsigned i = 1, e = AR->getNumOperands(); i != e; ++i) OS << ",+," << *AR->getOperand(i); OS << "}<"; if (AR->hasNoUnsignedWrap()) OS << "nuw><"; if (AR->hasNoSignedWrap()) OS << "nsw><"; if (AR->hasNoSelfWrap() && !AR->getNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW))) OS << "nw><"; AR->getLoop()->getHeader()->printAsOperand(OS, /*PrintType=*/false); OS << ">"; return; } case scAddExpr: case scMulExpr: case scUMaxExpr: case scSMaxExpr: { const SCEVNAryExpr *NAry = cast(this); const char *OpStr = nullptr; switch (NAry->getSCEVType()) { case scAddExpr: OpStr = " + "; break; case scMulExpr: OpStr = " * "; break; case scUMaxExpr: OpStr = " umax "; break; case scSMaxExpr: OpStr = " smax "; break; } OS << "("; for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); I != E; ++I) { OS << **I; if (std::next(I) != E) OS << OpStr; } OS << ")"; switch (NAry->getSCEVType()) { case scAddExpr: case scMulExpr: if (NAry->hasNoUnsignedWrap()) OS << ""; if (NAry->hasNoSignedWrap()) OS << ""; } return; } case scUDivExpr: { const SCEVUDivExpr *UDiv = cast(this); OS << "(" << *UDiv->getLHS() << " /u " << *UDiv->getRHS() << ")"; return; } case scUnknown: { const SCEVUnknown *U = cast(this); Type *AllocTy; if (U->isSizeOf(AllocTy)) { OS << "sizeof(" << *AllocTy << ")"; return; } if (U->isAlignOf(AllocTy)) { OS << "alignof(" << *AllocTy << ")"; return; } Type *CTy; Constant *FieldNo; if (U->isOffsetOf(CTy, FieldNo)) { OS << "offsetof(" << *CTy << ", "; FieldNo->printAsOperand(OS, false); OS << ")"; return; } // Otherwise just print it normally. 
U->getValue()->printAsOperand(OS, false); return; } case scCouldNotCompute: OS << "***COULDNOTCOMPUTE***"; return; } llvm_unreachable("Unknown SCEV kind!"); } Type *SCEV::getType() const { switch (static_cast(getSCEVType())) { case scConstant: return cast(this)->getType(); case scTruncate: case scZeroExtend: case scSignExtend: return cast(this)->getType(); case scAddRecExpr: case scMulExpr: case scUMaxExpr: case scSMaxExpr: return cast(this)->getType(); case scAddExpr: return cast(this)->getType(); case scUDivExpr: return cast(this)->getType(); case scUnknown: return cast(this)->getType(); case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); } llvm_unreachable("Unknown SCEV kind!"); } bool SCEV::isZero() const { if (const SCEVConstant *SC = dyn_cast(this)) return SC->getValue()->isZero(); return false; } bool SCEV::isOne() const { if (const SCEVConstant *SC = dyn_cast(this)) return SC->getValue()->isOne(); return false; } bool SCEV::isAllOnesValue() const { if (const SCEVConstant *SC = dyn_cast(this)) return SC->getValue()->isMinusOne(); return false; } bool SCEV::isNonConstantNegative() const { const SCEVMulExpr *Mul = dyn_cast(this); if (!Mul) return false; // If there is a constant factor, it will be first. const SCEVConstant *SC = dyn_cast(Mul->getOperand(0)); if (!SC) return false; // Return true if the value is negative, this matches things like (-42 * V). return SC->getAPInt().isNegative(); } SCEVCouldNotCompute::SCEVCouldNotCompute() : SCEV(FoldingSetNodeIDRef(), scCouldNotCompute) {} bool SCEVCouldNotCompute::classof(const SCEV *S) { return S->getSCEVType() == scCouldNotCompute; } const SCEV *ScalarEvolution::getConstant(ConstantInt *V) { FoldingSetNodeID ID; ID.AddInteger(scConstant); ID.AddPointer(V); void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V); UniqueSCEVs.InsertNode(S, IP); return S; } const SCEV *ScalarEvolution::getConstant(const APInt &Val) { return getConstant(ConstantInt::get(getContext(), Val)); } const SCEV * ScalarEvolution::getConstant(Type *Ty, uint64_t V, bool isSigned) { IntegerType *ITy = cast(getEffectiveSCEVType(Ty)); return getConstant(ConstantInt::get(ITy, V, isSigned)); } SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID, unsigned SCEVTy, const SCEV *op, Type *ty) : SCEV(ID, SCEVTy), Op(op), Ty(ty) {} SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID, const SCEV *op, Type *ty) : SCEVCastExpr(ID, scTruncate, op, ty) { assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot truncate non-integer value!"); } SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID, const SCEV *op, Type *ty) : SCEVCastExpr(ID, scZeroExtend, op, ty) { assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot zero extend non-integer value!"); } SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID, const SCEV *op, Type *ty) : SCEVCastExpr(ID, scSignExtend, op, ty) { assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot sign extend non-integer value!"); } void SCEVUnknown::deleted() { // Clear this SCEVUnknown from various maps. SE->forgetMemoizedResults(this); // Remove this SCEVUnknown from the uniquing map. SE->UniqueSCEVs.RemoveNode(this); // Release the value. 
  setValPtr(nullptr);
}

void SCEVUnknown::allUsesReplacedWith(Value *New) {
  // Remove this SCEVUnknown from the uniquing map.
  SE->UniqueSCEVs.RemoveNode(this);

  // Update this SCEVUnknown to point to the new value. This is needed
  // because there may still be outstanding SCEVs which still point to
  // this SCEVUnknown.
  setValPtr(New);
}

bool SCEVUnknown::isSizeOf(Type *&AllocTy) const {
  if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
    if (VCE->getOpcode() == Instruction::PtrToInt)
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
        if (CE->getOpcode() == Instruction::GetElementPtr &&
            CE->getOperand(0)->isNullValue() &&
            CE->getNumOperands() == 2)
          if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(1)))
            if (CI->isOne()) {
              AllocTy = cast<PointerType>(CE->getOperand(0)->getType())
                                 ->getElementType();
              return true;
            }

  return false;
}

bool SCEVUnknown::isAlignOf(Type *&AllocTy) const {
  if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
    if (VCE->getOpcode() == Instruction::PtrToInt)
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
        if (CE->getOpcode() == Instruction::GetElementPtr &&
            CE->getOperand(0)->isNullValue()) {
          Type *Ty =
            cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
          if (StructType *STy = dyn_cast<StructType>(Ty))
            if (!STy->isPacked() &&
                CE->getNumOperands() == 3 &&
                CE->getOperand(1)->isNullValue()) {
              if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(2)))
                if (CI->isOne() &&
                    STy->getNumElements() == 2 &&
                    STy->getElementType(0)->isIntegerTy(1)) {
                  AllocTy = STy->getElementType(1);
                  return true;
                }
            }
        }

  return false;
}

bool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const {
  if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
    if (VCE->getOpcode() == Instruction::PtrToInt)
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
        if (CE->getOpcode() == Instruction::GetElementPtr &&
            CE->getNumOperands() == 3 &&
            CE->getOperand(0)->isNullValue() &&
            CE->getOperand(1)->isNullValue()) {
          Type *Ty =
            cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
          // Ignore vector types here so that ScalarEvolutionExpander doesn't
          // emit getelementptrs that index into vectors.
          if (Ty->isStructTy() || Ty->isArrayTy()) {
            CTy = Ty;
            FieldNo = CE->getOperand(2);
            return true;
          }
        }

  return false;
}

//===----------------------------------------------------------------------===//
//                               SCEV Utilities
//===----------------------------------------------------------------------===//

/// Compare the two values \p LV and \p RV in terms of their "complexity" where
/// "complexity" is a partial (and somewhat ad-hoc) relation used to order
/// operands in SCEV expressions. \p EqCache is a set of pairs of values that
/// have been previously deemed to be "equally complex" by this routine.  It is
/// intended to avoid exponential time complexity in cases like:
///
///   %a = f(%x, %y)
///   %b = f(%a,  %a)
///   %c = f(%b,  %b)
///
///   %d = f(%x, %y)
///   %e = f(%d,  %d)
///   %f = f(%e,  %e)
///
///   CompareValueComplexity(%f, %c)
///
/// Since we do not continue running this routine on expression trees once we
/// have seen unequal values, there is no need to track them in the cache.
static int
CompareValueComplexity(EquivalenceClasses<const Value *> &EqCacheValue,
                       const LoopInfo *const LI, Value *LV, Value *RV,
                       unsigned Depth) {
  if (Depth > MaxValueCompareDepth || EqCacheValue.isEquivalent(LV, RV))
    return 0;

  // Order pointer values after integer values. This helps SCEVExpander form
  // GEPs.
  bool LIsPointer = LV->getType()->isPointerTy(),
       RIsPointer = RV->getType()->isPointerTy();
  if (LIsPointer != RIsPointer)
    return (int)LIsPointer - (int)RIsPointer;

  // Compare getValueID values.
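//===----------------------------------------------------------------------===//
// A minimal standalone sketch (not LLVM code): the three recognizers above
// match the classic null-pointer GEP encodings of sizeof/alignof/offsetof,
// e.g. isSizeOf matches ptrtoint(gep (T*)null, 1) and isOffsetOf matches
// ptrtoint(gep (T*)null, 0, Field).  The struct name S below is illustrative;
// the standard macros compute the same quantities that those constant
// expressions encode.
#if 0
#include <cstddef>
#include <cstdio>

struct S { bool B; double D; };  // the alignof pattern above uses {i1, T}

int main() {
  // isSizeOf:   ptrtoint(gep (T*)null, 1)          encodes sizeof(T)
  // isAlignOf:  ptrtoint(gep ({i1,T}*)null, 0, 1)  encodes alignof(T)
  // isOffsetOf: ptrtoint(gep (T*)null, 0, F)       encodes offsetof(T, F)
  std::printf("sizeof=%zu alignof=%zu offsetof=%zu\n",
              sizeof(S), alignof(double), offsetof(S, D));
}
#endif
//===----------------------------------------------------------------------===//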
unsigned LID = LV->getValueID(), RID = RV->getValueID(); if (LID != RID) return (int)LID - (int)RID; // Sort arguments by their position. if (const auto *LA = dyn_cast(LV)) { const auto *RA = cast(RV); unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo(); return (int)LArgNo - (int)RArgNo; } if (const auto *LGV = dyn_cast(LV)) { const auto *RGV = cast(RV); const auto IsGVNameSemantic = [&](const GlobalValue *GV) { auto LT = GV->getLinkage(); return !(GlobalValue::isPrivateLinkage(LT) || GlobalValue::isInternalLinkage(LT)); }; // Use the names to distinguish the two values, but only if the // names are semantically important. if (IsGVNameSemantic(LGV) && IsGVNameSemantic(RGV)) return LGV->getName().compare(RGV->getName()); } // For instructions, compare their loop depth, and their operand count. This // is pretty loose. if (const auto *LInst = dyn_cast(LV)) { const auto *RInst = cast(RV); // Compare loop depths. const BasicBlock *LParent = LInst->getParent(), *RParent = RInst->getParent(); if (LParent != RParent) { unsigned LDepth = LI->getLoopDepth(LParent), RDepth = LI->getLoopDepth(RParent); if (LDepth != RDepth) return (int)LDepth - (int)RDepth; } // Compare the number of operands. unsigned LNumOps = LInst->getNumOperands(), RNumOps = RInst->getNumOperands(); if (LNumOps != RNumOps) return (int)LNumOps - (int)RNumOps; for (unsigned Idx : seq(0u, LNumOps)) { int Result = CompareValueComplexity(EqCacheValue, LI, LInst->getOperand(Idx), RInst->getOperand(Idx), Depth + 1); if (Result != 0) return Result; } } EqCacheValue.unionSets(LV, RV); return 0; } // Return negative, zero, or positive, if LHS is less than, equal to, or greater // than RHS, respectively. A three-way result allows recursive comparisons to be // more efficient. static int CompareSCEVComplexity( EquivalenceClasses &EqCacheSCEV, EquivalenceClasses &EqCacheValue, const LoopInfo *const LI, const SCEV *LHS, const SCEV *RHS, DominatorTree &DT, unsigned Depth = 0) { // Fast-path: SCEVs are uniqued so we can do a quick equality check. if (LHS == RHS) return 0; // Primarily, sort the SCEVs by their getSCEVType(). unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType(); if (LType != RType) return (int)LType - (int)RType; if (Depth > MaxSCEVCompareDepth || EqCacheSCEV.isEquivalent(LHS, RHS)) return 0; // Aside from the getSCEVType() ordering, the particular ordering // isn't very important except that it's beneficial to be consistent, // so that (a + b) and (b + a) don't end up as different expressions. switch (static_cast(LType)) { case scUnknown: { const SCEVUnknown *LU = cast(LHS); const SCEVUnknown *RU = cast(RHS); int X = CompareValueComplexity(EqCacheValue, LI, LU->getValue(), RU->getValue(), Depth + 1); if (X == 0) EqCacheSCEV.unionSets(LHS, RHS); return X; } case scConstant: { const SCEVConstant *LC = cast(LHS); const SCEVConstant *RC = cast(RHS); // Compare constant values. const APInt &LA = LC->getAPInt(); const APInt &RA = RC->getAPInt(); unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth(); if (LBitWidth != RBitWidth) return (int)LBitWidth - (int)RBitWidth; return LA.ult(RA) ? -1 : 1; } case scAddRecExpr: { const SCEVAddRecExpr *LA = cast(LHS); const SCEVAddRecExpr *RA = cast(RHS); // There is always a dominance between two recs that are used by one SCEV, // so we can safely sort recs by loop header dominance. We require such // order in getAddExpr. 
const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop(); if (LLoop != RLoop) { const BasicBlock *LHead = LLoop->getHeader(), *RHead = RLoop->getHeader(); assert(LHead != RHead && "Two loops share the same header?"); if (DT.dominates(LHead, RHead)) return 1; else assert(DT.dominates(RHead, LHead) && "No dominance between recurrences used by one SCEV?"); return -1; } // Addrec complexity grows with operand count. unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands(); if (LNumOps != RNumOps) return (int)LNumOps - (int)RNumOps; // Compare NoWrap flags. if (LA->getNoWrapFlags() != RA->getNoWrapFlags()) return (int)LA->getNoWrapFlags() - (int)RA->getNoWrapFlags(); // Lexicographically compare. for (unsigned i = 0; i != LNumOps; ++i) { int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LA->getOperand(i), RA->getOperand(i), DT, Depth + 1); if (X != 0) return X; } EqCacheSCEV.unionSets(LHS, RHS); return 0; } case scAddExpr: case scMulExpr: case scSMaxExpr: case scUMaxExpr: { const SCEVNAryExpr *LC = cast(LHS); const SCEVNAryExpr *RC = cast(RHS); // Lexicographically compare n-ary expressions. unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands(); if (LNumOps != RNumOps) return (int)LNumOps - (int)RNumOps; // Compare NoWrap flags. if (LC->getNoWrapFlags() != RC->getNoWrapFlags()) return (int)LC->getNoWrapFlags() - (int)RC->getNoWrapFlags(); for (unsigned i = 0; i != LNumOps; ++i) { int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LC->getOperand(i), RC->getOperand(i), DT, Depth + 1); if (X != 0) return X; } EqCacheSCEV.unionSets(LHS, RHS); return 0; } case scUDivExpr: { const SCEVUDivExpr *LC = cast(LHS); const SCEVUDivExpr *RC = cast(RHS); // Lexicographically compare udiv expressions. int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LC->getLHS(), RC->getLHS(), DT, Depth + 1); if (X != 0) return X; X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LC->getRHS(), RC->getRHS(), DT, Depth + 1); if (X == 0) EqCacheSCEV.unionSets(LHS, RHS); return X; } case scTruncate: case scZeroExtend: case scSignExtend: { const SCEVCastExpr *LC = cast(LHS); const SCEVCastExpr *RC = cast(RHS); // Compare cast expressions by operand. int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LC->getOperand(), RC->getOperand(), DT, Depth + 1); if (X == 0) EqCacheSCEV.unionSets(LHS, RHS); return X; } case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); } llvm_unreachable("Unknown SCEV kind!"); } /// Given a list of SCEV objects, order them by their complexity, and group /// objects of the same complexity together by value. When this routine is /// finished, we know that any duplicates in the vector are consecutive and that /// complexity is monotonically increasing. /// /// Note that we go take special precautions to ensure that we get deterministic /// results from this routine. In other words, we don't want the results of /// this to depend on where the addresses of various SCEV objects happened to /// land in memory. static void GroupByComplexity(SmallVectorImpl &Ops, LoopInfo *LI, DominatorTree &DT) { if (Ops.size() < 2) return; // Noop EquivalenceClasses EqCacheSCEV; EquivalenceClasses EqCacheValue; if (Ops.size() == 2) { // This is the common case, which also happens to be trivially simple. // Special case it. const SCEV *&LHS = Ops[0], *&RHS = Ops[1]; if (CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, RHS, LHS, DT) < 0) std::swap(LHS, RHS); return; } // Do the rough sort by complexity. 
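//===----------------------------------------------------------------------===//
// A minimal standalone sketch (not LLVM code) of the sort-then-group scheme
// implemented just below: the comparator may return 0 for distinct but
// "equally complex" values, so a stable sort alone does not make exact
// duplicates adjacent; a second pass swaps them next to their first
// occurrence.  Strings sorted by length stand in for SCEVs sorted by
// complexity.
#if 0
#include <algorithm>
#include <cstdio>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> Ops = {"bb", "cc", "bb", "a"};
  // Rough sort by a coarse "complexity" key (here: length).
  std::stable_sort(Ops.begin(), Ops.end(),
                   [](const std::string &L, const std::string &R) {
                     return L.size() < R.size();
                   });
  // Second pass: pull exact duplicates next to their first occurrence.
  for (size_t i = 0, e = Ops.size(); i + 2 <= e; ++i) {
    const std::string S = Ops[i];
    for (size_t j = i + 1; j != e && Ops[j].size() == S.size(); ++j)
      if (Ops[j] == S)
        std::swap(Ops[++i], Ops[j]); // duplicate: move it right after i
  }
  for (const std::string &V : Ops)
    std::printf("%s ", V.c_str()); // a bb bb cc
}
#endif
//===----------------------------------------------------------------------===//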
std::stable_sort(Ops.begin(), Ops.end(), [&](const SCEV *LHS, const SCEV *RHS) { return CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LHS, RHS, DT) < 0; }); // Now that we are sorted by complexity, group elements of the same // complexity. Note that this is, at worst, N^2, but the vector is likely to // be extremely short in practice. Note that we take this approach because we // do not want to depend on the addresses of the objects we are grouping. for (unsigned i = 0, e = Ops.size(); i != e-2; ++i) { const SCEV *S = Ops[i]; unsigned Complexity = S->getSCEVType(); // If there are any objects of the same complexity and same value as this // one, group them. for (unsigned j = i+1; j != e && Ops[j]->getSCEVType() == Complexity; ++j) { if (Ops[j] == S) { // Found a duplicate. // Move it to immediately after i'th element. std::swap(Ops[i+1], Ops[j]); ++i; // no need to rescan it. if (i == e-2) return; // Done! } } } } // Returns the size of the SCEV S. static inline int sizeOfSCEV(const SCEV *S) { struct FindSCEVSize { int Size = 0; FindSCEVSize() = default; bool follow(const SCEV *S) { ++Size; // Keep looking at all operands of S. return true; } bool isDone() const { return false; } }; FindSCEVSize F; SCEVTraversal ST(F); ST.visitAll(S); return F.Size; } namespace { struct SCEVDivision : public SCEVVisitor { public: // Computes the Quotient and Remainder of the division of Numerator by // Denominator. static void divide(ScalarEvolution &SE, const SCEV *Numerator, const SCEV *Denominator, const SCEV **Quotient, const SCEV **Remainder) { assert(Numerator && Denominator && "Uninitialized SCEV"); SCEVDivision D(SE, Numerator, Denominator); // Check for the trivial case here to avoid having to check for it in the // rest of the code. if (Numerator == Denominator) { *Quotient = D.One; *Remainder = D.Zero; return; } if (Numerator->isZero()) { *Quotient = D.Zero; *Remainder = D.Zero; return; } // A simple case when N/1. The quotient is N. if (Denominator->isOne()) { *Quotient = Numerator; *Remainder = D.Zero; return; } // Split the Denominator when it is a product. if (const SCEVMulExpr *T = dyn_cast(Denominator)) { const SCEV *Q, *R; *Quotient = Numerator; for (const SCEV *Op : T->operands()) { divide(SE, *Quotient, Op, &Q, &R); *Quotient = Q; // Bail out when the Numerator is not divisible by one of the terms of // the Denominator. if (!R->isZero()) { *Quotient = D.Zero; *Remainder = Numerator; return; } } *Remainder = D.Zero; return; } D.visit(Numerator); *Quotient = D.Quotient; *Remainder = D.Remainder; } // Except in the trivial case described above, we do not know how to divide // Expr by Denominator for the following functions with empty implementation. 
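//===----------------------------------------------------------------------===//
// A minimal standalone sketch (not LLVM code) of the "split the Denominator
// when it is a product" step in SCEVDivision::divide above: N / (a*b) is
// computed as (N/a)/b, giving up unless every factor divides evenly.  The
// helper name divideByProduct is illustrative; plain integers stand in for
// SCEVs.
#if 0
#include <cstdio>
#include <utility>

static std::pair<long, long> divideByProduct(long N, const long Factors[],
                                             int NumFactors) {
  long Q = N;
  for (int i = 0; i != NumFactors; ++i) {
    if (Q % Factors[i] != 0)
      return {0, N}; // "cannot divide": quotient 0, remainder = numerator
    Q /= Factors[i];
  }
  return {Q, 0};
}

int main() {
  const long Factors[] = {4, 3};
  auto [Q, R] = divideByProduct(24, Factors, 2);
  std::printf("24 / (4*3) -> q=%ld r=%ld\n", Q, R); // q=2 r=0
}
#endif
//===----------------------------------------------------------------------===//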
void visitTruncateExpr(const SCEVTruncateExpr *Numerator) {} void visitZeroExtendExpr(const SCEVZeroExtendExpr *Numerator) {} void visitSignExtendExpr(const SCEVSignExtendExpr *Numerator) {} void visitUDivExpr(const SCEVUDivExpr *Numerator) {} void visitSMaxExpr(const SCEVSMaxExpr *Numerator) {} void visitUMaxExpr(const SCEVUMaxExpr *Numerator) {} void visitUnknown(const SCEVUnknown *Numerator) {} void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {} void visitConstant(const SCEVConstant *Numerator) { if (const SCEVConstant *D = dyn_cast(Denominator)) { APInt NumeratorVal = Numerator->getAPInt(); APInt DenominatorVal = D->getAPInt(); uint32_t NumeratorBW = NumeratorVal.getBitWidth(); uint32_t DenominatorBW = DenominatorVal.getBitWidth(); if (NumeratorBW > DenominatorBW) DenominatorVal = DenominatorVal.sext(NumeratorBW); else if (NumeratorBW < DenominatorBW) NumeratorVal = NumeratorVal.sext(DenominatorBW); APInt QuotientVal(NumeratorVal.getBitWidth(), 0); APInt RemainderVal(NumeratorVal.getBitWidth(), 0); APInt::sdivrem(NumeratorVal, DenominatorVal, QuotientVal, RemainderVal); Quotient = SE.getConstant(QuotientVal); Remainder = SE.getConstant(RemainderVal); return; } } void visitAddRecExpr(const SCEVAddRecExpr *Numerator) { const SCEV *StartQ, *StartR, *StepQ, *StepR; if (!Numerator->isAffine()) return cannotDivide(Numerator); divide(SE, Numerator->getStart(), Denominator, &StartQ, &StartR); divide(SE, Numerator->getStepRecurrence(SE), Denominator, &StepQ, &StepR); // Bail out if the types do not match. Type *Ty = Denominator->getType(); if (Ty != StartQ->getType() || Ty != StartR->getType() || Ty != StepQ->getType() || Ty != StepR->getType()) return cannotDivide(Numerator); Quotient = SE.getAddRecExpr(StartQ, StepQ, Numerator->getLoop(), Numerator->getNoWrapFlags()); Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(), Numerator->getNoWrapFlags()); } void visitAddExpr(const SCEVAddExpr *Numerator) { SmallVector Qs, Rs; Type *Ty = Denominator->getType(); for (const SCEV *Op : Numerator->operands()) { const SCEV *Q, *R; divide(SE, Op, Denominator, &Q, &R); // Bail out if types do not match. if (Ty != Q->getType() || Ty != R->getType()) return cannotDivide(Numerator); Qs.push_back(Q); Rs.push_back(R); } if (Qs.size() == 1) { Quotient = Qs[0]; Remainder = Rs[0]; return; } Quotient = SE.getAddExpr(Qs); Remainder = SE.getAddExpr(Rs); } void visitMulExpr(const SCEVMulExpr *Numerator) { SmallVector Qs; Type *Ty = Denominator->getType(); bool FoundDenominatorTerm = false; for (const SCEV *Op : Numerator->operands()) { // Bail out if types do not match. if (Ty != Op->getType()) return cannotDivide(Numerator); if (FoundDenominatorTerm) { Qs.push_back(Op); continue; } // Check whether Denominator divides one of the product operands. const SCEV *Q, *R; divide(SE, Op, Denominator, &Q, &R); if (!R->isZero()) { Qs.push_back(Op); continue; } // Bail out if types do not match. if (Ty != Q->getType()) return cannotDivide(Numerator); FoundDenominatorTerm = true; Qs.push_back(Q); } if (FoundDenominatorTerm) { Remainder = Zero; if (Qs.size() == 1) Quotient = Qs[0]; else Quotient = SE.getMulExpr(Qs); return; } if (!isa(Denominator)) return cannotDivide(Numerator); // The Remainder is obtained by replacing Denominator by 0 in Numerator. 
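//===----------------------------------------------------------------------===//
// A minimal standalone sketch (not LLVM code) of what visitConstant above
// does: sign-extend both constants to a common width, then take the signed
// quotient and remainder (APInt::sdivrem truncates toward zero, as C++
// integer division does).  Fixed-width integers stand in for APInts.
#if 0
#include <cstdint>
#include <cstdio>

int main() {
  int32_t Num32 = -7; // numerator, nominally 32 bits wide
  int8_t  Den8  = 2;  // denominator, nominally 8 bits wide
  int64_t Num = Num32; // sext both operands to a common width
  int64_t Den = Den8;
  std::printf("q=%lld r=%lld\n", (long long)(Num / Den),
              (long long)(Num % Den)); // q=-3 r=-1
}
#endif
//===----------------------------------------------------------------------===//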
ValueToValueMap RewriteMap; RewriteMap[cast(Denominator)->getValue()] = cast(Zero)->getValue(); Remainder = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true); if (Remainder->isZero()) { // The Quotient is obtained by replacing Denominator by 1 in Numerator. RewriteMap[cast(Denominator)->getValue()] = cast(One)->getValue(); Quotient = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true); return; } // Quotient is (Numerator - Remainder) divided by Denominator. const SCEV *Q, *R; const SCEV *Diff = SE.getMinusSCEV(Numerator, Remainder); // This SCEV does not seem to simplify: fail the division here. if (sizeOfSCEV(Diff) > sizeOfSCEV(Numerator)) return cannotDivide(Numerator); divide(SE, Diff, Denominator, &Q, &R); if (R != Zero) return cannotDivide(Numerator); Quotient = Q; } private: SCEVDivision(ScalarEvolution &S, const SCEV *Numerator, const SCEV *Denominator) : SE(S), Denominator(Denominator) { Zero = SE.getZero(Denominator->getType()); One = SE.getOne(Denominator->getType()); // We generally do not know how to divide Expr by Denominator. We // initialize the division to a "cannot divide" state to simplify the rest // of the code. cannotDivide(Numerator); } // Convenience function for giving up on the division. We set the quotient to // be equal to zero and the remainder to be equal to the numerator. void cannotDivide(const SCEV *Numerator) { Quotient = Zero; Remainder = Numerator; } ScalarEvolution &SE; const SCEV *Denominator, *Quotient, *Remainder, *Zero, *One; }; } // end anonymous namespace //===----------------------------------------------------------------------===// // Simple SCEV method implementations //===----------------------------------------------------------------------===// /// Compute BC(It, K). The result has width W. Assume, K > 0. static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K, ScalarEvolution &SE, Type *ResultTy) { // Handle the simplest case efficiently. if (K == 1) return SE.getTruncateOrZeroExtend(It, ResultTy); // We are using the following formula for BC(It, K): // // BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / K! // // Suppose, W is the bitwidth of the return value. We must be prepared for // overflow. Hence, we must assure that the result of our computation is // equal to the accurate one modulo 2^W. Unfortunately, division isn't // safe in modular arithmetic. // // However, this code doesn't use exactly that formula; the formula it uses // is something like the following, where T is the number of factors of 2 in // K! (i.e. trailing zeros in the binary representation of K!), and ^ is // exponentiation: // // BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / 2^T / (K! / 2^T) // // This formula is trivially equivalent to the previous formula. However, // this formula can be implemented much more efficiently. The trick is that // K! / 2^T is odd, and exact division by an odd number *is* safe in modular // arithmetic. To do exact division in modular arithmetic, all we have // to do is multiply by the inverse. Therefore, this step can be done at // width W. // // The next issue is how to safely do the division by 2^T. The way this // is done is by doing the multiplication step at a width of at least W + T // bits. This way, the bottom W+T bits of the product are accurate. Then, // when we perform the division by 2^T (which is equivalent to a right shift // by T), the bottom W bits are accurate. Extra bits are okay; they'll get // truncated out after the division by 2^T. 
  //
  // In comparison to just directly using the first formula, this technique
  // is much more efficient; using the first formula requires W * K bits,
  // but this formula less than W + K bits. Also, the first formula requires
  // a division step, whereas this formula only requires multiplies and shifts.
  //
  // It doesn't matter whether the subtraction step is done in the calculation
  // width or the input iteration count's width; if the subtraction overflows,
  // the result must be zero anyway.  We prefer here to do it in the width of
  // the induction variable because it helps a lot for certain cases; CodeGen
  // isn't smart enough to ignore the overflow, which leads to much less
  // efficient code if the width of the subtraction is wider than the native
  // register width.
  //
  // (It's possible to not widen at all by pulling out factors of 2 before
  // the multiplication; for example, K=2 can be calculated as
  // It/2*(It+(It*INT_MIN/INT_MIN)+-1). However, it requires
  // extra arithmetic, so it's not an obvious win, and it gets
  // much more complicated for K > 3.)

  // Protection from insane SCEVs; this bound is conservative,
  // but it probably doesn't matter.
  if (K > 1000)
    return SE.getCouldNotCompute();

  unsigned W = SE.getTypeSizeInBits(ResultTy);

  // Calculate K! / 2^T and T; we divide out the factors of two before
  // multiplying for calculating K! / 2^T to avoid overflow.
  // Other overflow doesn't matter because we only care about the bottom
  // W bits of the result.
  APInt OddFactorial(W, 1);
  unsigned T = 1;
  for (unsigned i = 3; i <= K; ++i) {
    APInt Mult(W, i);
    unsigned TwoFactors = Mult.countTrailingZeros();
    T += TwoFactors;
    Mult.lshrInPlace(TwoFactors);
    OddFactorial *= Mult;
  }

  // We need at least W + T bits for the multiplication step
  unsigned CalculationBits = W + T;

  // Calculate 2^T, at width T+W.
  APInt DivFactor = APInt::getOneBitSet(CalculationBits, T);

  // Calculate the multiplicative inverse of K! / 2^T;
  // this multiplication factor will perform the exact division by
  // K! / 2^T.
  APInt Mod = APInt::getSignedMinValue(W+1);
  APInt MultiplyFactor = OddFactorial.zext(W+1);
  MultiplyFactor = MultiplyFactor.multiplicativeInverse(Mod);
  MultiplyFactor = MultiplyFactor.trunc(W);

  // Calculate the product, at width T+W
  IntegerType *CalculationTy = IntegerType::get(SE.getContext(),
                                                CalculationBits);
  const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy);
  for (unsigned i = 1; i != K; ++i) {
    const SCEV *S = SE.getMinusSCEV(It, SE.getConstant(It->getType(), i));
    Dividend = SE.getMulExpr(Dividend,
                             SE.getTruncateOrZeroExtend(S, CalculationTy));
  }

  // Divide by 2^T
  const SCEV *DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor));

  // Truncate the result, and divide by K! / 2^T.
  return SE.getMulExpr(SE.getConstant(MultiplyFactor),
                       SE.getTruncateOrZeroExtend(DivResult, ResultTy));
}

/// Return the value of this chain of recurrences at the specified iteration
/// number.  We can evaluate this recurrence by multiplying each element in the
/// chain by the binomial coefficient corresponding to it.  In other words, we
/// can evaluate {A,+,B,+,C,+,D} as:
///
///   A*BC(It, 0) + B*BC(It, 1) + C*BC(It, 2) + D*BC(It, 3)
///
/// where BC(It, k) stands for binomial coefficient.
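//===----------------------------------------------------------------------===//
// Before the definition of evaluateAtIteration that follows, a minimal
// standalone sketch (not LLVM code) of the exact-division trick used by
// BinomialCoefficient above: dividing by K! splits into a shift by T (the
// factors of two) and a multiply by the modular inverse of the remaining odd
// factor, which is exact in modular arithmetic.  The helper names inverseOdd
// and binomial3 are illustrative; unlike the real code, this sketch does not
// widen to W+T bits, so it assumes the product fits in 64 bits.
#if 0
#include <cstdint>
#include <cstdio>

// Inverse of an odd A modulo 2^64 via Newton's iteration; each step doubles
// the number of correct low bits (3 -> 6 -> 12 -> 24 -> 48 -> 96 >= 64).
static uint64_t inverseOdd(uint64_t A) {
  uint64_t X = A; // already correct modulo 8 for odd A
  for (int i = 0; i < 5; ++i)
    X *= 2 - A * X;
  return X;
}

// BC(It, 3) = It*(It-1)*(It-2) / 6, with 6 = 2^T * odd, T = 1, odd = 3:
// shift out the power of two, then multiply by the inverse of the odd part.
static uint64_t binomial3(uint64_t It) {
  uint64_t Product = It * (It - 1) * (It - 2);
  return (Product >> 1) * inverseOdd(3);
}

int main() {
  std::printf("%llu\n", (unsigned long long)binomial3(10)); // 120
}
#endif
//===----------------------------------------------------------------------===//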
const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const { const SCEV *Result = getStart(); for (unsigned i = 1, e = getNumOperands(); i != e; ++i) { // The computation is correct in the face of overflow provided that the // multiplication is performed _after_ the evaluation of the binomial // coefficient. const SCEV *Coeff = BinomialCoefficient(It, i, SE, getType()); if (isa(Coeff)) return Coeff; Result = SE.getAddExpr(Result, SE.getMulExpr(getOperand(i), Coeff)); } return Result; } //===----------------------------------------------------------------------===// // SCEV Expression folder implementations //===----------------------------------------------------------------------===// const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, Type *Ty) { assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) && "This is not a truncating conversion!"); assert(isSCEVable(Ty) && "This is not a conversion to a SCEVable type!"); Ty = getEffectiveSCEVType(Ty); FoldingSetNodeID ID; ID.AddInteger(scTruncate); ID.AddPointer(Op); ID.AddPointer(Ty); void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; // Fold if the operand is constant. if (const SCEVConstant *SC = dyn_cast(Op)) return getConstant( cast(ConstantExpr::getTrunc(SC->getValue(), Ty))); // trunc(trunc(x)) --> trunc(x) if (const SCEVTruncateExpr *ST = dyn_cast(Op)) return getTruncateExpr(ST->getOperand(), Ty); // trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing if (const SCEVSignExtendExpr *SS = dyn_cast(Op)) return getTruncateOrSignExtend(SS->getOperand(), Ty); // trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing if (const SCEVZeroExtendExpr *SZ = dyn_cast(Op)) return getTruncateOrZeroExtend(SZ->getOperand(), Ty); // trunc(x1+x2+...+xN) --> trunc(x1)+trunc(x2)+...+trunc(xN) if we can // eliminate all the truncates, or we replace other casts with truncates. if (const SCEVAddExpr *SA = dyn_cast(Op)) { SmallVector Operands; bool hasTrunc = false; for (unsigned i = 0, e = SA->getNumOperands(); i != e && !hasTrunc; ++i) { const SCEV *S = getTruncateExpr(SA->getOperand(i), Ty); if (!isa(SA->getOperand(i))) hasTrunc = isa(S); Operands.push_back(S); } if (!hasTrunc) return getAddExpr(Operands); // In spite we checked in the beginning that ID is not in the cache, // it is possible that during recursion and different modification // ID came to cache, so if we found it, just return it. if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; } // trunc(x1*x2*...*xN) --> trunc(x1)*trunc(x2)*...*trunc(xN) if we can // eliminate all the truncates, or we replace other casts with truncates. if (const SCEVMulExpr *SM = dyn_cast(Op)) { SmallVector Operands; bool hasTrunc = false; for (unsigned i = 0, e = SM->getNumOperands(); i != e && !hasTrunc; ++i) { const SCEV *S = getTruncateExpr(SM->getOperand(i), Ty); if (!isa(SM->getOperand(i))) hasTrunc = isa(S); Operands.push_back(S); } if (!hasTrunc) return getMulExpr(Operands); // In spite we checked in the beginning that ID is not in the cache, // it is possible that during recursion and different modification // ID came to cache, so if we found it, just return it. if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; } // If the input value is a chrec scev, truncate the chrec's operands. 
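//===----------------------------------------------------------------------===//
// A minimal standalone sketch (not LLVM code) checking the truncate folds
// above on machine integers: truncating a zext picks whichever single cast
// covers the overall width change, and truncation distributes over addition
// modulo 2^N.
#if 0
#include <cassert>
#include <cstdint>

int main() {
  uint8_t X = 0xAB;
  // trunc(zext(i8 x to i64) to i32) == zext(i8 x to i32): still widening.
  assert((uint32_t)(uint64_t)X == (uint32_t)X);
  // trunc(zext(i8 x to i64) to i8) == x: the casts cancel.
  assert((uint8_t)(uint64_t)X == X);
  // trunc(a + b) == trunc(a) + trunc(b), both taken modulo 2^32.
  uint64_t A = 0x100000005ULL, B = 0x200000007ULL;
  assert((uint32_t)(A + B) == (uint32_t)A + (uint32_t)B);
  return 0;
}
#endif
//===----------------------------------------------------------------------===//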
if (const SCEVAddRecExpr *AddRec = dyn_cast(Op)) { SmallVector Operands; for (const SCEV *Op : AddRec->operands()) Operands.push_back(getTruncateExpr(Op, Ty)); return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap); } // The cast wasn't folded; create an explicit cast node. We can reuse // the existing insert position since if we get here, we won't have // made any changes which would invalidate it. SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); addToLoopUseLists(S); return S; } // Get the limit of a recurrence such that incrementing by Step cannot cause // signed overflow as long as the value of the recurrence within the // loop does not exceed this limit before incrementing. static const SCEV *getSignedOverflowLimitForStep(const SCEV *Step, ICmpInst::Predicate *Pred, ScalarEvolution *SE) { unsigned BitWidth = SE->getTypeSizeInBits(Step->getType()); if (SE->isKnownPositive(Step)) { *Pred = ICmpInst::ICMP_SLT; return SE->getConstant(APInt::getSignedMinValue(BitWidth) - SE->getSignedRangeMax(Step)); } if (SE->isKnownNegative(Step)) { *Pred = ICmpInst::ICMP_SGT; return SE->getConstant(APInt::getSignedMaxValue(BitWidth) - SE->getSignedRangeMin(Step)); } return nullptr; } // Get the limit of a recurrence such that incrementing by Step cannot cause // unsigned overflow as long as the value of the recurrence within the loop does // not exceed this limit before incrementing. static const SCEV *getUnsignedOverflowLimitForStep(const SCEV *Step, ICmpInst::Predicate *Pred, ScalarEvolution *SE) { unsigned BitWidth = SE->getTypeSizeInBits(Step->getType()); *Pred = ICmpInst::ICMP_ULT; return SE->getConstant(APInt::getMinValue(BitWidth) - SE->getUnsignedRangeMax(Step)); } namespace { struct ExtendOpTraitsBase { typedef const SCEV *(ScalarEvolution::*GetExtendExprTy)(const SCEV *, Type *, unsigned); }; // Used to make code generic over signed and unsigned overflow. template struct ExtendOpTraits { // Members present: // // static const SCEV::NoWrapFlags WrapType; // // static const ExtendOpTraitsBase::GetExtendExprTy GetExtendExpr; // // static const SCEV *getOverflowLimitForStep(const SCEV *Step, // ICmpInst::Predicate *Pred, // ScalarEvolution *SE); }; template <> struct ExtendOpTraits : public ExtendOpTraitsBase { static const SCEV::NoWrapFlags WrapType = SCEV::FlagNSW; static const GetExtendExprTy GetExtendExpr; static const SCEV *getOverflowLimitForStep(const SCEV *Step, ICmpInst::Predicate *Pred, ScalarEvolution *SE) { return getSignedOverflowLimitForStep(Step, Pred, SE); } }; const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits< SCEVSignExtendExpr>::GetExtendExpr = &ScalarEvolution::getSignExtendExpr; template <> struct ExtendOpTraits : public ExtendOpTraitsBase { static const SCEV::NoWrapFlags WrapType = SCEV::FlagNUW; static const GetExtendExprTy GetExtendExpr; static const SCEV *getOverflowLimitForStep(const SCEV *Step, ICmpInst::Predicate *Pred, ScalarEvolution *SE) { return getUnsignedOverflowLimitForStep(Step, Pred, SE); } }; const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits< SCEVZeroExtendExpr>::GetExtendExpr = &ScalarEvolution::getZeroExtendExpr; } // end anonymous namespace // The recurrence AR has been shown to have no signed/unsigned wrap or something // close to it. Typically, if we can prove NSW/NUW for AR, then we can just as // easily prove NSW/NUW for its preincrement or postincrement sibling. 
// This
// allows normalizing a sign/zero extended AddRec as such:
// {sext/zext(Step + Start),+,Step} => {(Step + sext/zext(Start),+,Step}
// As a result, the expression "Step + sext/zext(PreIncAR)" is congruent with
// "sext/zext(PostIncAR)"
template <typename ExtendOpTy>
static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,
                                        ScalarEvolution *SE, unsigned Depth) {
  auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;
  auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;

  const Loop *L = AR->getLoop();
  const SCEV *Start = AR->getStart();
  const SCEV *Step = AR->getStepRecurrence(*SE);

  // Check for a simple looking step prior to loop entry.
  const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
  if (!SA)
    return nullptr;

  // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV
  // subtraction is expensive. For this purpose, perform a quick and dirty
  // difference, by checking for Step in the operand list.
  SmallVector<const SCEV *, 4> DiffOps;
  for (const SCEV *Op : SA->operands())
    if (Op != Step)
      DiffOps.push_back(Op);

  if (DiffOps.size() == SA->getNumOperands())
    return nullptr;

  // Try to prove `WrapType` (SCEV::FlagNSW or SCEV::FlagNUW) on `PreStart` +
  // `Step`:

  // 1. NSW/NUW flags on the step increment.
  auto PreStartFlags =
    ScalarEvolution::maskFlags(SA->getNoWrapFlags(), SCEV::FlagNUW);
  const SCEV *PreStart = SE->getAddExpr(DiffOps, PreStartFlags);
  const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>(
      SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap));

  // "{S,+,X} is <nsw>/<nuw>" and "the backedge is taken at least once"
  // implies "S+X does not sign/unsign-overflow".
  //

  const SCEV *BECount = SE->getBackedgeTakenCount(L);
  if (PreAR && PreAR->getNoWrapFlags(WrapType) &&
      !isa<SCEVCouldNotCompute>(BECount) && SE->isKnownPositive(BECount))
    return PreStart;

  // 2. Direct overflow check on the step operation's expression.
  unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
  Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
  const SCEV *OperandExtendedStart =
      SE->getAddExpr((SE->*GetExtendExpr)(PreStart, WideTy, Depth),
                     (SE->*GetExtendExpr)(Step, WideTy, Depth));
  if ((SE->*GetExtendExpr)(Start, WideTy, Depth) == OperandExtendedStart) {
    if (PreAR && AR->getNoWrapFlags(WrapType)) {
      // If we know `AR` == {`PreStart`+`Step`,+,`Step`} is `WrapType` (FlagNSW
      // or FlagNUW) and that `PreStart` + `Step` is `WrapType` too, then
      // `PreAR` == {`PreStart`,+,`Step`} is also `WrapType`.  Cache this fact.
      const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(WrapType);
    }
    return PreStart;
  }

  // 3. Loop precondition.
  ICmpInst::Predicate Pred;
  const SCEV *OverflowLimit =
      ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(Step, &Pred, SE);

  if (OverflowLimit &&
      SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit))
    return PreStart;

  return nullptr;
}

// Get the normalized zero or sign extended expression for this AddRec's Start.
template <typename ExtendOpTy>
static const SCEV *getExtendAddRecStart(const SCEVAddRecExpr *AR, Type *Ty,
                                        ScalarEvolution *SE,
                                        unsigned Depth) {
  auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;

  const SCEV *PreStart = getPreStartForExtend<ExtendOpTy>(AR, Ty, SE, Depth);
  if (!PreStart)
    return (SE->*GetExtendExpr)(AR->getStart(), Ty, Depth);

  return SE->getAddExpr((SE->*GetExtendExpr)(AR->getStepRecurrence(*SE), Ty,
                                             Depth),
                        (SE->*GetExtendExpr)(PreStart, Ty, Depth));
}

// Try to prove away overflow by looking at "nearby" add recurrences.  A
// motivating example for this rule: if we know `{0,+,4}` is `ult` `-1` and it
// does not itself wrap then we can conclude that `{1,+,4}` is `nuw`.
// // Formally: // // {S,+,X} == {S-T,+,X} + T // => Ext({S,+,X}) == Ext({S-T,+,X} + T) // // If ({S-T,+,X} + T) does not overflow ... (1) // // RHS == Ext({S-T,+,X} + T) == Ext({S-T,+,X}) + Ext(T) // // If {S-T,+,X} does not overflow ... (2) // // RHS == Ext({S-T,+,X}) + Ext(T) == {Ext(S-T),+,Ext(X)} + Ext(T) // == {Ext(S-T)+Ext(T),+,Ext(X)} // // If (S-T)+T does not overflow ... (3) // // RHS == {Ext(S-T)+Ext(T),+,Ext(X)} == {Ext(S-T+T),+,Ext(X)} // == {Ext(S),+,Ext(X)} == LHS // // Thus, if (1), (2) and (3) are true for some T, then // Ext({S,+,X}) == {Ext(S),+,Ext(X)} // // (3) is implied by (1) -- "(S-T)+T does not overflow" is simply "({S-T,+,X}+T) // does not overflow" restricted to the 0th iteration. Therefore we only need // to check for (1) and (2). // // In the current context, S is `Start`, X is `Step`, Ext is `ExtendOpTy` and T // is `Delta` (defined below). template bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start, const SCEV *Step, const Loop *L) { auto WrapType = ExtendOpTraits::WrapType; // We restrict `Start` to a constant to prevent SCEV from spending too much // time here. It is correct (but more expensive) to continue with a // non-constant `Start` and do a general SCEV subtraction to compute // `PreStart` below. const SCEVConstant *StartC = dyn_cast(Start); if (!StartC) return false; APInt StartAI = StartC->getAPInt(); for (unsigned Delta : {-2, -1, 1, 2}) { const SCEV *PreStart = getConstant(StartAI - Delta); FoldingSetNodeID ID; ID.AddInteger(scAddRecExpr); ID.AddPointer(PreStart); ID.AddPointer(Step); ID.AddPointer(L); void *IP = nullptr; const auto *PreAR = static_cast(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); // Give up if we don't already have the add recurrence we need because // actually constructing an add recurrence is relatively expensive. if (PreAR && PreAR->getNoWrapFlags(WrapType)) { // proves (2) const SCEV *DeltaS = getConstant(StartC->getType(), Delta); ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; const SCEV *Limit = ExtendOpTraits::getOverflowLimitForStep( DeltaS, &Pred, this); if (Limit && isKnownPredicate(Pred, PreAR, Limit)) // proves (1) return true; } } return false; } const SCEV * ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && "This is not an extending conversion!"); assert(isSCEVable(Ty) && "This is not a conversion to a SCEVable type!"); Ty = getEffectiveSCEVType(Ty); // Fold if the operand is constant. if (const SCEVConstant *SC = dyn_cast(Op)) return getConstant( cast(ConstantExpr::getZExt(SC->getValue(), Ty))); // zext(zext(x)) --> zext(x) if (const SCEVZeroExtendExpr *SZ = dyn_cast(Op)) return getZeroExtendExpr(SZ->getOperand(), Ty, Depth + 1); // Before doing any expensive analysis, check to see if we've already // computed a SCEV for this Op and Ty. FoldingSetNodeID ID; ID.AddInteger(scZeroExtend); ID.AddPointer(Op); ID.AddPointer(Ty); void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; if (Depth > MaxExtDepth) { SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); addToLoopUseLists(S); return S; } // zext(trunc(x)) --> zext(x) or x or trunc(x) if (const SCEVTruncateExpr *ST = dyn_cast(Op)) { // It's possible the bits taken off by the truncate were all zero bits. If // so, we should be able to simplify this further. 
const SCEV *X = ST->getOperand(); ConstantRange CR = getUnsignedRange(X); unsigned TruncBits = getTypeSizeInBits(ST->getType()); unsigned NewBits = getTypeSizeInBits(Ty); if (CR.truncate(TruncBits).zeroExtend(NewBits).contains( CR.zextOrTrunc(NewBits))) return getTruncateOrZeroExtend(X, Ty); } // If the input value is a chrec scev, and we can prove that the value // did not overflow the old, smaller, value, we can zero extend all of the // operands (often constants). This allows analysis of something like // this: for (unsigned char X = 0; X < 100; ++X) { int Y = X; } if (const SCEVAddRecExpr *AR = dyn_cast(Op)) if (AR->isAffine()) { const SCEV *Start = AR->getStart(); const SCEV *Step = AR->getStepRecurrence(*this); unsigned BitWidth = getTypeSizeInBits(AR->getType()); const Loop *L = AR->getLoop(); if (!AR->hasNoUnsignedWrap()) { auto NewFlags = proveNoWrapViaConstantRanges(AR); const_cast(AR)->setNoWrapFlags(NewFlags); } // If we have special knowledge that this addrec won't overflow, // we don't need to do any further analysis. if (AR->hasNoUnsignedWrap()) return getAddRecExpr( getExtendAddRecStart(AR, Ty, this, Depth + 1), getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); // Check whether the backedge-taken count is SCEVCouldNotCompute. // Note that this serves two purposes: It filters out loops that are // simply not analyzable, and it covers the case where this code is // being called from within backedge-taken count analysis, such that // attempting to ask for the backedge-taken count would likely result // in infinite recursion. In the later case, the analysis code will // cope with a conservative value, and it will take care to purge // that value once it has finished. const SCEV *MaxBECount = getMaxBackedgeTakenCount(L); if (!isa(MaxBECount)) { // Manually compute the final value for AR, checking for // overflow. // Check whether the backedge-taken count can be losslessly casted to // the addrec's type. The count is always unsigned. const SCEV *CastedMaxBECount = getTruncateOrZeroExtend(MaxBECount, Start->getType()); const SCEV *RecastedMaxBECount = getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType()); if (MaxBECount == RecastedMaxBECount) { Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); // Check whether Start+Step*MaxBECount has no unsigned overflow. const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step, SCEV::FlagAnyWrap, Depth + 1); const SCEV *ZAdd = getZeroExtendExpr(getAddExpr(Start, ZMul, SCEV::FlagAnyWrap, Depth + 1), WideTy, Depth + 1); const SCEV *WideStart = getZeroExtendExpr(Start, WideTy, Depth + 1); const SCEV *WideMaxBECount = getZeroExtendExpr(CastedMaxBECount, WideTy, Depth + 1); const SCEV *OperandExtendedAdd = getAddExpr(WideStart, getMulExpr(WideMaxBECount, getZeroExtendExpr(Step, WideTy, Depth + 1), SCEV::FlagAnyWrap, Depth + 1), SCEV::FlagAnyWrap, Depth + 1); if (ZAdd == OperandExtendedAdd) { // Cache knowledge of AR NUW, which is propagated to this AddRec. const_cast(AR)->setNoWrapFlags(SCEV::FlagNUW); // Return the expression with the addrec on the outside. return getAddRecExpr( getExtendAddRecStart(AR, Ty, this, Depth + 1), getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); } // Similar to above, only this time treat the step value as signed. // This covers loops that count down. 
OperandExtendedAdd = getAddExpr(WideStart, getMulExpr(WideMaxBECount, getSignExtendExpr(Step, WideTy, Depth + 1), SCEV::FlagAnyWrap, Depth + 1), SCEV::FlagAnyWrap, Depth + 1); if (ZAdd == OperandExtendedAdd) { // Cache knowledge of AR NW, which is propagated to this AddRec. // Negative step causes unsigned wrap, but it still can't self-wrap. const_cast(AR)->setNoWrapFlags(SCEV::FlagNW); // Return the expression with the addrec on the outside. return getAddRecExpr( getExtendAddRecStart(AR, Ty, this, Depth + 1), getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); } } } // Normally, in the cases we can prove no-overflow via a // backedge guarding condition, we can also compute a backedge // taken count for the loop. The exceptions are assumptions and // guards present in the loop -- SCEV is not great at exploiting // these to compute max backedge taken counts, but can still use // these to prove lack of overflow. Use this fact to avoid // doing extra work that may not pay off. if (!isa(MaxBECount) || HasGuards || !AC.assumptions().empty()) { // If the backedge is guarded by a comparison with the pre-inc // value the addrec is safe. Also, if the entry is guarded by // a comparison with the start value and the backedge is // guarded by a comparison with the post-inc value, the addrec // is safe. if (isKnownPositive(Step)) { const SCEV *N = getConstant(APInt::getMinValue(BitWidth) - getUnsignedRangeMax(Step)); if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) || (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) && isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR->getPostIncExpr(*this), N))) { // Cache knowledge of AR NUW, which is propagated to this // AddRec. const_cast(AR)->setNoWrapFlags(SCEV::FlagNUW); // Return the expression with the addrec on the outside. return getAddRecExpr( getExtendAddRecStart(AR, Ty, this, Depth + 1), getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); } } else if (isKnownNegative(Step)) { const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) - getSignedRangeMin(Step)); if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) || (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) && isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR->getPostIncExpr(*this), N))) { // Cache knowledge of AR NW, which is propagated to this // AddRec. Negative step causes unsigned wrap, but it // still can't self-wrap. const_cast(AR)->setNoWrapFlags(SCEV::FlagNW); // Return the expression with the addrec on the outside. return getAddRecExpr( getExtendAddRecStart(AR, Ty, this, Depth + 1), getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); } } } if (proveNoWrapByVaryingStart(Start, Step, L)) { const_cast(AR)->setNoWrapFlags(SCEV::FlagNUW); return getAddRecExpr( getExtendAddRecStart(AR, Ty, this, Depth + 1), getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); } } if (auto *SA = dyn_cast(Op)) { // zext((A + B + ...)) --> (zext(A) + zext(B) + ...) if (SA->hasNoUnsignedWrap()) { // If the addition does not unsign overflow then we can, by definition, // commute the zero extension with the addition operation. SmallVector Ops; for (const auto *Op : SA->operands()) Ops.push_back(getZeroExtendExpr(Op, Ty, Depth + 1)); return getAddExpr(Ops, SCEV::FlagNUW, Depth + 1); } } // The cast wasn't folded; create an explicit cast node. // Recompute the insert position, as it may have been invalidated. 
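//===----------------------------------------------------------------------===//
// A minimal standalone sketch (not LLVM code) of the no-overflow test above
// in concrete terms: for an i8 addrec {Start,+,Step} running at most MaxBEC
// iterations, compute the final value at the narrow width (then widen) and
// again entirely at double width.  If the two agree, Start + Step*i never
// wrapped, so the zero extension can be pushed inside the recurrence.
#if 0
#include <cassert>
#include <cstdint>

int main() {
  uint8_t Start = 0, Step = 4;
  uint8_t MaxBEC = 50; // backedge taken at most 50 times
  uint8_t  Narrow = (uint8_t)(Start + (uint8_t)(Step * MaxBEC));
  uint16_t Wide   = (uint16_t)((uint16_t)Start + (uint16_t)Step * MaxBEC);
  assert((uint16_t)Narrow == Wide); // 200 fits in i8: no unsigned wrap
  // With Step = 6 the narrow result would be 300 mod 256 = 44 != 300,
  // and the equality (hence the NUW conclusion) would fail.
  return 0;
}
#endif
//===----------------------------------------------------------------------===//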
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); addToLoopUseLists(S); return S; } const SCEV * ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && "This is not an extending conversion!"); assert(isSCEVable(Ty) && "This is not a conversion to a SCEVable type!"); Ty = getEffectiveSCEVType(Ty); // Fold if the operand is constant. if (const SCEVConstant *SC = dyn_cast(Op)) return getConstant( cast(ConstantExpr::getSExt(SC->getValue(), Ty))); // sext(sext(x)) --> sext(x) if (const SCEVSignExtendExpr *SS = dyn_cast(Op)) return getSignExtendExpr(SS->getOperand(), Ty, Depth + 1); // sext(zext(x)) --> zext(x) if (const SCEVZeroExtendExpr *SZ = dyn_cast(Op)) return getZeroExtendExpr(SZ->getOperand(), Ty, Depth + 1); // Before doing any expensive analysis, check to see if we've already // computed a SCEV for this Op and Ty. FoldingSetNodeID ID; ID.AddInteger(scSignExtend); ID.AddPointer(Op); ID.AddPointer(Ty); void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; // Limit recursion depth. if (Depth > MaxExtDepth) { SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); addToLoopUseLists(S); return S; } // sext(trunc(x)) --> sext(x) or x or trunc(x) if (const SCEVTruncateExpr *ST = dyn_cast(Op)) { // It's possible the bits taken off by the truncate were all sign bits. If // so, we should be able to simplify this further. const SCEV *X = ST->getOperand(); ConstantRange CR = getSignedRange(X); unsigned TruncBits = getTypeSizeInBits(ST->getType()); unsigned NewBits = getTypeSizeInBits(Ty); if (CR.truncate(TruncBits).signExtend(NewBits).contains( CR.sextOrTrunc(NewBits))) return getTruncateOrSignExtend(X, Ty); } // sext(C1 + (C2 * x)) --> C1 + sext(C2 * x) if C1 < C2 if (auto *SA = dyn_cast(Op)) { if (SA->getNumOperands() == 2) { auto *SC1 = dyn_cast(SA->getOperand(0)); auto *SMul = dyn_cast(SA->getOperand(1)); if (SMul && SC1) { if (auto *SC2 = dyn_cast(SMul->getOperand(0))) { const APInt &C1 = SC1->getAPInt(); const APInt &C2 = SC2->getAPInt(); if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) && C2.isPowerOf2()) return getAddExpr(getSignExtendExpr(SC1, Ty, Depth + 1), getSignExtendExpr(SMul, Ty, Depth + 1), SCEV::FlagAnyWrap, Depth + 1); } } } // sext((A + B + ...)) --> (sext(A) + sext(B) + ...) if (SA->hasNoSignedWrap()) { // If the addition does not sign overflow then we can, by definition, // commute the sign extension with the addition operation. SmallVector Ops; for (const auto *Op : SA->operands()) Ops.push_back(getSignExtendExpr(Op, Ty, Depth + 1)); return getAddExpr(Ops, SCEV::FlagNSW, Depth + 1); } } // If the input value is a chrec scev, and we can prove that the value // did not overflow the old, smaller, value, we can sign extend all of the // operands (often constants). 
This allows analysis of something like // this: for (signed char X = 0; X < 100; ++X) { int Y = X; } if (const SCEVAddRecExpr *AR = dyn_cast(Op)) if (AR->isAffine()) { const SCEV *Start = AR->getStart(); const SCEV *Step = AR->getStepRecurrence(*this); unsigned BitWidth = getTypeSizeInBits(AR->getType()); const Loop *L = AR->getLoop(); if (!AR->hasNoSignedWrap()) { auto NewFlags = proveNoWrapViaConstantRanges(AR); const_cast(AR)->setNoWrapFlags(NewFlags); } // If we have special knowledge that this addrec won't overflow, // we don't need to do any further analysis. if (AR->hasNoSignedWrap()) return getAddRecExpr( getExtendAddRecStart(AR, Ty, this, Depth + 1), getSignExtendExpr(Step, Ty, Depth + 1), L, SCEV::FlagNSW); // Check whether the backedge-taken count is SCEVCouldNotCompute. // Note that this serves two purposes: It filters out loops that are // simply not analyzable, and it covers the case where this code is // being called from within backedge-taken count analysis, such that // attempting to ask for the backedge-taken count would likely result // in infinite recursion. In the later case, the analysis code will // cope with a conservative value, and it will take care to purge // that value once it has finished. const SCEV *MaxBECount = getMaxBackedgeTakenCount(L); if (!isa(MaxBECount)) { // Manually compute the final value for AR, checking for // overflow. // Check whether the backedge-taken count can be losslessly casted to // the addrec's type. The count is always unsigned. const SCEV *CastedMaxBECount = getTruncateOrZeroExtend(MaxBECount, Start->getType()); const SCEV *RecastedMaxBECount = getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType()); if (MaxBECount == RecastedMaxBECount) { Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); // Check whether Start+Step*MaxBECount has no signed overflow. const SCEV *SMul = getMulExpr(CastedMaxBECount, Step, SCEV::FlagAnyWrap, Depth + 1); const SCEV *SAdd = getSignExtendExpr(getAddExpr(Start, SMul, SCEV::FlagAnyWrap, Depth + 1), WideTy, Depth + 1); const SCEV *WideStart = getSignExtendExpr(Start, WideTy, Depth + 1); const SCEV *WideMaxBECount = getZeroExtendExpr(CastedMaxBECount, WideTy, Depth + 1); const SCEV *OperandExtendedAdd = getAddExpr(WideStart, getMulExpr(WideMaxBECount, getSignExtendExpr(Step, WideTy, Depth + 1), SCEV::FlagAnyWrap, Depth + 1), SCEV::FlagAnyWrap, Depth + 1); if (SAdd == OperandExtendedAdd) { // Cache knowledge of AR NSW, which is propagated to this AddRec. const_cast(AR)->setNoWrapFlags(SCEV::FlagNSW); // Return the expression with the addrec on the outside. return getAddRecExpr( getExtendAddRecStart(AR, Ty, this, Depth + 1), getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); } // Similar to above, only this time treat the step value as unsigned. // This covers loops that count up with an unsigned step. OperandExtendedAdd = getAddExpr(WideStart, getMulExpr(WideMaxBECount, getZeroExtendExpr(Step, WideTy, Depth + 1), SCEV::FlagAnyWrap, Depth + 1), SCEV::FlagAnyWrap, Depth + 1); if (SAdd == OperandExtendedAdd) { // If AR wraps around then // // abs(Step) * MaxBECount > unsigned-max(AR->getType()) // => SAdd != OperandExtendedAdd // // Thus (AR is not NW => SAdd != OperandExtendedAdd) <=> // (SAdd == OperandExtendedAdd => AR is NW) const_cast(AR)->setNoWrapFlags(SCEV::FlagNW); // Return the expression with the addrec on the outside. 
return getAddRecExpr( getExtendAddRecStart(AR, Ty, this, Depth + 1), getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); } } } // Normally, in the cases we can prove no-overflow via a // backedge guarding condition, we can also compute a backedge // taken count for the loop. The exceptions are assumptions and // guards present in the loop -- SCEV is not great at exploiting // these to compute max backedge taken counts, but can still use // these to prove lack of overflow. Use this fact to avoid // doing extra work that may not pay off. if (!isa(MaxBECount) || HasGuards || !AC.assumptions().empty()) { // If the backedge is guarded by a comparison with the pre-inc // value the addrec is safe. Also, if the entry is guarded by // a comparison with the start value and the backedge is // guarded by a comparison with the post-inc value, the addrec // is safe. ICmpInst::Predicate Pred; const SCEV *OverflowLimit = getSignedOverflowLimitForStep(Step, &Pred, this); if (OverflowLimit && (isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) || (isLoopEntryGuardedByCond(L, Pred, Start, OverflowLimit) && isLoopBackedgeGuardedByCond(L, Pred, AR->getPostIncExpr(*this), OverflowLimit)))) { // Cache knowledge of AR NSW, then propagate NSW to the wide AddRec. const_cast(AR)->setNoWrapFlags(SCEV::FlagNSW); return getAddRecExpr( getExtendAddRecStart(AR, Ty, this, Depth + 1), getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); } } // If Start and Step are constants, check if we can apply this // transformation: // sext{C1,+,C2} --> C1 + sext{0,+,C2} if C1 < C2 auto *SC1 = dyn_cast(Start); auto *SC2 = dyn_cast(Step); if (SC1 && SC2) { const APInt &C1 = SC1->getAPInt(); const APInt &C2 = SC2->getAPInt(); if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) && C2.isPowerOf2()) { Start = getSignExtendExpr(Start, Ty, Depth + 1); const SCEV *NewAR = getAddRecExpr(getZero(AR->getType()), Step, L, AR->getNoWrapFlags()); return getAddExpr(Start, getSignExtendExpr(NewAR, Ty, Depth + 1), SCEV::FlagAnyWrap, Depth + 1); } } if (proveNoWrapByVaryingStart(Start, Step, L)) { const_cast(AR)->setNoWrapFlags(SCEV::FlagNSW); return getAddRecExpr( getExtendAddRecStart(AR, Ty, this, Depth + 1), getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); } } // If the input value is provably positive and we could not simplify // away the sext build a zext instead. if (isKnownNonNegative(Op)) return getZeroExtendExpr(Op, Ty, Depth + 1); // The cast wasn't folded; create an explicit cast node. // Recompute the insert position, as it may have been invalidated. if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); addToLoopUseLists(S); return S; } /// getAnyExtendExpr - Return a SCEV for the given operand extended with /// unspecified bits out to the given type. const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op, Type *Ty) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && "This is not an extending conversion!"); assert(isSCEVable(Ty) && "This is not a conversion to a SCEVable type!"); Ty = getEffectiveSCEVType(Ty); // Sign-extend negative constants. if (const SCEVConstant *SC = dyn_cast(Op)) if (SC->getAPInt().isNegative()) return getSignExtendExpr(Op, Ty); // Peel off a truncate cast. 
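//===----------------------------------------------------------------------===//
// A minimal standalone sketch (not LLVM code) of the rationale for the
// isKnownNonNegative fold above: when the sign bit is provably clear, sign-
// and zero-extension produce identical bits, and the zext is preferred as the
// canonical form.
#if 0
#include <cassert>
#include <cstdint>

int main() {
  int8_t X = 42;                             // known non-negative
  assert((int64_t)X == (int64_t)(uint8_t)X); // sext == zext here
  return 0;
}
#endif
//===----------------------------------------------------------------------===//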
if (const SCEVTruncateExpr *T = dyn_cast(Op)) { const SCEV *NewOp = T->getOperand(); if (getTypeSizeInBits(NewOp->getType()) < getTypeSizeInBits(Ty)) return getAnyExtendExpr(NewOp, Ty); return getTruncateOrNoop(NewOp, Ty); } // Next try a zext cast. If the cast is folded, use it. const SCEV *ZExt = getZeroExtendExpr(Op, Ty); if (!isa(ZExt)) return ZExt; // Next try a sext cast. If the cast is folded, use it. const SCEV *SExt = getSignExtendExpr(Op, Ty); if (!isa(SExt)) return SExt; // Force the cast to be folded into the operands of an addrec. if (const SCEVAddRecExpr *AR = dyn_cast(Op)) { SmallVector Ops; for (const SCEV *Op : AR->operands()) Ops.push_back(getAnyExtendExpr(Op, Ty)); return getAddRecExpr(Ops, AR->getLoop(), SCEV::FlagNW); } // If the expression is obviously signed, use the sext cast value. if (isa(Op)) return SExt; // Absent any other information, use the zext cast value. return ZExt; } /// Process the given Ops list, which is a list of operands to be added under /// the given scale, update the given map. This is a helper function for /// getAddRecExpr. As an example of what it does, given a sequence of operands /// that would form an add expression like this: /// /// m + n + 13 + (A * (o + p + (B * (q + m + 29)))) + r + (-1 * r) /// /// where A and B are constants, update the map with these values: /// /// (m, 1+A*B), (n, 1), (o, A), (p, A), (q, A*B), (r, 0) /// /// and add 13 + A*B*29 to AccumulatedConstant. /// This will allow getAddRecExpr to produce this: /// /// 13+A*B*29 + n + (m * (1+A*B)) + ((o + p) * A) + (q * A*B) /// /// This form often exposes folding opportunities that are hidden in /// the original operand list. /// /// Return true iff it appears that any interesting folding opportunities /// may be exposed. This helps getAddRecExpr short-circuit extra work in /// the common case where no interesting opportunities are present, and /// is also used as a check to avoid infinite recursion. static bool CollectAddOperandsWithScales(DenseMap &M, SmallVectorImpl &NewOps, APInt &AccumulatedConstant, const SCEV *const *Ops, size_t NumOperands, const APInt &Scale, ScalarEvolution &SE) { bool Interesting = false; // Iterate over the add operands. They are sorted, with constants first. unsigned i = 0; while (const SCEVConstant *C = dyn_cast(Ops[i])) { ++i; // Pull a buried constant out to the outside. if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero()) Interesting = true; AccumulatedConstant += Scale * C->getAPInt(); } // Next comes everything else. We're especially interested in multiplies // here, but they're in the middle, so just visit the rest with one loop. for (; i != NumOperands; ++i) { const SCEVMulExpr *Mul = dyn_cast(Ops[i]); if (Mul && isa(Mul->getOperand(0))) { APInt NewScale = Scale * cast(Mul->getOperand(0))->getAPInt(); if (Mul->getNumOperands() == 2 && isa(Mul->getOperand(1))) { // A multiplication of a constant with another add; recurse. const SCEVAddExpr *Add = cast(Mul->getOperand(1)); Interesting |= CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant, Add->op_begin(), Add->getNumOperands(), NewScale, SE); } else { // A multiplication of a constant with some other value. Update // the map. SmallVector MulOps(Mul->op_begin()+1, Mul->op_end()); const SCEV *Key = SE.getMulExpr(MulOps); auto Pair = M.insert({Key, NewScale}); if (Pair.second) { NewOps.push_back(Pair.first->first); } else { Pair.first->second += NewScale; // The map already had an entry for this value, which may indicate // a folding opportunity. 
Interesting = true; } } } else { // An ordinary operand. Update the map. std::pair::iterator, bool> Pair = M.insert({Ops[i], Scale}); if (Pair.second) { NewOps.push_back(Pair.first->first); } else { Pair.first->second += Scale; // The map already had an entry for this value, which may indicate // a folding opportunity. Interesting = true; } } } return Interesting; } // We're trying to construct a SCEV of type `Type' with `Ops' as operands and // `OldFlags' as can't-wrap behavior. Infer a more aggressive set of // can't-overflow flags for the operation if possible. static SCEV::NoWrapFlags StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type, const SmallVectorImpl &Ops, SCEV::NoWrapFlags Flags) { using namespace std::placeholders; using OBO = OverflowingBinaryOperator; bool CanAnalyze = Type == scAddExpr || Type == scAddRecExpr || Type == scMulExpr; (void)CanAnalyze; assert(CanAnalyze && "don't call from other places!"); int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW; SCEV::NoWrapFlags SignOrUnsignWrap = ScalarEvolution::maskFlags(Flags, SignOrUnsignMask); // If FlagNSW is true and all the operands are non-negative, infer FlagNUW. auto IsKnownNonNegative = [&](const SCEV *S) { return SE->isKnownNonNegative(S); }; if (SignOrUnsignWrap == SCEV::FlagNSW && all_of(Ops, IsKnownNonNegative)) Flags = ScalarEvolution::setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask); SignOrUnsignWrap = ScalarEvolution::maskFlags(Flags, SignOrUnsignMask); if (SignOrUnsignWrap != SignOrUnsignMask && Type == scAddExpr && Ops.size() == 2 && isa(Ops[0])) { // (A + C) --> (A + C) if the addition does not sign overflow // (A + C) --> (A + C) if the addition does not unsign overflow const APInt &C = cast(Ops[0])->getAPInt(); if (!(SignOrUnsignWrap & SCEV::FlagNSW)) { auto NSWRegion = ConstantRange::makeGuaranteedNoWrapRegion( Instruction::Add, C, OBO::NoSignedWrap); if (NSWRegion.contains(SE->getSignedRange(Ops[1]))) Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW); } if (!(SignOrUnsignWrap & SCEV::FlagNUW)) { auto NUWRegion = ConstantRange::makeGuaranteedNoWrapRegion( Instruction::Add, C, OBO::NoUnsignedWrap); if (NUWRegion.contains(SE->getUnsignedRange(Ops[1]))) Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW); } } return Flags; } bool ScalarEvolution::isAvailableAtLoopEntry(const SCEV *S, const Loop *L) { if (!isLoopInvariant(S, L)) return false; // If a value depends on a SCEVUnknown which is defined after the loop, we // conservatively assume that we cannot calculate it at the loop's entry. 
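// Illustrative aside (editor's sketch, not part of the merged sources): the
// constant-operand strengthening in StrengthenNoWrapFlags above asks whether
// the other operand's signed range sits inside the guaranteed no-wrap region
// for "x + C". For i8 that region is simply [INT8_MIN - C, INT8_MAX - C];
// the hypothetical helper below re-derives the same answer without
// ConstantRange, using the fact that the sum is monotone in x.
#include <cstdint>

static bool addNswForAllX(int C, int Lo, int Hi) {
  // "x + C" is NSW for every i8 value x in [Lo, Hi] iff both endpoint sums
  // stay inside [INT8_MIN, INT8_MAX].
  return Lo + C >= INT8_MIN && Hi + C <= INT8_MAX;
}
// e.g. addNswForAllX(10, -20, 100) is true (110 <= 127), while
// addNswForAllX(10, -20, 120) is false (130 overflows i8).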
struct FindDominatedSCEVUnknown { bool Found = false; const Loop *L; DominatorTree &DT; LoopInfo &LI; FindDominatedSCEVUnknown(const Loop *L, DominatorTree &DT, LoopInfo &LI) : L(L), DT(DT), LI(LI) {} bool checkSCEVUnknown(const SCEVUnknown *SU) { if (auto *I = dyn_cast(SU->getValue())) { if (DT.dominates(L->getHeader(), I->getParent())) Found = true; else assert(DT.dominates(I->getParent(), L->getHeader()) && "No dominance relationship between SCEV and loop?"); } return false; } bool follow(const SCEV *S) { switch (static_cast(S->getSCEVType())) { case scConstant: return false; case scAddRecExpr: case scTruncate: case scZeroExtend: case scSignExtend: case scAddExpr: case scMulExpr: case scUMaxExpr: case scSMaxExpr: case scUDivExpr: return true; case scUnknown: return checkSCEVUnknown(cast(S)); case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); } return false; } bool isDone() { return Found; } }; FindDominatedSCEVUnknown FSU(L, DT, LI); SCEVTraversal ST(FSU); ST.visitAll(S); return !FSU.Found; } /// Get a canonical add expression, or something simpler if possible. const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, SCEV::NoWrapFlags Flags, unsigned Depth) { assert(!(Flags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) && "only nuw or nsw allowed"); assert(!Ops.empty() && "Cannot get empty add!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); for (unsigned i = 1, e = Ops.size(); i != e; ++i) assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && "SCEVAddExpr operand types don't match!"); #endif // Sort by complexity, this groups all similar expression types together. GroupByComplexity(Ops, &LI, DT); Flags = StrengthenNoWrapFlags(this, scAddExpr, Ops, Flags); // If there are any constants, fold them together. unsigned Idx = 0; if (const SCEVConstant *LHSC = dyn_cast(Ops[0])) { ++Idx; assert(Idx < Ops.size()); while (const SCEVConstant *RHSC = dyn_cast(Ops[Idx])) { // We found two constants, fold them together! Ops[0] = getConstant(LHSC->getAPInt() + RHSC->getAPInt()); if (Ops.size() == 2) return Ops[0]; Ops.erase(Ops.begin()+1); // Erase the folded element LHSC = cast(Ops[0]); } // If we are left with a constant zero being added, strip it off. if (LHSC->getValue()->isZero()) { Ops.erase(Ops.begin()); --Idx; } if (Ops.size() == 1) return Ops[0]; } // Limit recursion calls depth. if (Depth > MaxArithDepth) return getOrCreateAddExpr(Ops, Flags); // Okay, check to see if the same value occurs in the operand list more than // once. If so, merge them together into an multiply expression. Since we // sorted the list, these values are required to be adjacent. Type *Ty = Ops[0]->getType(); bool FoundMatch = false; for (unsigned i = 0, e = Ops.size(); i != e-1; ++i) if (Ops[i] == Ops[i+1]) { // X + Y + Y --> X + Y*2 // Scan ahead to count how many equal operands there are. unsigned Count = 2; while (i+Count != e && Ops[i+Count] == Ops[i]) ++Count; // Merge the values into a multiply. const SCEV *Scale = getConstant(Ty, Count); const SCEV *Mul = getMulExpr(Scale, Ops[i], SCEV::FlagAnyWrap, Depth + 1); if (Ops.size() == Count) return Mul; Ops[i] = Mul; Ops.erase(Ops.begin()+i+1, Ops.begin()+i+Count); --i; e -= Count - 1; FoundMatch = true; } if (FoundMatch) return getAddExpr(Ops, Flags, Depth + 1); // Check for truncates. If all the operands are truncated from the same // type, see if factoring out the truncate would permit the result to be // folded. 
  // eg., n*trunc(x) + m*trunc(y) --> trunc(trunc(n)*x + trunc(m)*y)
  // if the contents of the resulting outer trunc fold to something simple.
  auto FindTruncSrcType = [&]() -> Type * {
    // We're ultimately looking to fold an addrec of truncs and muls of only
    // constants and truncs, so if we find any other types of SCEV
    // as operands of the addrec then we bail and return nullptr here.
    // Otherwise, we return the type of the operand of a trunc that we find.
    if (auto *T = dyn_cast<SCEVTruncateExpr>(Ops[Idx]))
      return T->getOperand()->getType();
    if (const auto *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) {
      const auto *LastOp = Mul->getOperand(Mul->getNumOperands() - 1);
      if (const auto *T = dyn_cast<SCEVTruncateExpr>(LastOp))
        return T->getOperand()->getType();
    }
    return nullptr;
  };
  if (auto *SrcType = FindTruncSrcType()) {
    SmallVector<const SCEV *, 8> LargeOps;
    bool Ok = true;
    // Check all the operands to see if they can be represented in the
    // source type of the truncate.
    for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
      if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Ops[i])) {
        if (T->getOperand()->getType() != SrcType) {
          Ok = false;
          break;
        }
        LargeOps.push_back(T->getOperand());
      } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
        LargeOps.push_back(getAnyExtendExpr(C, SrcType));
      } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) {
        SmallVector<const SCEV *, 8> LargeMulOps;
        for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) {
          if (const SCEVTruncateExpr *T =
                  dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) {
            if (T->getOperand()->getType() != SrcType) {
              Ok = false;
              break;
            }
            LargeMulOps.push_back(T->getOperand());
          } else if (const auto *C = dyn_cast<SCEVConstant>(M->getOperand(j))) {
            LargeMulOps.push_back(getAnyExtendExpr(C, SrcType));
          } else {
            Ok = false;
            break;
          }
        }
        if (Ok)
          LargeOps.push_back(getMulExpr(LargeMulOps, SCEV::FlagAnyWrap,
                                        Depth + 1));
      } else {
        Ok = false;
        break;
      }
    }
    if (Ok) {
      // Evaluate the expression in the larger type.
      const SCEV *Fold = getAddExpr(LargeOps, Flags, Depth + 1);
      // If it folds to something simple, use it. Otherwise, don't.
      if (isa<SCEVConstant>(Fold) || isa<SCEVAddRecExpr>(Fold))
        return getTruncateExpr(Fold, Ty);
    }
  }

  // Skip past any other cast SCEVs.
  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr)
    ++Idx;

  // If there are add operands they would be next.
  if (Idx < Ops.size()) {
    bool DeletedAdd = false;
    while (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[Idx])) {
      if (Ops.size() > AddOpsInlineThreshold ||
          Add->getNumOperands() > AddOpsInlineThreshold)
        break;
      // If we have an add, expand the add operands onto the end of the
      // operands list.
      Ops.erase(Ops.begin()+Idx);
      Ops.append(Add->op_begin(), Add->op_end());
      DeletedAdd = true;
    }
    // If we deleted at least one add, we added operands to the end of the
    // list, and they are not necessarily sorted. Recurse to resort and
    // resimplify any operands we just acquired.
    if (DeletedAdd)
      return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
  }

  // Skip over the add expression until we get to a multiply.
  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
    ++Idx;

  // Check to see if there are any folding opportunities present with
  // operands multiplied by constant values.
  if (Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx])) {
    uint64_t BitWidth = getTypeSizeInBits(Ty);
    DenseMap<const SCEV *, APInt> M;
    SmallVector<const SCEV *, 8> NewOps;
    APInt AccumulatedConstant(BitWidth, 0);
    if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
                                     Ops.data(), Ops.size(),
                                     APInt(BitWidth, 1), *this)) {
      struct APIntCompare {
        bool operator()(const APInt &LHS, const APInt &RHS) const {
          return LHS.ult(RHS);
        }
      };
      // Some interesting folding opportunity is present, so it's worthwhile to
      // re-generate the operands list.
Group the operands by constant scale, // to avoid multiplying by the same constant scale multiple times. std::map, APIntCompare> MulOpLists; for (const SCEV *NewOp : NewOps) MulOpLists[M.find(NewOp)->second].push_back(NewOp); // Re-generate the operands list. Ops.clear(); if (AccumulatedConstant != 0) Ops.push_back(getConstant(AccumulatedConstant)); for (auto &MulOp : MulOpLists) if (MulOp.first != 0) Ops.push_back(getMulExpr( getConstant(MulOp.first), getAddExpr(MulOp.second, SCEV::FlagAnyWrap, Depth + 1), SCEV::FlagAnyWrap, Depth + 1)); if (Ops.empty()) return getZero(Ty); if (Ops.size() == 1) return Ops[0]; return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); } } // If we are adding something to a multiply expression, make sure the // something is not already an operand of the multiply. If so, merge it into // the multiply. for (; Idx < Ops.size() && isa(Ops[Idx]); ++Idx) { const SCEVMulExpr *Mul = cast(Ops[Idx]); for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) { const SCEV *MulOpSCEV = Mul->getOperand(MulOp); if (isa(MulOpSCEV)) continue; for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp) if (MulOpSCEV == Ops[AddOp]) { // Fold W + X + (X * Y * Z) --> W + (X * ((Y*Z)+1)) const SCEV *InnerMul = Mul->getOperand(MulOp == 0); if (Mul->getNumOperands() != 2) { // If the multiply has more than two operands, we must get the // Y*Z term. SmallVector MulOps(Mul->op_begin(), Mul->op_begin()+MulOp); MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end()); InnerMul = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1); } SmallVector TwoOps = {getOne(Ty), InnerMul}; const SCEV *AddOne = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1); const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV, SCEV::FlagAnyWrap, Depth + 1); if (Ops.size() == 2) return OuterMul; if (AddOp < Idx) { Ops.erase(Ops.begin()+AddOp); Ops.erase(Ops.begin()+Idx-1); } else { Ops.erase(Ops.begin()+Idx); Ops.erase(Ops.begin()+AddOp-1); } Ops.push_back(OuterMul); return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); } // Check this multiply against other multiplies being added together. for (unsigned OtherMulIdx = Idx+1; OtherMulIdx < Ops.size() && isa(Ops[OtherMulIdx]); ++OtherMulIdx) { const SCEVMulExpr *OtherMul = cast(Ops[OtherMulIdx]); // If MulOp occurs in OtherMul, we can fold the two multiplies // together. 
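// Illustrative aside (editor's sketch, not part of the merged sources): a
// brute-force check of the algebra behind the two-multiply fold performed
// below, X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E)); SCEV applies the same
// rewrite symbolically on expression nodes.
#include <cassert>

static void checkTwoMulFold() {
  for (int A = -3; A <= 3; ++A)
    for (int B = -3; B <= 3; ++B)
      for (int C = -3; C <= 3; ++C)
        for (int D = -3; D <= 3; ++D)
          for (int E = -3; E <= 3; ++E)
            assert(7 + (A * B * C) + (A * D * E) ==
                   7 + (A * (B * C + D * E)));
}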
for (unsigned OMulOp = 0, e = OtherMul->getNumOperands(); OMulOp != e; ++OMulOp) if (OtherMul->getOperand(OMulOp) == MulOpSCEV) { // Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E)) const SCEV *InnerMul1 = Mul->getOperand(MulOp == 0); if (Mul->getNumOperands() != 2) { SmallVector MulOps(Mul->op_begin(), Mul->op_begin()+MulOp); MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end()); InnerMul1 = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1); } const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0); if (OtherMul->getNumOperands() != 2) { SmallVector MulOps(OtherMul->op_begin(), OtherMul->op_begin()+OMulOp); MulOps.append(OtherMul->op_begin()+OMulOp+1, OtherMul->op_end()); InnerMul2 = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1); } SmallVector TwoOps = {InnerMul1, InnerMul2}; const SCEV *InnerMulSum = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1); const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum, SCEV::FlagAnyWrap, Depth + 1); if (Ops.size() == 2) return OuterMul; Ops.erase(Ops.begin()+Idx); Ops.erase(Ops.begin()+OtherMulIdx-1); Ops.push_back(OuterMul); return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); } } } } // If there are any add recurrences in the operands list, see if any other // added values are loop invariant. If so, we can fold them into the // recurrence. while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr) ++Idx; // Scan over all recurrences, trying to fold loop invariants into them. for (; Idx < Ops.size() && isa(Ops[Idx]); ++Idx) { // Scan all of the other operands to this add and add them to the vector if // they are loop invariant w.r.t. the recurrence. SmallVector LIOps; const SCEVAddRecExpr *AddRec = cast(Ops[Idx]); const Loop *AddRecLoop = AddRec->getLoop(); for (unsigned i = 0, e = Ops.size(); i != e; ++i) if (isAvailableAtLoopEntry(Ops[i], AddRecLoop)) { LIOps.push_back(Ops[i]); Ops.erase(Ops.begin()+i); --i; --e; } // If we found some loop invariants, fold them into the recurrence. if (!LIOps.empty()) { // NLI + LI + {Start,+,Step} --> NLI + {LI+Start,+,Step} LIOps.push_back(AddRec->getStart()); SmallVector AddRecOps(AddRec->op_begin(), AddRec->op_end()); // This follows from the fact that the no-wrap flags on the outer add // expression are applicable on the 0th iteration, when the add recurrence // will be equal to its start value. AddRecOps[0] = getAddExpr(LIOps, Flags, Depth + 1); // Build the new addrec. Propagate the NUW and NSW flags if both the // outer add and the inner addrec are guaranteed to have no overflow. // Always propagate NW. Flags = AddRec->getNoWrapFlags(setFlags(Flags, SCEV::FlagNW)); const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop, Flags); // If all of the other operands were loop invariant, we are done. if (Ops.size() == 1) return NewRec; // Otherwise, add the folded AddRec by the non-invariant parts. for (unsigned i = 0;; ++i) if (Ops[i] == AddRec) { Ops[i] = NewRec; break; } return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); } // Okay, if there weren't any loop invariants to be folded, check to see if // there are multiple AddRec's with the same loop induction variable being // added together. If so, we can fold them. for (unsigned OtherIdx = Idx+1; OtherIdx < Ops.size() && isa(Ops[OtherIdx]); ++OtherIdx) { // We expect the AddRecExpr's to be sorted in reverse dominance order, // so that the 1st found AddRecExpr is dominated by all others. 
      assert(DT.dominates(
                 cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()->getHeader(),
                 AddRec->getLoop()->getHeader()) &&
             "AddRecExprs are not sorted in reverse dominance order?");
      if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
        // Other + {A,+,B} + {C,+,D}  -->  Other + {A+C,+,B+D}
        SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
                                               AddRec->op_end());
        for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
             ++OtherIdx) {
          const auto *OtherAddRec = cast<SCEVAddRecExpr>(Ops[OtherIdx]);
          if (OtherAddRec->getLoop() == AddRecLoop) {
            for (unsigned i = 0, e = OtherAddRec->getNumOperands();
                 i != e; ++i) {
              if (i >= AddRecOps.size()) {
                AddRecOps.append(OtherAddRec->op_begin()+i,
                                 OtherAddRec->op_end());
                break;
              }
              SmallVector<const SCEV *, 2> TwoOps = {
                  AddRecOps[i], OtherAddRec->getOperand(i)};
              AddRecOps[i] = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1);
            }
            Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
          }
        }
        // Step size has changed, so we cannot guarantee no self-wraparound.
        Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap);
        return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
      }
    }

    // Otherwise couldn't fold anything into this recurrence. Move onto the
    // next one.
  }

  // Okay, it looks like we really DO need an add expr. Check to see if we
  // already have one, otherwise create a new one.
  return getOrCreateAddExpr(Ops, Flags);
}

const SCEV *
ScalarEvolution::getOrCreateAddExpr(SmallVectorImpl<const SCEV *> &Ops,
                                    SCEV::NoWrapFlags Flags) {
  FoldingSetNodeID ID;
  ID.AddInteger(scAddExpr);
  for (const SCEV *Op : Ops)
    ID.AddPointer(Op);
  void *IP = nullptr;
  SCEVAddExpr *S =
      static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
  if (!S) {
    const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
    std::uninitialized_copy(Ops.begin(), Ops.end(), O);
    S = new (SCEVAllocator)
        SCEVAddExpr(ID.Intern(SCEVAllocator), O, Ops.size());
    UniqueSCEVs.InsertNode(S, IP);
    addToLoopUseLists(S);
  }
  S->setNoWrapFlags(Flags);
  return S;
}

const SCEV *
ScalarEvolution::getOrCreateMulExpr(SmallVectorImpl<const SCEV *> &Ops,
                                    SCEV::NoWrapFlags Flags) {
  FoldingSetNodeID ID;
  ID.AddInteger(scMulExpr);
  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    ID.AddPointer(Ops[i]);
  void *IP = nullptr;
  SCEVMulExpr *S =
      static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
  if (!S) {
    const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
    std::uninitialized_copy(Ops.begin(), Ops.end(), O);
    S = new (SCEVAllocator)
        SCEVMulExpr(ID.Intern(SCEVAllocator), O, Ops.size());
    UniqueSCEVs.InsertNode(S, IP);
    addToLoopUseLists(S);
  }
  S->setNoWrapFlags(Flags);
  return S;
}

static uint64_t umul_ov(uint64_t i, uint64_t j, bool &Overflow) {
  uint64_t k = i*j;
  if (j > 1 && k / j != i)
    Overflow = true;
  return k;
}

/// Compute the result of "n choose k", the binomial coefficient. If an
/// intermediate computation overflows, Overflow will be set and the return
/// will be garbage. Overflow is not cleared on absence of overflow.
static uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow) {
  // We use the multiplicative formula:
  //     n(n-1)(n-2)...(n-(k-1)) / k(k-1)(k-2)...1 .
  // At each iteration, we take the n-th term of the numerator and divide by
  // the (k-n)th term of the denominator. This division will always produce
  // an integral result, and helps reduce the chance of overflow in the
  // intermediate computations. However, we can still overflow even when the
  // final result would fit.
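// Illustrative aside (editor's sketch, not part of the merged sources): the
// intermediate product in the loop below is roughly Choose(n, i) * i, so
// Overflow can be reported even when the final coefficient fits in 64 bits;
// for instance C(66, 33) is about 7.2e18, which fits in a uint64_t, but the
// intermediate exceeds 2^64. The hypothetical stand-alone replica below
// reproduces just the overflow test:
#include <cstdint>

static bool chooseWouldOverflow(uint64_t n, uint64_t k) {
  if (k > n) return false;
  if (k > n / 2) k = n - k;
  uint64_t r = 1;
  bool Ov = false;
  for (uint64_t i = 1; i <= k; ++i) {
    uint64_t m = n - i + 1;
    uint64_t t = r * m;
    if (m > 1 && t / m != r)     // same test as umul_ov above
      Ov = true;
    r = t / i;
  }
  return Ov;                     // true for (66, 33) despite the result fitting
}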
if (n == 0 || n == k) return 1; if (k > n) return 0; if (k > n/2) k = n-k; uint64_t r = 1; for (uint64_t i = 1; i <= k; ++i) { r = umul_ov(r, n-(i-1), Overflow); r /= i; } return r; } /// Determine if any of the operands in this SCEV are a constant or if /// any of the add or multiply expressions in this SCEV contain a constant. static bool containsConstantInAddMulChain(const SCEV *StartExpr) { struct FindConstantInAddMulChain { bool FoundConstant = false; bool follow(const SCEV *S) { FoundConstant |= isa(S); return isa(S) || isa(S); } bool isDone() const { return FoundConstant; } }; FindConstantInAddMulChain F; SCEVTraversal ST(F); ST.visitAll(StartExpr); return F.FoundConstant; } /// Get a canonical multiply expression, or something simpler if possible. const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, SCEV::NoWrapFlags Flags, unsigned Depth) { assert(Flags == maskFlags(Flags, SCEV::FlagNUW | SCEV::FlagNSW) && "only nuw or nsw allowed"); assert(!Ops.empty() && "Cannot get empty mul!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); for (unsigned i = 1, e = Ops.size(); i != e; ++i) assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && "SCEVMulExpr operand types don't match!"); #endif // Sort by complexity, this groups all similar expression types together. GroupByComplexity(Ops, &LI, DT); Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags); // Limit recursion calls depth. if (Depth > MaxArithDepth) return getOrCreateMulExpr(Ops, Flags); // If there are any constants, fold them together. unsigned Idx = 0; if (const SCEVConstant *LHSC = dyn_cast(Ops[0])) { // C1*(C2+V) -> C1*C2 + C1*V if (Ops.size() == 2) if (const SCEVAddExpr *Add = dyn_cast(Ops[1])) // If any of Add's ops are Adds or Muls with a constant, // apply this transformation as well. if (Add->getNumOperands() == 2) // TODO: There are some cases where this transformation is not // profitable, for example: // Add = (C0 + X) * Y + Z. // Maybe the scope of this transformation should be narrowed down. if (containsConstantInAddMulChain(Add)) return getAddExpr(getMulExpr(LHSC, Add->getOperand(0), SCEV::FlagAnyWrap, Depth + 1), getMulExpr(LHSC, Add->getOperand(1), SCEV::FlagAnyWrap, Depth + 1), SCEV::FlagAnyWrap, Depth + 1); ++Idx; while (const SCEVConstant *RHSC = dyn_cast(Ops[Idx])) { // We found two constants, fold them together! ConstantInt *Fold = ConstantInt::get(getContext(), LHSC->getAPInt() * RHSC->getAPInt()); Ops[0] = getConstant(Fold); Ops.erase(Ops.begin()+1); // Erase the folded element if (Ops.size() == 1) return Ops[0]; LHSC = cast(Ops[0]); } // If we are left with a constant one being multiplied, strip it off. if (cast(Ops[0])->getValue()->isOne()) { Ops.erase(Ops.begin()); --Idx; } else if (cast(Ops[0])->getValue()->isZero()) { // If we have a multiply of zero, it will always be zero. return Ops[0]; } else if (Ops[0]->isAllOnesValue()) { // If we have a mul by -1 of an add, try distributing the -1 among the // add operands. if (Ops.size() == 2) { if (const SCEVAddExpr *Add = dyn_cast(Ops[1])) { SmallVector NewOps; bool AnyFolded = false; for (const SCEV *AddOp : Add->operands()) { const SCEV *Mul = getMulExpr(Ops[0], AddOp, SCEV::FlagAnyWrap, Depth + 1); if (!isa(Mul)) AnyFolded = true; NewOps.push_back(Mul); } if (AnyFolded) return getAddExpr(NewOps, SCEV::FlagAnyWrap, Depth + 1); } else if (const auto *AddRec = dyn_cast(Ops[1])) { // Negation preserves a recurrence's no self-wrap property. 
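// Illustrative aside (editor's sketch, not part of the merged sources):
// the rewrite below maps -1 * {A,+,B} to {-A,+,-B}; negating every operand
// negates the recurrence's value at every iteration, which is why the
// no-self-wrap property survives. A scalar check of the identity:
#include <cassert>

static void checkNegatedAddRec() {
  const long A = 5, B = 3;
  for (long i = 0; i < 8; ++i)
    assert(-1 * (A + B * i) == (-A) + (-B) * i);  // {A,+,B} at iteration i
}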
SmallVector Operands; for (const SCEV *AddRecOp : AddRec->operands()) Operands.push_back(getMulExpr(Ops[0], AddRecOp, SCEV::FlagAnyWrap, Depth + 1)); return getAddRecExpr(Operands, AddRec->getLoop(), AddRec->getNoWrapFlags(SCEV::FlagNW)); } } } if (Ops.size() == 1) return Ops[0]; } // Skip over the add expression until we get to a multiply. while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr) ++Idx; // If there are mul operands inline them all into this expression. if (Idx < Ops.size()) { bool DeletedMul = false; while (const SCEVMulExpr *Mul = dyn_cast(Ops[Idx])) { if (Ops.size() > MulOpsInlineThreshold) break; // If we have an mul, expand the mul operands onto the end of the // operands list. Ops.erase(Ops.begin()+Idx); Ops.append(Mul->op_begin(), Mul->op_end()); DeletedMul = true; } // If we deleted at least one mul, we added operands to the end of the // list, and they are not necessarily sorted. Recurse to resort and // resimplify any operands we just acquired. if (DeletedMul) return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); } // If there are any add recurrences in the operands list, see if any other // added values are loop invariant. If so, we can fold them into the // recurrence. while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr) ++Idx; // Scan over all recurrences, trying to fold loop invariants into them. for (; Idx < Ops.size() && isa(Ops[Idx]); ++Idx) { // Scan all of the other operands to this mul and add them to the vector // if they are loop invariant w.r.t. the recurrence. SmallVector LIOps; const SCEVAddRecExpr *AddRec = cast(Ops[Idx]); const Loop *AddRecLoop = AddRec->getLoop(); for (unsigned i = 0, e = Ops.size(); i != e; ++i) if (isAvailableAtLoopEntry(Ops[i], AddRecLoop)) { LIOps.push_back(Ops[i]); Ops.erase(Ops.begin()+i); --i; --e; } // If we found some loop invariants, fold them into the recurrence. if (!LIOps.empty()) { // NLI * LI * {Start,+,Step} --> NLI * {LI*Start,+,LI*Step} SmallVector NewOps; NewOps.reserve(AddRec->getNumOperands()); const SCEV *Scale = getMulExpr(LIOps, SCEV::FlagAnyWrap, Depth + 1); for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i), SCEV::FlagAnyWrap, Depth + 1)); // Build the new addrec. Propagate the NUW and NSW flags if both the // outer mul and the inner addrec are guaranteed to have no overflow. // // No self-wrap cannot be guaranteed after changing the step size, but // will be inferred if either NUW or NSW is true. Flags = AddRec->getNoWrapFlags(clearFlags(Flags, SCEV::FlagNW)); const SCEV *NewRec = getAddRecExpr(NewOps, AddRecLoop, Flags); // If all of the other operands were loop invariant, we are done. if (Ops.size() == 1) return NewRec; // Otherwise, multiply the folded AddRec by the non-invariant parts. for (unsigned i = 0;; ++i) if (Ops[i] == AddRec) { Ops[i] = NewRec; break; } return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); } // Okay, if there weren't any loop invariants to be folded, check to see // if there are multiple AddRec's with the same loop induction variable // being multiplied together. If so, we can fold them. // {A1,+,A2,+,...,+,An} * {B1,+,B2,+,...,+,Bn} // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [ // choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z // ]]],+,...up to x=2n}. // Note that the arguments to choose() are always integers with values // known at compile time, never SCEV objects. 
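// Illustrative aside (editor's sketch, not part of the merged sources): the
// smallest instance of the formula above is the product of two affine
// recurrences, {a,+,b} * {c,+,d} = {a*c, +, a*d + b*c + b*d, +, 2*b*d}. The
// check below evaluates both sides iteration by iteration, stepping the
// result chrec through its difference table.
#include <cassert>

static void checkChrecProduct() {
  long v0 = 2, v1 = 5;                        // {2,+,3} and {5,+,7}
  const long b = 3, d = 7;
  long r0 = v0 * v1;                          // a*c
  long r1 = 2 * 7 + 3 * 5 + 3 * 7;            // a*d + b*c + b*d
  long r2 = 2 * 3 * 7;                        // 2*b*d
  for (int i = 0; i < 10; ++i) {
    assert(v0 * v1 == r0);
    v0 += b; v1 += d;                         // step the inputs
    r0 += r1; r1 += r2;                       // step the result
  }
}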
// // The implementation avoids pointless extra computations when the two // addrec's are of different length (mathematically, it's equivalent to // an infinite stream of zeros on the right). bool OpsModified = false; for (unsigned OtherIdx = Idx+1; OtherIdx != Ops.size() && isa(Ops[OtherIdx]); ++OtherIdx) { const SCEVAddRecExpr *OtherAddRec = dyn_cast(Ops[OtherIdx]); if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop) continue; // Limit max number of arguments to avoid creation of unreasonably big // SCEVAddRecs with very complex operands. if (AddRec->getNumOperands() + OtherAddRec->getNumOperands() - 1 > MaxAddRecSize) continue; bool Overflow = false; Type *Ty = AddRec->getType(); bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64; SmallVector AddRecOps; for (int x = 0, xe = AddRec->getNumOperands() + OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) { const SCEV *Term = getZero(Ty); for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) { uint64_t Coeff1 = Choose(x, 2*x - y, Overflow); for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1), ze = std::min(x+1, (int)OtherAddRec->getNumOperands()); z < ze && !Overflow; ++z) { uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow); uint64_t Coeff; if (LargerThan64Bits) Coeff = umul_ov(Coeff1, Coeff2, Overflow); else Coeff = Coeff1*Coeff2; const SCEV *CoeffTerm = getConstant(Ty, Coeff); const SCEV *Term1 = AddRec->getOperand(y-z); const SCEV *Term2 = OtherAddRec->getOperand(z); Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1, Term2, SCEV::FlagAnyWrap, Depth + 1), SCEV::FlagAnyWrap, Depth + 1); } } AddRecOps.push_back(Term); } if (!Overflow) { const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRec->getLoop(), SCEV::FlagAnyWrap); if (Ops.size() == 2) return NewAddRec; Ops[Idx] = NewAddRec; Ops.erase(Ops.begin() + OtherIdx); --OtherIdx; OpsModified = true; AddRec = dyn_cast(NewAddRec); if (!AddRec) break; } } if (OpsModified) return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); // Otherwise couldn't fold anything into this recurrence. Move onto the // next one. } // Okay, it looks like we really DO need an mul expr. Check to see if we // already have one, otherwise create a new one. return getOrCreateMulExpr(Ops, Flags); } /// Represents an unsigned remainder expression based on unsigned division. const SCEV *ScalarEvolution::getURemExpr(const SCEV *LHS, const SCEV *RHS) { assert(getEffectiveSCEVType(LHS->getType()) == getEffectiveSCEVType(RHS->getType()) && "SCEVURemExpr operand types don't match!"); // Short-circuit easy cases if (const SCEVConstant *RHSC = dyn_cast(RHS)) { // If constant is one, the result is trivial if (RHSC->getValue()->isOne()) return getZero(LHS->getType()); // X urem 1 --> 0 // If constant is a power of two, fold into a zext(trunc(LHS)). if (RHSC->getAPInt().isPowerOf2()) { Type *FullTy = LHS->getType(); Type *TruncTy = IntegerType::get(getContext(), RHSC->getAPInt().logBase2()); return getZeroExtendExpr(getTruncateExpr(LHS, TruncTy), FullTy); } } // Fallback to %a == %x urem %y == %x - ((%x udiv %y) * %y) const SCEV *UDiv = getUDivExpr(LHS, RHS); const SCEV *Mult = getMulExpr(UDiv, RHS, SCEV::FlagNUW); return getMinusSCEV(LHS, Mult, SCEV::FlagNUW); } /// Get a canonical unsigned division expression, or something simpler if /// possible. 
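// Illustrative aside (editor's sketch, not part of the merged sources): the
// power-of-two case in getURemExpr above uses the identity
// x urem 2^k == zext(trunc(x)), i.e. keeping only the low k bits. A
// standalone check for k = 3:
#include <cstdint>
#include <cassert>

static void checkUremPow2() {
  for (uint32_t x = 0; x < 1024; ++x) {
    uint8_t Low3 = uint8_t(x) & 0x7;          // trunc to the low 3 bits
    assert(x % 8 == uint32_t(Low3));          // zext back and compare
  }
}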
const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, const SCEV *RHS) { assert(getEffectiveSCEVType(LHS->getType()) == getEffectiveSCEVType(RHS->getType()) && "SCEVUDivExpr operand types don't match!"); if (const SCEVConstant *RHSC = dyn_cast(RHS)) { if (RHSC->getValue()->isOne()) return LHS; // X udiv 1 --> x // If the denominator is zero, the result of the udiv is undefined. Don't // try to analyze it, because the resolution chosen here may differ from // the resolution chosen in other parts of the compiler. if (!RHSC->getValue()->isZero()) { // Determine if the division can be folded into the operands of // its operands. // TODO: Generalize this to non-constants by using known-bits information. Type *Ty = LHS->getType(); unsigned LZ = RHSC->getAPInt().countLeadingZeros(); unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1; // For non-power-of-two values, effectively round the value up to the // nearest power of two. if (!RHSC->getAPInt().isPowerOf2()) ++MaxShiftAmt; IntegerType *ExtTy = IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt); if (const SCEVAddRecExpr *AR = dyn_cast(LHS)) if (const SCEVConstant *Step = dyn_cast(AR->getStepRecurrence(*this))) { // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded. const APInt &StepInt = Step->getAPInt(); const APInt &DivInt = RHSC->getAPInt(); if (!StepInt.urem(DivInt) && getZeroExtendExpr(AR, ExtTy) == getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy), getZeroExtendExpr(Step, ExtTy), AR->getLoop(), SCEV::FlagAnyWrap)) { SmallVector Operands; for (const SCEV *Op : AR->operands()) Operands.push_back(getUDivExpr(Op, RHS)); return getAddRecExpr(Operands, AR->getLoop(), SCEV::FlagNW); } /// Get a canonical UDivExpr for a recurrence. /// {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0. // We can currently only fold X%N if X is constant. const SCEVConstant *StartC = dyn_cast(AR->getStart()); if (StartC && !DivInt.urem(StepInt) && getZeroExtendExpr(AR, ExtTy) == getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy), getZeroExtendExpr(Step, ExtTy), AR->getLoop(), SCEV::FlagAnyWrap)) { const APInt &StartInt = StartC->getAPInt(); const APInt &StartRem = StartInt.urem(StepInt); if (StartRem != 0) LHS = getAddRecExpr(getConstant(StartInt - StartRem), Step, AR->getLoop(), SCEV::FlagNW); } } // (A*B)/C --> A*(B/C) if safe and B/C can be folded. if (const SCEVMulExpr *M = dyn_cast(LHS)) { SmallVector Operands; for (const SCEV *Op : M->operands()) Operands.push_back(getZeroExtendExpr(Op, ExtTy)); if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands)) // Find an operand that's safely divisible. for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { const SCEV *Op = M->getOperand(i); const SCEV *Div = getUDivExpr(Op, RHSC); if (!isa(Div) && getMulExpr(Div, RHSC) == Op) { Operands = SmallVector(M->op_begin(), M->op_end()); Operands[i] = Div; return getMulExpr(Operands); } } } // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded. if (const SCEVAddExpr *A = dyn_cast(LHS)) { SmallVector Operands; for (const SCEV *Op : A->operands()) Operands.push_back(getZeroExtendExpr(Op, ExtTy)); if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) { Operands.clear(); for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) { const SCEV *Op = getUDivExpr(A->getOperand(i), RHS); if (isa(Op) || getMulExpr(Op, RHS) != A->getOperand(i)) break; Operands.push_back(Op); } if (Operands.size() == A->getNumOperands()) return getAddExpr(Operands); } } // Fold if both operands are constant. 
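// Illustrative aside (editor's sketch, not part of the merged sources): the
// recurrence fold above, {X,+,N}/C --> {X/C,+,N/C}, is attempted only when
// the zero-extended forms agree, which rules out wrap-around. With C
// dividing both X and N the scalar identity is immediate:
#include <cassert>

static void checkAddRecUDiv() {
  const unsigned X = 8, N = 4, C = 2;
  for (unsigned i = 0; i < 16; ++i)
    assert((X + N * i) / C == X / C + (N / C) * i);
}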
if (const SCEVConstant *LHSC = dyn_cast(LHS)) { Constant *LHSCV = LHSC->getValue(); Constant *RHSCV = RHSC->getValue(); return getConstant(cast(ConstantExpr::getUDiv(LHSCV, RHSCV))); } } } FoldingSetNodeID ID; ID.AddInteger(scUDivExpr); ID.AddPointer(LHS); ID.AddPointer(RHS); void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator), LHS, RHS); UniqueSCEVs.InsertNode(S, IP); addToLoopUseLists(S); return S; } static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) { APInt A = C1->getAPInt().abs(); APInt B = C2->getAPInt().abs(); uint32_t ABW = A.getBitWidth(); uint32_t BBW = B.getBitWidth(); if (ABW > BBW) B = B.zext(ABW); else if (ABW < BBW) A = A.zext(BBW); return APIntOps::GreatestCommonDivisor(std::move(A), std::move(B)); } /// Get a canonical unsigned division expression, or something simpler if /// possible. There is no representation for an exact udiv in SCEV IR, but we /// can attempt to remove factors from the LHS and RHS. We can't do this when /// it's not exact because the udiv may be clearing bits. const SCEV *ScalarEvolution::getUDivExactExpr(const SCEV *LHS, const SCEV *RHS) { // TODO: we could try to find factors in all sorts of things, but for now we // just deal with u/exact (multiply, constant). See SCEVDivision towards the // end of this file for inspiration. const SCEVMulExpr *Mul = dyn_cast(LHS); if (!Mul || !Mul->hasNoUnsignedWrap()) return getUDivExpr(LHS, RHS); if (const SCEVConstant *RHSCst = dyn_cast(RHS)) { // If the mulexpr multiplies by a constant, then that constant must be the // first element of the mulexpr. if (const auto *LHSCst = dyn_cast(Mul->getOperand(0))) { if (LHSCst == RHSCst) { SmallVector Operands; Operands.append(Mul->op_begin() + 1, Mul->op_end()); return getMulExpr(Operands); } // We can't just assume that LHSCst divides RHSCst cleanly, it could be // that there's a factor provided by one of the other terms. We need to // check. APInt Factor = gcd(LHSCst, RHSCst); if (!Factor.isIntN(1)) { LHSCst = cast(getConstant(LHSCst->getAPInt().udiv(Factor))); RHSCst = cast(getConstant(RHSCst->getAPInt().udiv(Factor))); SmallVector Operands; Operands.push_back(LHSCst); Operands.append(Mul->op_begin() + 1, Mul->op_end()); LHS = getMulExpr(Operands); RHS = RHSCst; Mul = dyn_cast(LHS); if (!Mul) return getUDivExactExpr(LHS, RHS); } } } for (int i = 0, e = Mul->getNumOperands(); i != e; ++i) { if (Mul->getOperand(i) == RHS) { SmallVector Operands; Operands.append(Mul->op_begin(), Mul->op_begin() + i); Operands.append(Mul->op_begin() + i + 1, Mul->op_end()); return getMulExpr(Operands); } } return getUDivExpr(LHS, RHS); } /// Get an add recurrence expression for the specified loop. Simplify the /// expression as much as possible. const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags) { SmallVector Operands; Operands.push_back(Start); if (const SCEVAddRecExpr *StepChrec = dyn_cast(Step)) if (StepChrec->getLoop() == L) { Operands.append(StepChrec->op_begin(), StepChrec->op_end()); return getAddRecExpr(Operands, L, maskFlags(Flags, SCEV::FlagNW)); } Operands.push_back(Step); return getAddRecExpr(Operands, L, Flags); } /// Get an add recurrence expression for the specified loop. Simplify the /// expression as much as possible. 
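// Illustrative aside (editor's sketch, not part of the merged sources):
// getUDivExactExpr above may cancel the divisor against a factor of the
// multiply, (x * y) /u y --> x, but only because the multiply carries <nuw>.
// In i8 arithmetic x = 16, y = 32 gives x*y == 0 (512 mod 256), and
// 0 /u 32 is 0, not 16:
#include <cstdint>
#include <cassert>

static void checkExactUDivCancel() {
  uint8_t x = 16, y = 32;
  uint8_t Prod = uint8_t(x * y);              // wraps: 512 mod 256 == 0
  assert(Prod / y != x);                      // cancelling would be wrong
  uint8_t x2 = 3, y2 = 5;                     // 15 fits, so the mul is nuw
  assert(uint8_t(x2 * y2) / y2 == x2);        // cancellation is valid
}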
const SCEV *
ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
                               const Loop *L, SCEV::NoWrapFlags Flags) {
  if (Operands.size() == 1) return Operands[0];
#ifndef NDEBUG
  Type *ETy = getEffectiveSCEVType(Operands[0]->getType());
  for (unsigned i = 1, e = Operands.size(); i != e; ++i)
    assert(getEffectiveSCEVType(Operands[i]->getType()) == ETy &&
           "SCEVAddRecExpr operand types don't match!");
  for (unsigned i = 0, e = Operands.size(); i != e; ++i)
    assert(isLoopInvariant(Operands[i], L) &&
           "SCEVAddRecExpr operand is not loop-invariant!");
#endif

  if (Operands.back()->isZero()) {
    Operands.pop_back();
    return getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); // {X,+,0}  -->  X
  }

  // It's tempting to want to call getMaxBackedgeTakenCount here and
  // use that information to infer NUW and NSW flags. However, computing a
  // BE count requires calling getAddRecExpr, so we may not yet have a
  // meaningful BE count at this point (and if we don't, we'd be stuck
  // with a SCEVCouldNotCompute as the cached BE count).

  Flags = StrengthenNoWrapFlags(this, scAddRecExpr, Operands, Flags);

  // Canonicalize nested AddRecs by nesting them in order of loop depth.
  if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {
    const Loop *NestedLoop = NestedAR->getLoop();
    if (L->contains(NestedLoop)
            ? (L->getLoopDepth() < NestedLoop->getLoopDepth())
            : (!NestedLoop->contains(L) &&
               DT.dominates(L->getHeader(), NestedLoop->getHeader()))) {
      SmallVector<const SCEV *, 4> NestedOperands(NestedAR->op_begin(),
                                                  NestedAR->op_end());
      Operands[0] = NestedAR->getStart();
      // AddRecs require their operands be loop-invariant with respect to their
      // loops. Don't perform this transformation if it would break this
      // requirement.
      bool AllInvariant = all_of(
          Operands, [&](const SCEV *Op) { return isLoopInvariant(Op, L); });

      if (AllInvariant) {
        // Create a recurrence for the outer loop with the same step size.
        //
        // The outer recurrence keeps its NW flag but only keeps NUW/NSW if
        // the inner recurrence has the same property.
        SCEV::NoWrapFlags OuterFlags =
            maskFlags(Flags, SCEV::FlagNW | NestedAR->getNoWrapFlags());

        NestedOperands[0] = getAddRecExpr(Operands, L, OuterFlags);
        AllInvariant = all_of(NestedOperands, [&](const SCEV *Op) {
          return isLoopInvariant(Op, NestedLoop);
        });

        if (AllInvariant) {
          // Ok, both add recurrences are valid after the transformation.
          //
          // The inner recurrence keeps its NW flag but only keeps NUW/NSW if
          // the outer recurrence has the same property.
          SCEV::NoWrapFlags InnerFlags =
              maskFlags(NestedAR->getNoWrapFlags(), SCEV::FlagNW | Flags);
          return getAddRecExpr(NestedOperands, NestedLoop, InnerFlags);
        }
      }
      // Reset Operands to its original state.
      Operands[0] = NestedAR;
    }
  }

  // Okay, it looks like we really DO need an addrec expr. Check to see if we
  // already have one, otherwise create a new one.
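// Illustrative aside (editor's sketch, not part of the merged sources): the
// lookup below, like the getOrCreate* helpers earlier, is hash-consing:
// profile the node (kind, operands, loop), probe a folding set, and allocate
// only on a miss, so structurally identical SCEVs are pointer-identical. A
// minimal sketch of the pattern with a std::map standing in for
// llvm::FoldingSet:
#include <map>
#include <vector>

struct ExprNode { int Kind; std::vector<const ExprNode *> Ops; };

static const ExprNode *getOrCreate(int Kind,
                                   std::vector<const ExprNode *> Ops) {
  static std::map<std::pair<int, std::vector<const ExprNode *>>, ExprNode>
      Unique;
  ExprNode &Slot = Unique[{Kind, Ops}];       // key is the node's profile
  Slot.Kind = Kind;
  Slot.Ops = std::move(Ops);
  return &Slot;                               // equal inputs, equal pointer
}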
FoldingSetNodeID ID; ID.AddInteger(scAddRecExpr); for (unsigned i = 0, e = Operands.size(); i != e; ++i) ID.AddPointer(Operands[i]); ID.AddPointer(L); void *IP = nullptr; SCEVAddRecExpr *S = static_cast(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); if (!S) { const SCEV **O = SCEVAllocator.Allocate(Operands.size()); std::uninitialized_copy(Operands.begin(), Operands.end(), O); S = new (SCEVAllocator) SCEVAddRecExpr(ID.Intern(SCEVAllocator), O, Operands.size(), L); UniqueSCEVs.InsertNode(S, IP); addToLoopUseLists(S); } S->setNoWrapFlags(Flags); return S; } const SCEV * ScalarEvolution::getGEPExpr(GEPOperator *GEP, const SmallVectorImpl &IndexExprs) { const SCEV *BaseExpr = getSCEV(GEP->getPointerOperand()); // getSCEV(Base)->getType() has the same address space as Base->getType() // because SCEV::getType() preserves the address space. Type *IntPtrTy = getEffectiveSCEVType(BaseExpr->getType()); // FIXME(PR23527): Don't blindly transfer the inbounds flag from the GEP // instruction to its SCEV, because the Instruction may be guarded by control // flow and the no-overflow bits may not be valid for the expression in any // context. This can be fixed similarly to how these flags are handled for // adds. SCEV::NoWrapFlags Wrap = GEP->isInBounds() ? SCEV::FlagNSW : SCEV::FlagAnyWrap; const SCEV *TotalOffset = getZero(IntPtrTy); // The array size is unimportant. The first thing we do on CurTy is getting // its element type. Type *CurTy = ArrayType::get(GEP->getSourceElementType(), 0); for (const SCEV *IndexExpr : IndexExprs) { // Compute the (potentially symbolic) offset in bytes for this index. if (StructType *STy = dyn_cast(CurTy)) { // For a struct, add the member offset. ConstantInt *Index = cast(IndexExpr)->getValue(); unsigned FieldNo = Index->getZExtValue(); const SCEV *FieldOffset = getOffsetOfExpr(IntPtrTy, STy, FieldNo); // Add the field offset to the running total offset. TotalOffset = getAddExpr(TotalOffset, FieldOffset); // Update CurTy to the type of the field at Index. CurTy = STy->getTypeAtIndex(Index); } else { // Update CurTy to its element type. CurTy = cast(CurTy)->getElementType(); // For an array, add the element offset, explicitly scaled. const SCEV *ElementSize = getSizeOfExpr(IntPtrTy, CurTy); // Getelementptr indices are signed. IndexExpr = getTruncateOrSignExtend(IndexExpr, IntPtrTy); // Multiply the index by the element size to compute the element offset. const SCEV *LocalOffset = getMulExpr(IndexExpr, ElementSize, Wrap); // Add the element offset to the running total offset. TotalOffset = getAddExpr(TotalOffset, LocalOffset); } } // Add the total offset from all the GEP indices to the base. return getAddExpr(BaseExpr, TotalOffset, Wrap); } const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS, const SCEV *RHS) { SmallVector Ops = {LHS, RHS}; return getSMaxExpr(Ops); } const SCEV * ScalarEvolution::getSMaxExpr(SmallVectorImpl &Ops) { assert(!Ops.empty() && "Cannot get empty smax!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); for (unsigned i = 1, e = Ops.size(); i != e; ++i) assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && "SCEVSMaxExpr operand types don't match!"); #endif // Sort by complexity, this groups all similar expression types together. GroupByComplexity(Ops, &LI, DT); // If there are any constants, fold them together. 
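// Illustrative aside (editor's sketch, not part of the merged sources): the
// constant handling below folds all constant operands into one and drops a
// constant that can never win, e.g. smax(2, 5, x) --> smax(5, x) and
// smax(INT8_MIN, x) --> x. A scalar check of both facts:
#include <cstdint>
#include <algorithm>
#include <cassert>

static void checkSMaxFolds() {
  for (int8_t x = -100; x <= 100; ++x) {
    assert(std::max({int8_t(2), int8_t(5), x}) == std::max(int8_t(5), x));
    assert(std::max(int8_t(INT8_MIN), x) == x); // minimum-int never wins
  }
}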
unsigned Idx = 0; if (const SCEVConstant *LHSC = dyn_cast(Ops[0])) { ++Idx; assert(Idx < Ops.size()); while (const SCEVConstant *RHSC = dyn_cast(Ops[Idx])) { // We found two constants, fold them together! ConstantInt *Fold = ConstantInt::get( getContext(), APIntOps::smax(LHSC->getAPInt(), RHSC->getAPInt())); Ops[0] = getConstant(Fold); Ops.erase(Ops.begin()+1); // Erase the folded element if (Ops.size() == 1) return Ops[0]; LHSC = cast(Ops[0]); } // If we are left with a constant minimum-int, strip it off. if (cast(Ops[0])->getValue()->isMinValue(true)) { Ops.erase(Ops.begin()); --Idx; } else if (cast(Ops[0])->getValue()->isMaxValue(true)) { // If we have an smax with a constant maximum-int, it will always be // maximum-int. return Ops[0]; } if (Ops.size() == 1) return Ops[0]; } // Find the first SMax while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scSMaxExpr) ++Idx; // Check to see if one of the operands is an SMax. If so, expand its operands // onto our operand list, and recurse to simplify. if (Idx < Ops.size()) { bool DeletedSMax = false; while (const SCEVSMaxExpr *SMax = dyn_cast(Ops[Idx])) { Ops.erase(Ops.begin()+Idx); Ops.append(SMax->op_begin(), SMax->op_end()); DeletedSMax = true; } if (DeletedSMax) return getSMaxExpr(Ops); } // Okay, check to see if the same value occurs in the operand list twice. If // so, delete one. Since we sorted the list, these values are required to // be adjacent. for (unsigned i = 0, e = Ops.size()-1; i != e; ++i) // X smax Y smax Y --> X smax Y // X smax Y --> X, if X is always greater than Y if (Ops[i] == Ops[i+1] || isKnownPredicate(ICmpInst::ICMP_SGE, Ops[i], Ops[i+1])) { Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2); --i; --e; } else if (isKnownPredicate(ICmpInst::ICMP_SLE, Ops[i], Ops[i+1])) { Ops.erase(Ops.begin()+i, Ops.begin()+i+1); --i; --e; } if (Ops.size() == 1) return Ops[0]; assert(!Ops.empty() && "Reduced smax down to nothing!"); // Okay, it looks like we really DO need an smax expr. Check to see if we // already have one, otherwise create a new one. FoldingSetNodeID ID; ID.AddInteger(scSMaxExpr); for (unsigned i = 0, e = Ops.size(); i != e; ++i) ID.AddPointer(Ops[i]); void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; const SCEV **O = SCEVAllocator.Allocate(Ops.size()); std::uninitialized_copy(Ops.begin(), Ops.end(), O); SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator), O, Ops.size()); UniqueSCEVs.InsertNode(S, IP); addToLoopUseLists(S); return S; } const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS, const SCEV *RHS) { SmallVector Ops = {LHS, RHS}; return getUMaxExpr(Ops); } const SCEV * ScalarEvolution::getUMaxExpr(SmallVectorImpl &Ops) { assert(!Ops.empty() && "Cannot get empty umax!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); for (unsigned i = 1, e = Ops.size(); i != e; ++i) assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && "SCEVUMaxExpr operand types don't match!"); #endif // Sort by complexity, this groups all similar expression types together. GroupByComplexity(Ops, &LI, DT); // If there are any constants, fold them together. unsigned Idx = 0; if (const SCEVConstant *LHSC = dyn_cast(Ops[0])) { ++Idx; assert(Idx < Ops.size()); while (const SCEVConstant *RHSC = dyn_cast(Ops[Idx])) { // We found two constants, fold them together! 
ConstantInt *Fold = ConstantInt::get( getContext(), APIntOps::umax(LHSC->getAPInt(), RHSC->getAPInt())); Ops[0] = getConstant(Fold); Ops.erase(Ops.begin()+1); // Erase the folded element if (Ops.size() == 1) return Ops[0]; LHSC = cast(Ops[0]); } // If we are left with a constant minimum-int, strip it off. if (cast(Ops[0])->getValue()->isMinValue(false)) { Ops.erase(Ops.begin()); --Idx; } else if (cast(Ops[0])->getValue()->isMaxValue(false)) { // If we have an umax with a constant maximum-int, it will always be // maximum-int. return Ops[0]; } if (Ops.size() == 1) return Ops[0]; } // Find the first UMax while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scUMaxExpr) ++Idx; // Check to see if one of the operands is a UMax. If so, expand its operands // onto our operand list, and recurse to simplify. if (Idx < Ops.size()) { bool DeletedUMax = false; while (const SCEVUMaxExpr *UMax = dyn_cast(Ops[Idx])) { Ops.erase(Ops.begin()+Idx); Ops.append(UMax->op_begin(), UMax->op_end()); DeletedUMax = true; } if (DeletedUMax) return getUMaxExpr(Ops); } // Okay, check to see if the same value occurs in the operand list twice. If // so, delete one. Since we sorted the list, these values are required to // be adjacent. for (unsigned i = 0, e = Ops.size()-1; i != e; ++i) // X umax Y umax Y --> X umax Y // X umax Y --> X, if X is always greater than Y if (Ops[i] == Ops[i+1] || isKnownPredicate(ICmpInst::ICMP_UGE, Ops[i], Ops[i+1])) { Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2); --i; --e; } else if (isKnownPredicate(ICmpInst::ICMP_ULE, Ops[i], Ops[i+1])) { Ops.erase(Ops.begin()+i, Ops.begin()+i+1); --i; --e; } if (Ops.size() == 1) return Ops[0]; assert(!Ops.empty() && "Reduced umax down to nothing!"); // Okay, it looks like we really DO need a umax expr. Check to see if we // already have one, otherwise create a new one. FoldingSetNodeID ID; ID.AddInteger(scUMaxExpr); for (unsigned i = 0, e = Ops.size(); i != e; ++i) ID.AddPointer(Ops[i]); void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; const SCEV **O = SCEVAllocator.Allocate(Ops.size()); std::uninitialized_copy(Ops.begin(), Ops.end(), O); SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator), O, Ops.size()); UniqueSCEVs.InsertNode(S, IP); addToLoopUseLists(S); return S; } const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS, const SCEV *RHS) { // ~smax(~x, ~y) == smin(x, y). return getNotSCEV(getSMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS))); } const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS, const SCEV *RHS) { // ~umax(~x, ~y) == umin(x, y) return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS))); } const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) { // We can bypass creating a target-independent // constant expression and then folding it back into a ConstantInt. // This is just a compile-time optimization. return getConstant(IntTy, getDataLayout().getTypeAllocSize(AllocTy)); } const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy, StructType *STy, unsigned FieldNo) { // We can bypass creating a target-independent // constant expression and then folding it back into a ConstantInt. // This is just a compile-time optimization. return getConstant( IntTy, getDataLayout().getStructLayout(STy)->getElementOffset(FieldNo)); } const SCEV *ScalarEvolution::getUnknown(Value *V) { // Don't attempt to do anything other than create a SCEVUnknown object // here. 
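// Illustrative aside (editor's sketch, not part of the merged sources): the
// getSMinExpr/getUMinExpr definitions above work because bitwise-not is
// order-reversing, so ~umax(~x, ~y) == umin(x, y). Brute-force check over
// all 8-bit pairs:
#include <cstdint>
#include <algorithm>
#include <cassert>

static void checkUMinViaUMax() {
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned y = 0; y < 256; ++y) {
      uint8_t Nx = uint8_t(~x), Ny = uint8_t(~y);
      assert(uint8_t(~std::max(Nx, Ny)) ==
             std::min(uint8_t(x), uint8_t(y)));
    }
}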
  // createSCEV only calls getUnknown after checking for all other
  // interesting possibilities, and any other code that calls getUnknown
  // is doing so in order to hide a value from SCEV canonicalization.

  FoldingSetNodeID ID;
  ID.AddInteger(scUnknown);
  ID.AddPointer(V);
  void *IP = nullptr;
  if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) {
    assert(cast<SCEVUnknown>(S)->getValue() == V &&
           "Stale SCEVUnknown in uniquing map!");
    return S;
  }
  SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V, this,
                                            FirstUnknown);
  FirstUnknown = cast<SCEVUnknown>(S);
  UniqueSCEVs.InsertNode(S, IP);
  return S;
}

//===----------------------------------------------------------------------===//
//            Basic SCEV Analysis and PHI Idiom Recognition Code
//

/// Test if values of the given type are analyzable within the SCEV
/// framework. This primarily includes integer types, and it can optionally
/// include pointer types if the ScalarEvolution class has access to
/// target-specific information.
bool ScalarEvolution::isSCEVable(Type *Ty) const {
  // Integers and pointers are always SCEVable.
  return Ty->isIntegerTy() || Ty->isPointerTy();
}

/// Return the size in bits of the specified type, for which isSCEVable must
/// return true.
uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
  assert(isSCEVable(Ty) && "Type is not SCEVable!");
  return getDataLayout().getTypeSizeInBits(Ty);
}

/// Return a type with the same bitwidth as the given type and which
/// represents how SCEV will treat the given type, for which isSCEVable must
/// return true. For pointer types, this is the pointer-sized integer type.
Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
  assert(isSCEVable(Ty) && "Type is not SCEVable!");

  if (Ty->isIntegerTy())
    return Ty;

  // The only other supported type is pointer.
  assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
  return getDataLayout().getIntPtrType(Ty);
}

Type *ScalarEvolution::getWiderType(Type *T1, Type *T2) const {
  return getTypeSizeInBits(T1) >= getTypeSizeInBits(T2) ? T1 : T2;
}

const SCEV *ScalarEvolution::getCouldNotCompute() {
  return CouldNotCompute.get();
}

bool ScalarEvolution::checkValidity(const SCEV *S) const {
  bool ContainsNulls = SCEVExprContains(S, [](const SCEV *S) {
    auto *SU = dyn_cast<SCEVUnknown>(S);
    return SU && SU->getValue() == nullptr;
  });

  return !ContainsNulls;
}

bool ScalarEvolution::containsAddRecurrence(const SCEV *S) {
  HasRecMapType::iterator I = HasRecMap.find(S);
  if (I != HasRecMap.end())
    return I->second;

  bool FoundAddRec = SCEVExprContains(S, isa<SCEVAddRecExpr, const SCEV *>);
  HasRecMap.insert({S, FoundAddRec});
  return FoundAddRec;
}

/// Try to split a SCEVAddExpr into a pair of {SCEV, ConstantInt}.
/// If \p S is a SCEVAddExpr and is composed of a sub SCEV S' and an
/// offset I, then return {S', I}, else return {\p S, nullptr}.
static std::pair<const SCEV *, ConstantInt *> splitAddExpr(const SCEV *S) {
  const auto *Add = dyn_cast<SCEVAddExpr>(S);
  if (!Add)
    return {S, nullptr};

  if (Add->getNumOperands() != 2)
    return {S, nullptr};

  auto *ConstOp = dyn_cast<SCEVConstant>(Add->getOperand(0));
  if (!ConstOp)
    return {S, nullptr};

  return {Add->getOperand(1), ConstOp->getValue()};
}

/// Return the ValueOffsetPair set for \p S. \p S can be represented
/// by the value and offset from any ValueOffsetPair in the set.
SetVector<ScalarEvolution::ValueOffsetPair> *
ScalarEvolution::getSCEVValues(const SCEV *S) {
  ExprValueMapType::iterator SI = ExprValueMap.find_as(S);
  if (SI == ExprValueMap.end())
    return nullptr;
#ifndef NDEBUG
  if (VerifySCEVMap) {
    // Check there is no dangling Value in the set returned.
for (const auto &VE : SI->second) assert(ValueExprMap.count(VE.first)); } #endif return &SI->second; } /// Erase Value from ValueExprMap and ExprValueMap. ValueExprMap.erase(V) /// cannot be used separately. eraseValueFromMap should be used to remove /// V from ValueExprMap and ExprValueMap at the same time. void ScalarEvolution::eraseValueFromMap(Value *V) { ValueExprMapType::iterator I = ValueExprMap.find_as(V); if (I != ValueExprMap.end()) { const SCEV *S = I->second; // Remove {V, 0} from the set of ExprValueMap[S] if (SetVector *SV = getSCEVValues(S)) SV->remove({V, nullptr}); // Remove {V, Offset} from the set of ExprValueMap[Stripped] const SCEV *Stripped; ConstantInt *Offset; std::tie(Stripped, Offset) = splitAddExpr(S); if (Offset != nullptr) { if (SetVector *SV = getSCEVValues(Stripped)) SV->remove({V, Offset}); } ValueExprMap.erase(V); } } /// Return an existing SCEV if it exists, otherwise analyze the expression and /// create a new one. const SCEV *ScalarEvolution::getSCEV(Value *V) { assert(isSCEVable(V->getType()) && "Value is not SCEVable!"); const SCEV *S = getExistingSCEV(V); if (S == nullptr) { S = createSCEV(V); // During PHI resolution, it is possible to create two SCEVs for the same // V, so it is needed to double check whether V->S is inserted into // ValueExprMap before insert S->{V, 0} into ExprValueMap. std::pair Pair = ValueExprMap.insert({SCEVCallbackVH(V, this), S}); if (Pair.second) { ExprValueMap[S].insert({V, nullptr}); // If S == Stripped + Offset, add Stripped -> {V, Offset} into // ExprValueMap. const SCEV *Stripped = S; ConstantInt *Offset = nullptr; std::tie(Stripped, Offset) = splitAddExpr(S); // If stripped is SCEVUnknown, don't bother to save // Stripped -> {V, offset}. It doesn't simplify and sometimes even // increase the complexity of the expansion code. // If V is GetElementPtrInst, don't save Stripped -> {V, offset} // because it may generate add/sub instead of GEP in SCEV expansion. if (Offset != nullptr && !isa(Stripped) && !isa(V)) ExprValueMap[Stripped].insert({V, Offset}); } } return S; } const SCEV *ScalarEvolution::getExistingSCEV(Value *V) { assert(isSCEVable(V->getType()) && "Value is not SCEVable!"); ValueExprMapType::iterator I = ValueExprMap.find_as(V); if (I != ValueExprMap.end()) { const SCEV *S = I->second; if (checkValidity(S)) return S; eraseValueFromMap(V); forgetMemoizedResults(S); } return nullptr; } /// Return a SCEV corresponding to -V = -1*V const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags) { if (const SCEVConstant *VC = dyn_cast(V)) return getConstant( cast(ConstantExpr::getNeg(VC->getValue()))); Type *Ty = V->getType(); Ty = getEffectiveSCEVType(Ty); return getMulExpr( V, getConstant(cast(Constant::getAllOnesValue(Ty))), Flags); } /// Return a SCEV corresponding to ~V = -1-V const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) { if (const SCEVConstant *VC = dyn_cast(V)) return getConstant( cast(ConstantExpr::getNot(VC->getValue()))); Type *Ty = V->getType(); Ty = getEffectiveSCEVType(Ty); const SCEV *AllOnes = getConstant(cast(Constant::getAllOnesValue(Ty))); return getMinusSCEV(AllOnes, V); } const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags, unsigned Depth) { // Fast path: X - X --> 0. if (LHS == RHS) return getZero(LHS->getType()); // We represent LHS - RHS as LHS + (-1)*RHS. This transformation // makes it so that we cannot make much use of NUW. 
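// Illustrative aside (editor's sketch, not part of the merged sources): the
// reason the NSW transfer below must prove RHS != signed-min: in two's
// complement, negating the minimum value wraps back to itself, so (-1)*RHS
// overflows exactly when RHS is that minimum. An 8-bit demonstration:
#include <cstdint>
#include <cassert>

static void checkNegateMinWraps() {
  int8_t M = INT8_MIN;                        // -128
  uint8_t NegBits = uint8_t(-int(M));         // bits of (-1)*M, mod 256
  assert(NegBits == uint8_t(M));              // -128 negates to itself
}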
  auto AddFlags = SCEV::FlagAnyWrap;
  const bool RHSIsNotMinSigned =
      !getSignedRangeMin(RHS).isMinSignedValue();
  if (maskFlags(Flags, SCEV::FlagNSW) == SCEV::FlagNSW) {
    // Let M be the minimum representable signed value. Then (-1)*RHS
    // signed-wraps if and only if RHS is M. That can happen even for
    // an NSW subtraction because e.g. (-1)*M signed-wraps even though
    // -1 - M does not. So to transfer NSW from LHS - RHS to LHS +
    // (-1)*RHS, we need to prove that RHS != M.
    //
    // If LHS is non-negative and we know that LHS - RHS does not
    // signed-wrap, then RHS cannot be M. So we can rule out signed-wrap
    // either by proving that RHS > M or that LHS >= 0.
    if (RHSIsNotMinSigned || isKnownNonNegative(LHS)) {
      AddFlags = SCEV::FlagNSW;
    }
  }

  // FIXME: Find a correct way to transfer NSW to (-1)*M when LHS -
  // RHS is NSW and LHS >= 0.
  //
  // The difficulty here is that the NSW flag may have been proven
  // relative to a loop that is to be found in a recurrence in LHS and
  // not in RHS. Applying NSW to (-1)*M may then let the NSW have a
  // larger scope than intended.
  auto NegFlags = RHSIsNotMinSigned ? SCEV::FlagNSW : SCEV::FlagAnyWrap;

  return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags, Depth);
}

const SCEV *
ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty) {
  Type *SrcTy = V->getType();
  assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
         (Ty->isIntegerTy() || Ty->isPointerTy()) &&
         "Cannot truncate or zero extend with non-integer arguments!");
  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
    return V;  // No conversion
  if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
    return getTruncateExpr(V, Ty);
  return getZeroExtendExpr(V, Ty);
}

const SCEV *
ScalarEvolution::getTruncateOrSignExtend(const SCEV *V, Type *Ty) {
  Type *SrcTy = V->getType();
  assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
         (Ty->isIntegerTy() || Ty->isPointerTy()) &&
         "Cannot truncate or sign extend with non-integer arguments!");
  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
    return V;  // No conversion
  if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
    return getTruncateExpr(V, Ty);
  return getSignExtendExpr(V, Ty);
}

const SCEV *
ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, Type *Ty) {
  Type *SrcTy = V->getType();
  assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
         (Ty->isIntegerTy() || Ty->isPointerTy()) &&
         "Cannot noop or zero extend with non-integer arguments!");
  assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
         "getNoopOrZeroExtend cannot truncate!");
  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
    return V;  // No conversion
  return getZeroExtendExpr(V, Ty);
}

const SCEV *
ScalarEvolution::getNoopOrSignExtend(const SCEV *V, Type *Ty) {
  Type *SrcTy = V->getType();
  assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
         (Ty->isIntegerTy() || Ty->isPointerTy()) &&
         "Cannot noop or sign extend with non-integer arguments!");
  assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
         "getNoopOrSignExtend cannot truncate!");
  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
    return V;  // No conversion
  return getSignExtendExpr(V, Ty);
}

const SCEV *
ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, Type *Ty) {
  Type *SrcTy = V->getType();
  assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
         (Ty->isIntegerTy() || Ty->isPointerTy()) &&
         "Cannot noop or any extend with non-integer arguments!");
  assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
         "getNoopOrAnyExtend cannot truncate!");
  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
    return V;  // No conversion
  return getAnyExtendExpr(V, Ty);
}

const SCEV *
ScalarEvolution::getTruncateOrNoop(const SCEV *V, Type *Ty) {
  Type *SrcTy = V->getType();
  assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
         (Ty->isIntegerTy() || Ty->isPointerTy()) &&
         "Cannot truncate or noop with non-integer arguments!");
  assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) &&
         "getTruncateOrNoop cannot extend!");
  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
    return V;  // No conversion
  return getTruncateExpr(V, Ty);
}

const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS,
                                                        const SCEV *RHS) {
  const SCEV *PromotedLHS = LHS;
  const SCEV *PromotedRHS = RHS;

  if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
    PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
  else
    PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());

  return getUMaxExpr(PromotedLHS, PromotedRHS);
}

const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS,
                                                        const SCEV *RHS) {
  const SCEV *PromotedLHS = LHS;
  const SCEV *PromotedRHS = RHS;

  if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
    PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
  else
    PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());

  return getUMinExpr(PromotedLHS, PromotedRHS);
}

const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) {
  // A pointer operand may evaluate to a nonpointer expression, such as null.
  if (!V->getType()->isPointerTy())
    return V;

  if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(V)) {
    return getPointerBase(Cast->getOperand());
  } else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) {
    const SCEV *PtrOp = nullptr;
    for (const SCEV *NAryOp : NAry->operands()) {
      if (NAryOp->getType()->isPointerTy()) {
        // Cannot find the base of an expression with multiple pointer
        // operands.
        if (PtrOp)
          return V;
        PtrOp = NAryOp;
      }
    }
    if (!PtrOp)
      return V;
    return getPointerBase(PtrOp);
  }
  return V;
}

/// Push users of the given Instruction onto the given Worklist.
static void
PushDefUseChildren(Instruction *I,
                   SmallVectorImpl<Instruction *> &Worklist) {
  // Push the def-use children onto the Worklist stack.
  for (User *U : I->users())
    Worklist.push_back(cast<Instruction>(U));
}

void ScalarEvolution::forgetSymbolicName(Instruction *PN, const SCEV *SymName) {
  SmallVector<Instruction *, 16> Worklist;
  PushDefUseChildren(PN, Worklist);

  SmallPtrSet<Instruction *, 8> Visited;
  Visited.insert(PN);
  while (!Worklist.empty()) {
    Instruction *I = Worklist.pop_back_val();
    if (!Visited.insert(I).second)
      continue;

    auto It = ValueExprMap.find_as(static_cast<Value *>(I));
    if (It != ValueExprMap.end()) {
      const SCEV *Old = It->second;

      // Short-circuit the def-use traversal if the symbolic name
      // ceases to appear in expressions.
      if (Old != SymName && !hasOperand(Old, SymName))
        continue;

      // SCEVUnknown for a PHI either means that it has an unrecognized
      // structure, it's a PHI that's in the process of being computed
      // by createNodeForPHI, or it's a single-value PHI. In the first case,
      // additional loop trip count information isn't going to change anything.
      // In the second case, createNodeForPHI will perform the necessary
      // updates on its own when it gets to that point. In the third, we do
      // want to forget the SCEVUnknown.
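      // For example (hypothetical IR), a single-value PHI such as
      //   %p = phi i64 [ %v, %preheader ]
      // is typically simplified away later; keeping its stale SCEVUnknown
      // here could shadow the simpler SCEV that %v would produce.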
      if (!isa<PHINode>(I) || !isa<SCEVUnknown>(Old) ||
          (I != PN && Old == SymName)) {
        eraseValueFromMap(It->first);
        forgetMemoizedResults(Old);
      }
    }

    PushDefUseChildren(I, Worklist);
  }
}

namespace {

class SCEVInitRewriter : public SCEVRewriteVisitor<SCEVInitRewriter> {
public:
  static const SCEV *rewrite(const SCEV *S, const Loop *L,
                             ScalarEvolution &SE) {
    SCEVInitRewriter Rewriter(L, SE);
    const SCEV *Result = Rewriter.visit(S);
    return Rewriter.isValid() ? Result : SE.getCouldNotCompute();
  }

  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
    if (!SE.isLoopInvariant(Expr, L))
      Valid = false;
    return Expr;
  }

  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
    // Only allow AddRecExprs for this loop.
    if (Expr->getLoop() == L)
      return Expr->getStart();
    Valid = false;
    return Expr;
  }

  bool isValid() { return Valid; }

private:
  explicit SCEVInitRewriter(const Loop *L, ScalarEvolution &SE)
      : SCEVRewriteVisitor(SE), L(L) {}

  const Loop *L;
  bool Valid = true;
};

/// This class evaluates the compare condition by matching it against the
/// condition of the loop latch. If there is a match we assume a true value
/// for the condition while building SCEV nodes.
class SCEVBackedgeConditionFolder
    : public SCEVRewriteVisitor<SCEVBackedgeConditionFolder> {
public:
  static const SCEV *rewrite(const SCEV *S, const Loop *L,
                             ScalarEvolution &SE) {
    bool IsPosBECond = false;
    Value *BECond = nullptr;
    if (BasicBlock *Latch = L->getLoopLatch()) {
      BranchInst *BI = dyn_cast<BranchInst>(Latch->getTerminator());
      if (BI && BI->isConditional()) {
        assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
               "Both outgoing branches should not target same header!");
        BECond = BI->getCondition();
        IsPosBECond = BI->getSuccessor(0) == L->getHeader();
      } else {
        return S;
      }
    }
    SCEVBackedgeConditionFolder Rewriter(L, BECond, IsPosBECond, SE);
    return Rewriter.visit(S);
  }

  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
    const SCEV *Result = Expr;
    bool InvariantF = SE.isLoopInvariant(Expr, L);

    if (!InvariantF) {
      Instruction *I = cast<Instruction>(Expr->getValue());
      switch (I->getOpcode()) {
      case Instruction::Select: {
        SelectInst *SI = cast<SelectInst>(I);
        Optional<const SCEV *> Res =
            compareWithBackedgeCondition(SI->getCondition());
        if (Res.hasValue()) {
          bool IsOne = cast<SCEVConstant>(Res.getValue())->getValue()->isOne();
          Result = SE.getSCEV(IsOne ? SI->getTrueValue() : SI->getFalseValue());
        }
        break;
      }
      default: {
        Optional<const SCEV *> Res = compareWithBackedgeCondition(I);
        if (Res.hasValue())
          Result = Res.getValue();
        break;
      }
      }
    }
    return Result;
  }

private:
  explicit SCEVBackedgeConditionFolder(const Loop *L, Value *BECond,
                                       bool IsPosBECond, ScalarEvolution &SE)
      : SCEVRewriteVisitor(SE), L(L), BackedgeCond(BECond),
        IsPositiveBECond(IsPosBECond) {}

  Optional<const SCEV *> compareWithBackedgeCondition(Value *IC);

  const Loop *L;
  /// Loop back condition.
  Value *BackedgeCond = nullptr;
  /// Set to true if loop back is on positive branch condition.
  bool IsPositiveBECond;
};

Optional<const SCEV *>
SCEVBackedgeConditionFolder::compareWithBackedgeCondition(Value *IC) {
  // If value matches the backedge condition for loop latch,
  // then return a constant evolution node based on loopback
  // branch taken.
  if (BackedgeCond == IC)
    return IsPositiveBECond ? SE.getOne(Type::getInt1Ty(SE.getContext()))
                            : SE.getZero(Type::getInt1Ty(SE.getContext()));
  return None;
}

class SCEVShiftRewriter : public SCEVRewriteVisitor<SCEVShiftRewriter> {
public:
  static const SCEV *rewrite(const SCEV *S, const Loop *L,
                             ScalarEvolution &SE) {
    SCEVShiftRewriter Rewriter(L, SE);
    const SCEV *Result = Rewriter.visit(S);
    return Rewriter.isValid() ? Result : SE.getCouldNotCompute();
  }

  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
    // Only allow AddRecExprs for this loop.
    if (!SE.isLoopInvariant(Expr, L))
      Valid = false;
    return Expr;
  }

  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
    if (Expr->getLoop() == L && Expr->isAffine())
      return SE.getMinusSCEV(Expr, Expr->getStepRecurrence(SE));
    Valid = false;
    return Expr;
  }

  bool isValid() { return Valid; }

private:
  explicit SCEVShiftRewriter(const Loop *L, ScalarEvolution &SE)
      : SCEVRewriteVisitor(SE), L(L) {}

  const Loop *L;
  bool Valid = true;
};

} // end anonymous namespace

SCEV::NoWrapFlags
ScalarEvolution::proveNoWrapViaConstantRanges(const SCEVAddRecExpr *AR) {
  if (!AR->isAffine())
    return SCEV::FlagAnyWrap;

  using OBO = OverflowingBinaryOperator;

  SCEV::NoWrapFlags Result = SCEV::FlagAnyWrap;

  if (!AR->hasNoSignedWrap()) {
    ConstantRange AddRecRange = getSignedRange(AR);
    ConstantRange IncRange = getSignedRange(AR->getStepRecurrence(*this));

    auto NSWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
        Instruction::Add, IncRange, OBO::NoSignedWrap);
    if (NSWRegion.contains(AddRecRange))
      Result = ScalarEvolution::setFlags(Result, SCEV::FlagNSW);
  }

  if (!AR->hasNoUnsignedWrap()) {
    ConstantRange AddRecRange = getUnsignedRange(AR);
    ConstantRange IncRange = getUnsignedRange(AR->getStepRecurrence(*this));

    auto NUWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
        Instruction::Add, IncRange, OBO::NoUnsignedWrap);
    if (NUWRegion.contains(AddRecRange))
      Result = ScalarEvolution::setFlags(Result, SCEV::FlagNUW);
  }

  return Result;
}

namespace {

/// Represents an abstract binary operation. This may exist as a
/// normal instruction or constant expression, or may have been
/// derived from an expression tree.
struct BinaryOp {
  unsigned Opcode;
  Value *LHS;
  Value *RHS;
  bool IsNSW = false;
  bool IsNUW = false;

  /// Op is set if this BinaryOp corresponds to a concrete LLVM instruction or
  /// constant expression.
  Operator *Op = nullptr;

  explicit BinaryOp(Operator *Op)
      : Opcode(Op->getOpcode()), LHS(Op->getOperand(0)),
        RHS(Op->getOperand(1)), Op(Op) {
    if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Op)) {
      IsNSW = OBO->hasNoSignedWrap();
      IsNUW = OBO->hasNoUnsignedWrap();
    }
  }

  explicit BinaryOp(unsigned Opcode, Value *LHS, Value *RHS,
                    bool IsNSW = false, bool IsNUW = false)
      : Opcode(Opcode), LHS(LHS), RHS(RHS), IsNSW(IsNSW), IsNUW(IsNUW) {}
};

} // end anonymous namespace

/// Try to map \p V into a BinaryOp, and return \c None on failure.
static Optional<BinaryOp> MatchBinaryOp(Value *V, DominatorTree &DT) {
  auto *Op = dyn_cast<Operator>(V);
  if (!Op)
    return None;

  // Implementation detail: all the cleverness here should happen without
  // creating new SCEV expressions -- our caller knows tricks to avoid creating
  // SCEV expressions when possible, and we should not break that.
  switch (Op->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::URem:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::AShr:
  case Instruction::Shl:
    return BinaryOp(Op);

  case Instruction::Xor:
    if (auto *RHSC = dyn_cast<ConstantInt>(Op->getOperand(1)))
      // If the RHS of the xor is a signmask, then this is just an add.
      // Instcombine turns add of signmask into xor as a strength reduction
      // step.
      if (RHSC->getValue().isSignMask())
        return BinaryOp(Instruction::Add, Op->getOperand(0),
                        Op->getOperand(1));
    return BinaryOp(Op);

  case Instruction::LShr:
    // Turn logical shift right of a constant into an unsigned divide.
    if (ConstantInt *SA = dyn_cast<ConstantInt>(Op->getOperand(1))) {
      uint32_t BitWidth = cast<IntegerType>(Op->getType())->getBitWidth();

      // If the shift count is not less than the bitwidth, the result of
      // the shift is undefined. Don't try to analyze it, because the
      // resolution chosen here may differ from the resolution chosen in
      // other parts of the compiler.
      if (SA->getValue().ult(BitWidth)) {
        Constant *X =
            ConstantInt::get(SA->getContext(),
                             APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
        return BinaryOp(Instruction::UDiv, Op->getOperand(0), X);
      }
    }
    return BinaryOp(Op);

  case Instruction::ExtractValue: {
    auto *EVI = cast<ExtractValueInst>(Op);
    if (EVI->getNumIndices() != 1 || EVI->getIndices()[0] != 0)
      break;

    auto *CI = dyn_cast<CallInst>(EVI->getAggregateOperand());
    if (!CI)
      break;

    if (auto *F = CI->getCalledFunction())
      switch (F->getIntrinsicID()) {
      case Intrinsic::sadd_with_overflow:
      case Intrinsic::uadd_with_overflow:
        if (!isOverflowIntrinsicNoWrap(cast<IntrinsicInst>(CI), DT))
          return BinaryOp(Instruction::Add, CI->getArgOperand(0),
                          CI->getArgOperand(1));

        // Now that we know that all uses of the arithmetic-result component of
        // CI are guarded by the overflow check, we can go ahead and pretend
        // that the arithmetic is non-overflowing.
        if (F->getIntrinsicID() == Intrinsic::sadd_with_overflow)
          return BinaryOp(Instruction::Add, CI->getArgOperand(0),
                          CI->getArgOperand(1), /* IsNSW = */ true,
                          /* IsNUW = */ false);
        else
          return BinaryOp(Instruction::Add, CI->getArgOperand(0),
                          CI->getArgOperand(1), /* IsNSW = */ false,
                          /* IsNUW = */ true);

      case Intrinsic::ssub_with_overflow:
      case Intrinsic::usub_with_overflow:
        if (!isOverflowIntrinsicNoWrap(cast<IntrinsicInst>(CI), DT))
          return BinaryOp(Instruction::Sub, CI->getArgOperand(0),
                          CI->getArgOperand(1));

        // The same reasoning as sadd/uadd above.
        if (F->getIntrinsicID() == Intrinsic::ssub_with_overflow)
          return BinaryOp(Instruction::Sub, CI->getArgOperand(0),
                          CI->getArgOperand(1), /* IsNSW = */ true,
                          /* IsNUW = */ false);
        else
          return BinaryOp(Instruction::Sub, CI->getArgOperand(0),
                          CI->getArgOperand(1), /* IsNSW = */ false,
                          /* IsNUW = */ true);

      case Intrinsic::smul_with_overflow:
      case Intrinsic::umul_with_overflow:
        return BinaryOp(Instruction::Mul, CI->getArgOperand(0),
                        CI->getArgOperand(1));
      default:
        break;
      }
    break;
  }

  default:
    break;
  }

  return None;
}

/// Helper function to createAddRecFromPHIWithCasts. We have a phi
/// node whose symbolic (unknown) SCEV is \p SymbolicPHI, which is updated via
/// the loop backedge by a SCEVAddExpr, possibly also with a few casts on the
/// way. This function checks if \p Op, an operand of this SCEVAddExpr,
/// follows one of the following patterns:
/// Op == (SExt ix (Trunc iy (%SymbolicPHI) to ix) to iy)
/// Op == (ZExt ix (Trunc iy (%SymbolicPHI) to ix) to iy)
/// If the SCEV expression of \p Op conforms with one of the expected patterns
/// we return the type of the truncation operation, and indicate whether the
/// truncated type should be treated as signed/unsigned by setting
/// \p Signed to true/false, respectively.
static Type *isSimpleCastedPHI(const SCEV *Op, const SCEVUnknown *SymbolicPHI,
                               bool &Signed, ScalarEvolution &SE) {
  // The case where Op == SymbolicPHI (that is, with no type conversions on
  // the way) is handled by the regular add recurrence creating logic and
  // would have already been triggered in createAddRecForPHI. Reaching it here
  // means that createAddRecFromPHI had failed for this PHI before (e.g.,
  // because one of the other operands of the SCEVAddExpr updating this PHI is
  // not invariant).
  //
  // Here we look for the case where Op = (ext(trunc(SymbolicPHI))), and in
  // this case predicates that allow us to prove that Op == SymbolicPHI will
  // be added.
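  // For instance (hypothetical operands), with %SymbolicPHI of type i64 and
  //   Op == (sext i32 (trunc i64 %SymbolicPHI to i32) to i64)
  // we would return the i32 truncation type and set Signed to true.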
  if (Op == SymbolicPHI)
    return nullptr;

  unsigned SourceBits = SE.getTypeSizeInBits(SymbolicPHI->getType());
  unsigned NewBits = SE.getTypeSizeInBits(Op->getType());
  if (SourceBits != NewBits)
    return nullptr;

  const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(Op);
  const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(Op);
  if (!SExt && !ZExt)
    return nullptr;
  const SCEVTruncateExpr *Trunc =
      SExt ? dyn_cast<SCEVTruncateExpr>(SExt->getOperand())
           : dyn_cast<SCEVTruncateExpr>(ZExt->getOperand());
  if (!Trunc)
    return nullptr;
  const SCEV *X = Trunc->getOperand();
  if (X != SymbolicPHI)
    return nullptr;
  Signed = SExt != nullptr;
  return Trunc->getType();
}

static const Loop *isIntegerLoopHeaderPHI(const PHINode *PN, LoopInfo &LI) {
  if (!PN->getType()->isIntegerTy())
    return nullptr;
  const Loop *L = LI.getLoopFor(PN->getParent());
  if (!L || L->getHeader() != PN->getParent())
    return nullptr;
  return L;
}

// Analyze \p SymbolicPHI, a SCEV expression of a phi node, and check if the
// computation that updates the phi follows the following pattern:
//   (SExt/ZExt ix (Trunc iy (%SymbolicPHI) to ix) to iy) + InvariantAccum
// which corresponds to a phi->trunc->sext/zext->add->phi update chain.
// If so, try to see if it can be rewritten as an AddRecExpr under some
// Predicates. If successful, return them as a pair. Also cache the results
// of the analysis.
//
// Example usage scenario:
//    Say the Rewriter is called for the following SCEV:
//         8 * ((sext i32 (trunc i64 %X to i32) to i64) + %Step)
//    where:
//         %X = phi i64 (%Start, %BEValue)
//    It will visitMul->visitAdd->visitSExt->visitTrunc->visitUnknown(%X),
//    and call this function with %SymbolicPHI = %X.
//
//    The analysis will find that the value coming around the backedge has
//    the following SCEV:
//         BEValue = ((sext i32 (trunc i64 %X to i32) to i64) + %Step)
//    Upon concluding that this matches the desired pattern, the function
//    will return the pair {NewAddRec, SmallPredsVec} where:
//         NewAddRec = {%Start,+,%Step}
//         SmallPredsVec = {P1, P2, P3} as follows:
//           P1(WrapPred): AR: {trunc(%Start),+,(trunc %Step)}<nssw> Flags: <nssw>
//           P2(EqualPred): %Start == (sext i32 (trunc i64 %Start to i32) to i64)
//           P3(EqualPred): %Step == (sext i32 (trunc i64 %Step to i32) to i64)
//    The returned pair means that SymbolicPHI can be rewritten into NewAddRec
//    under the predicates {P1,P2,P3}.
//    This predicated rewrite will be cached in PredicatedSCEVRewrites:
//         PredicatedSCEVRewrites[{%X,L}] = {NewAddRec, {P1,P2,P3}}
//
// TODO's:
//
// 1) Extend the Induction descriptor to also support inductions that involve
//    casts: When needed (namely, when we are called in the context of the
//    vectorizer induction analysis), a Set of cast instructions will be
//    populated by this method, and provided back to isInductionPHI. This is
//    needed to allow the vectorizer to properly record them to be ignored by
//    the cost model and to avoid vectorizing them (otherwise these casts,
//    which are redundant under the runtime overflow checks, will be
//    vectorized, which can be costly).
//
// 2) Support additional induction/PHISCEV patterns: We also want to support
//    inductions where the sext-trunc / zext-trunc operations (partly) occur
//    after the induction update operation (the induction increment):
//
//      (Trunc iy (SExt/ZExt ix (%SymbolicPHI + InvariantAccum) to iy) to ix)
//    which corresponds to a phi->add->trunc->sext/zext->phi update chain.
//
//      (Trunc iy ((SExt/ZExt ix (%SymbolicPhi) to iy) + InvariantAccum) to ix)
//    which corresponds to a phi->trunc->add->sext/zext->phi update chain.
//
// 3) Outline common code with createAddRecFromPHI to avoid duplication.
Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
ScalarEvolution::createAddRecFromPHIWithCastsImpl(
    const SCEVUnknown *SymbolicPHI) {
  SmallVector<const SCEVPredicate *, 3> Predicates;

  // *** Part1: Analyze if we have a phi-with-cast pattern for which we can
  // return an AddRec expression under some predicate.

  auto *PN = cast<PHINode>(SymbolicPHI->getValue());
  const Loop *L = isIntegerLoopHeaderPHI(PN, LI);
  assert(L && "Expecting an integer loop header phi");

  // The loop may have multiple entrances or multiple exits; we can analyze
  // this phi as an addrec if it has a unique entry value and a unique
  // backedge value.
  Value *BEValueV = nullptr, *StartValueV = nullptr;
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
    Value *V = PN->getIncomingValue(i);
    if (L->contains(PN->getIncomingBlock(i))) {
      if (!BEValueV) {
        BEValueV = V;
      } else if (BEValueV != V) {
        BEValueV = nullptr;
        break;
      }
    } else if (!StartValueV) {
      StartValueV = V;
    } else if (StartValueV != V) {
      StartValueV = nullptr;
      break;
    }
  }
  if (!BEValueV || !StartValueV)
    return None;

  const SCEV *BEValue = getSCEV(BEValueV);

  // If the value coming around the backedge is an add with the symbolic
  // value we just inserted, possibly with casts that we can ignore under
  // an appropriate runtime guard, then we found a simple induction variable!
  const auto *Add = dyn_cast<SCEVAddExpr>(BEValue);
  if (!Add)
    return None;

  // If there is a single occurrence of the symbolic value, possibly
  // casted, replace it with a recurrence.
  unsigned FoundIndex = Add->getNumOperands();
  Type *TruncTy = nullptr;
  bool Signed;
  for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
    if ((TruncTy = isSimpleCastedPHI(Add->getOperand(i), SymbolicPHI, Signed,
                                     *this)))
      if (FoundIndex == e) {
        FoundIndex = i;
        break;
      }

  if (FoundIndex == Add->getNumOperands())
    return None;

  // Create an add with everything but the specified operand.
  SmallVector<const SCEV *, 8> Ops;
  for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
    if (i != FoundIndex)
      Ops.push_back(Add->getOperand(i));
  const SCEV *Accum = getAddExpr(Ops);

  // The runtime checks will not be valid if the step amount is
  // varying inside the loop.
  if (!isLoopInvariant(Accum, L))
    return None;

  // *** Part2: Create the predicates

  // Analysis was successful: we have a phi-with-cast pattern for which we
  // can return an AddRec expression under the following predicates:
  //
  // P1: A Wrap predicate that guarantees that Trunc(Start) + i*Trunc(Accum)
  //     fits within the truncated type (does not overflow) for i = 0 to n-1.
  // P2: An Equal predicate that guarantees that
  //         Start = (Ext ix (Trunc iy (Start) to ix) to iy)
  // P3: An Equal predicate that guarantees that
  //         Accum = (Ext ix (Trunc iy (Accum) to ix) to iy)
  //
  // As we next prove, the above predicates guarantee that:
  //     Start + i*Accum = (Ext ix (Trunc iy ( Start + i*Accum ) to ix) to iy)
  //
  //
  // More formally, we want to prove that:
  //     Expr(i+1) = Start + (i+1) * Accum
  //               = (Ext ix (Trunc iy (Expr(i)) to ix) to iy) + Accum
  //
  // Given that:
  // 1) Expr(0) = Start
  // 2) Expr(1) = Start + Accum
  //            = (Ext ix (Trunc iy (Start) to ix) to iy) + Accum :: from P2
  // 3) Induction hypothesis (step i):
  //    Expr(i) = (Ext ix (Trunc iy (Expr(i-1)) to ix) to iy) + Accum
  //
  // Proof:
  //  Expr(i+1) =
  //   = Start + (i+1)*Accum
  //   = (Start + i*Accum) + Accum
  //   = Expr(i) + Accum
  //   = (Ext ix (Trunc iy (Expr(i-1)) to ix) to iy) + Accum + Accum
  //                                                             :: from step i
  //
  //   = (Ext ix (Trunc iy (Start + (i-1)*Accum) to ix) to iy) + Accum + Accum
  //
  //   = (Ext ix (Trunc iy (Start + (i-1)*Accum) to ix) to iy)
  //     + (Ext ix (Trunc iy (Accum) to ix) to iy)
  //     + Accum                                                     :: from P3
  //
  //   = (Ext ix (Trunc iy ((Start + (i-1)*Accum) + Accum) to ix) to iy)
  //     + Accum                            :: from P1: Ext(x)+Ext(y)=>Ext(x+y)
  //
  //   = (Ext ix (Trunc iy (Start + i*Accum) to ix) to iy) + Accum
  //   = (Ext ix (Trunc iy (Expr(i)) to ix) to iy) + Accum
  //
  // By induction, the same applies to all iterations 1<=i<n.
  //
  // Create a truncated addrec for which we will add a no overflow check (P1).
  const SCEV *StartVal = getSCEV(StartValueV);
  const SCEV *PHISCEV =
      getAddRecExpr(getTruncateExpr(StartVal, TruncTy),
                    getTruncateExpr(Accum, TruncTy), L, SCEV::FlagAnyWrap);

  // PHISCEV can be either a SCEVConstant or a SCEVAddRecExpr.
  // ex: If truncated Accum is 0 and StartVal is a constant, then PHISCEV
  // will be constant.
  //
  // If PHISCEV is a constant, then P1 degenerates into P2 or P3, so we don't
  // add P1.
  if (const auto *AR = dyn_cast<SCEVAddRecExpr>(PHISCEV)) {
    SCEVWrapPredicate::IncrementWrapFlags AddedFlags =
        Signed ? SCEVWrapPredicate::IncrementNSSW
               : SCEVWrapPredicate::IncrementNUSW;
    const SCEVPredicate *AddRecPred = getWrapPredicate(AR, AddedFlags);
    Predicates.push_back(AddRecPred);
  }

  // Create the Equal Predicates P2,P3:

  // It is possible that the predicates P2 and/or P3 are computable at
  // compile time due to StartVal and/or Accum being constants.
  // If either one is, then we can check that now and escape if either P2
  // or P3 is false.

  // Construct the extended SCEV: (Ext ix (Trunc iy (Expr) to ix) to iy)
  // for each of StartVal and Accum
  auto getExtendedExpr = [&](const SCEV *Expr,
                             bool CreateSignExtend) -> const SCEV * {
    assert(isLoopInvariant(Expr, L) && "Expr is expected to be invariant");
    const SCEV *TruncatedExpr = getTruncateExpr(Expr, TruncTy);
    const SCEV *ExtendedExpr =
        CreateSignExtend ? getSignExtendExpr(TruncatedExpr, Expr->getType())
                         : getZeroExtendExpr(TruncatedExpr, Expr->getType());
    return ExtendedExpr;
  };

  // Given:
  //  ExtendedExpr = (Ext ix (Trunc iy (Expr) to ix) to iy
  //               = getExtendedExpr(Expr)
  // Determine whether the predicate P: Expr == ExtendedExpr
  // is known to be false at compile time
  auto PredIsKnownFalse = [&](const SCEV *Expr,
                              const SCEV *ExtendedExpr) -> bool {
    return Expr != ExtendedExpr &&
           isKnownPredicate(ICmpInst::ICMP_NE, Expr, ExtendedExpr);
  };

  const SCEV *StartExtended = getExtendedExpr(StartVal, Signed);
  if (PredIsKnownFalse(StartVal, StartExtended)) {
    DEBUG(dbgs() << "P2 is compile-time false\n";);
    return None;
  }

  // The Step is always Signed (because the overflow checks are either
  // NSSW or NUSW)
  const SCEV *AccumExtended = getExtendedExpr(Accum, /*CreateSignExtend=*/true);
  if (PredIsKnownFalse(Accum, AccumExtended)) {
    DEBUG(dbgs() << "P3 is compile-time false\n";);
    return None;
  }

  auto AppendPredicate = [&](const SCEV *Expr,
                             const SCEV *ExtendedExpr) -> void {
    if (Expr != ExtendedExpr &&
        !isKnownPredicate(ICmpInst::ICMP_EQ, Expr, ExtendedExpr)) {
      const SCEVPredicate *Pred = getEqualPredicate(Expr, ExtendedExpr);
      DEBUG(dbgs() << "Added Predicate: " << *Pred);
      Predicates.push_back(Pred);
    }
  };

  AppendPredicate(StartVal, StartExtended);
  AppendPredicate(Accum, AccumExtended);

  // *** Part3: Predicates are ready. Now go ahead and create the new addrec in
  // which the casts had been folded away. The caller can rewrite SymbolicPHI
  // into NewAR if it will also add the runtime overflow checks specified in
  // Predicates.
  auto *NewAR = getAddRecExpr(StartVal, Accum, L, SCEV::FlagAnyWrap);
  std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>> PredRewrite =
      std::make_pair(NewAR, Predicates);
  // Remember the result of the analysis for this SCEV at this location.
  PredicatedSCEVRewrites[{SymbolicPHI, L}] = PredRewrite;
  return PredRewrite;
}

Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
ScalarEvolution::createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI) {
  auto *PN = cast<PHINode>(SymbolicPHI->getValue());
  const Loop *L = isIntegerLoopHeaderPHI(PN, LI);
  if (!L)
    return None;

  // Check to see if we already analyzed this PHI.
  auto I = PredicatedSCEVRewrites.find({SymbolicPHI, L});
  if (I != PredicatedSCEVRewrites.end()) {
    std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>> Rewrite =
        I->second;
    // Analysis was done before and failed to create an AddRec:
    if (Rewrite.first == SymbolicPHI)
      return None;
    // Analysis was done before and succeeded to create an AddRec under
    // a predicate:
    assert(isa<SCEVAddRecExpr>(Rewrite.first) && "Expected an AddRec");
    assert(!(Rewrite.second).empty() && "Expected to find Predicates");
    return Rewrite;
  }

  Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
      Rewrite = createAddRecFromPHIWithCastsImpl(SymbolicPHI);

  // Record in the cache that the analysis failed
  if (!Rewrite) {
    SmallVector<const SCEVPredicate *, 3> Predicates;
    PredicatedSCEVRewrites[{SymbolicPHI, L}] = {SymbolicPHI, Predicates};
    return None;
  }

  return Rewrite;
}

// FIXME: This utility is currently required because the Rewriter currently
// does not rewrite this expression:
// {0, +, (sext ix (trunc iy to ix) to iy)}
// into {0, +, %step},
// even when the following Equal predicate exists:
// "%step == (sext ix (trunc iy to ix) to iy)".
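// As a concrete illustration (hypothetical i64 step %step): under the
// predicate "%step == (sext i32 (trunc i64 %step to i32) to i64)", the
// addrecs {0,+,(sext i32 (trunc i64 %step to i32) to i64)} and {0,+,%step}
// are treated as equal below even though they stay syntactically different.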
bool PredicatedScalarEvolution::areAddRecsEqualWithPreds(
    const SCEVAddRecExpr *AR1, const SCEVAddRecExpr *AR2) const {
  if (AR1 == AR2)
    return true;

  auto areExprsEqual = [&](const SCEV *Expr1, const SCEV *Expr2) -> bool {
    if (Expr1 != Expr2 && !Preds.implies(SE.getEqualPredicate(Expr1, Expr2)) &&
        !Preds.implies(SE.getEqualPredicate(Expr2, Expr1)))
      return false;
    return true;
  };

  if (!areExprsEqual(AR1->getStart(), AR2->getStart()) ||
      !areExprsEqual(AR1->getStepRecurrence(SE), AR2->getStepRecurrence(SE)))
    return false;
  return true;
}

/// A helper function for createAddRecFromPHI to handle simple cases.
///
/// This function tries to find an AddRec expression for the simplest (yet most
/// common) cases: PN = PHI(Start, OP(Self, LoopInvariant)).
/// If it fails, createAddRecFromPHI will use a more general, but slow,
/// technique for finding the AddRec expression.
const SCEV *ScalarEvolution::createSimpleAffineAddRec(PHINode *PN,
                                                      Value *BEValueV,
                                                      Value *StartValueV) {
  const Loop *L = LI.getLoopFor(PN->getParent());
  assert(L && L->getHeader() == PN->getParent());
  assert(BEValueV && StartValueV);

  auto BO = MatchBinaryOp(BEValueV, DT);
  if (!BO)
    return nullptr;

  if (BO->Opcode != Instruction::Add)
    return nullptr;

  const SCEV *Accum = nullptr;
  if (BO->LHS == PN && L->isLoopInvariant(BO->RHS))
    Accum = getSCEV(BO->RHS);
  else if (BO->RHS == PN && L->isLoopInvariant(BO->LHS))
    Accum = getSCEV(BO->LHS);

  if (!Accum)
    return nullptr;

  SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
  if (BO->IsNUW)
    Flags = setFlags(Flags, SCEV::FlagNUW);
  if (BO->IsNSW)
    Flags = setFlags(Flags, SCEV::FlagNSW);

  const SCEV *StartVal = getSCEV(StartValueV);
  const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);

  ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;

  // We can add Flags to the post-inc expression only if we
  // know that it is *undefined behavior* for BEValueV to
  // overflow.
  if (auto *BEInst = dyn_cast<Instruction>(BEValueV))
    if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L))
      (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags);

  return PHISCEV;
}

const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) {
  const Loop *L = LI.getLoopFor(PN->getParent());
  if (!L || L->getHeader() != PN->getParent())
    return nullptr;

  // The loop may have multiple entrances or multiple exits; we can analyze
  // this phi as an addrec if it has a unique entry value and a unique
  // backedge value.
  Value *BEValueV = nullptr, *StartValueV = nullptr;
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
    Value *V = PN->getIncomingValue(i);
    if (L->contains(PN->getIncomingBlock(i))) {
      if (!BEValueV) {
        BEValueV = V;
      } else if (BEValueV != V) {
        BEValueV = nullptr;
        break;
      }
    } else if (!StartValueV) {
      StartValueV = V;
    } else if (StartValueV != V) {
      StartValueV = nullptr;
      break;
    }
  }
  if (!BEValueV || !StartValueV)
    return nullptr;

  assert(ValueExprMap.find_as(PN) == ValueExprMap.end() &&
         "PHI node already processed?");

  // First, try to find an AddRec expression without creating a fictitious
  // symbolic value for PN.
  if (auto *S = createSimpleAffineAddRec(PN, BEValueV, StartValueV))
    return S;

  // Handle PHI node value symbolically.
  const SCEV *SymbolicName = getUnknown(PN);
  ValueExprMap.insert({SCEVCallbackVH(PN, this), SymbolicName});

  // Using this symbolic name for the PHI, analyze the value coming around
  // the back-edge.
  const SCEV *BEValue = getSCEV(BEValueV);

  // NOTE: If BEValue is loop invariant, we know that the PHI node just
  // has a special value for the first iteration of the loop.
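  // For example (hypothetical IR):
  //   %iv      = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  //   %iv.next = add nsw i64 %iv, 1
  // Here BEValue is the symbolic name plus 1, and the analysis below folds
  // the PHI into the addrec {0,+,1}<nsw>.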
  // If the value coming around the backedge is an add with the symbolic
  // value we just inserted, then we found a simple induction variable!
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) {
    // If there is a single occurrence of the symbolic value, replace it
    // with a recurrence.
    unsigned FoundIndex = Add->getNumOperands();
    for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
      if (Add->getOperand(i) == SymbolicName)
        if (FoundIndex == e) {
          FoundIndex = i;
          break;
        }

    if (FoundIndex != Add->getNumOperands()) {
      // Create an add with everything but the specified operand.
      SmallVector<const SCEV *, 8> Ops;
      for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
        if (i != FoundIndex)
          Ops.push_back(SCEVBackedgeConditionFolder::rewrite(Add->getOperand(i),
                                                             L, *this));
      const SCEV *Accum = getAddExpr(Ops);

      // This is not a valid addrec if the step amount is varying each
      // loop iteration, but is not itself an addrec in this loop.
      if (isLoopInvariant(Accum, L) ||
          (isa<SCEVAddRecExpr>(Accum) &&
           cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
        SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;

        if (auto BO = MatchBinaryOp(BEValueV, DT)) {
          if (BO->Opcode == Instruction::Add && BO->LHS == PN) {
            if (BO->IsNUW)
              Flags = setFlags(Flags, SCEV::FlagNUW);
            if (BO->IsNSW)
              Flags = setFlags(Flags, SCEV::FlagNSW);
          }
        } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) {
          // If the increment is an inbounds GEP, then we know the address
          // space cannot be wrapped around. We cannot make any guarantee
          // about signed or unsigned overflow because pointers are
          // unsigned but we may have a negative index from the base
          // pointer. We can guarantee that no unsigned wrap occurs if the
          // indices form a positive value.
          if (GEP->isInBounds() && GEP->getOperand(0) == PN) {
            Flags = setFlags(Flags, SCEV::FlagNW);

            const SCEV *Ptr = getSCEV(GEP->getPointerOperand());
            if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr)))
              Flags = setFlags(Flags, SCEV::FlagNUW);
          }

          // We cannot transfer nuw and nsw flags from subtraction
          // operations -- sub nuw X, Y is not the same as add nuw X, -Y
          // for instance.
        }

        const SCEV *StartVal = getSCEV(StartValueV);
        const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);

        // Okay, for the entire analysis of this edge we assumed the PHI
        // to be symbolic. We now need to go back and purge all of the
        // entries for the scalars that use the symbolic expression.
        forgetSymbolicName(PN, SymbolicName);
        ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;

        // We can add Flags to the post-inc expression only if we
        // know that it is *undefined behavior* for BEValueV to
        // overflow.
        if (auto *BEInst = dyn_cast<Instruction>(BEValueV))
          if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L))
            (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags);

        return PHISCEV;
      }
    }
  } else {
    // Otherwise, this could be a loop like this:
    //     i = 0;  for (j = 1; ..; ++j) { ....  i = j; }
    // In this case, j = {1,+,1} and BEValue is j.
    // Because the other in-value of i (0) fits the evolution of BEValue,
    // i really is an addrec evolution.
    //
    // We can generalize this by saying that i is the shifted value of BEValue
    // by one iteration:
    //   PHI(f(0), f({1,+,1})) --> f({0,+,1})
    const SCEV *Shifted = SCEVShiftRewriter::rewrite(BEValue, L, *this);
    const SCEV *Start = SCEVInitRewriter::rewrite(Shifted, L, *this);
    if (Shifted != getCouldNotCompute() && Start != getCouldNotCompute()) {
      const SCEV *StartVal = getSCEV(StartValueV);
      if (Start == StartVal) {
        // Okay, for the entire analysis of this edge we assumed the PHI
        // to be symbolic. We now need to go back and purge all of the
        // entries for the scalars that use the symbolic expression.
        forgetSymbolicName(PN, SymbolicName);
        ValueExprMap[SCEVCallbackVH(PN, this)] = Shifted;
        return Shifted;
      }
    }
  }

  // Remove the temporary PHI node SCEV that has been inserted while intending
  // to create an AddRecExpr for this PHI node. We cannot keep this temporary
  // entry, as it would prevent later (possibly simpler) SCEV expressions from
  // being added to the ValueExprMap.
  eraseValueFromMap(PN);

  return nullptr;
}

// Checks if the SCEV S is available at BB.  S is considered available at BB
// if S can be materialized at BB without introducing a fault.
static bool IsAvailableOnEntry(const Loop *L, DominatorTree &DT, const SCEV *S,
                               BasicBlock *BB) {
  struct CheckAvailable {
    bool TraversalDone = false;
    bool Available = true;

    const Loop *L = nullptr;  // The loop BB is in (can be nullptr)
    BasicBlock *BB = nullptr;
    DominatorTree &DT;

    CheckAvailable(const Loop *L, BasicBlock *BB, DominatorTree &DT)
        : L(L), BB(BB), DT(DT) {}

    bool setUnavailable() {
      TraversalDone = true;
      Available = false;
      return false;
    }

    bool follow(const SCEV *S) {
      switch (S->getSCEVType()) {
      case scConstant:
      case scTruncate:
      case scZeroExtend:
      case scSignExtend:
      case scAddExpr:
      case scMulExpr:
      case scUMaxExpr:
      case scSMaxExpr:
        // These expressions are available if their operand(s) is/are.
        return true;

      case scAddRecExpr: {
        // We allow add recurrences that are on the loop BB is in, or some
        // outer loop.  This guarantees availability because the value of the
        // add recurrence at BB is simply the "current" value of the induction
        // variable.  We can relax this in the future; for instance an add
        // recurrence on a sibling dominating loop is also available at BB.
        const auto *ARLoop = cast<SCEVAddRecExpr>(S)->getLoop();
        if (L && (ARLoop == L || ARLoop->contains(L)))
          return true;

        return setUnavailable();
      }

      case scUnknown: {
        // For SCEVUnknown, we check for simple dominance.
        const auto *SU = cast<SCEVUnknown>(S);
        Value *V = SU->getValue();

        if (isa<Argument>(V))
          return false;

        if (isa<Instruction>(V) && DT.dominates(cast<Instruction>(V), BB))
          return false;

        return setUnavailable();
      }

      case scUDivExpr:
      case scCouldNotCompute:
        // We do not try to be smart about these at all.
        return setUnavailable();
      }
      llvm_unreachable("switch should be fully covered!");
    }

    bool isDone() { return TraversalDone; }
  };

  CheckAvailable CA(L, BB, DT);
  SCEVTraversal<CheckAvailable> ST(CA);

  ST.visitAll(S);
  return CA.Available;
}

// Try to match a control flow sequence that branches out at BI and merges back
// at Merge into a "C ? LHS : RHS" select pattern.  Return true on a successful
// match.
static bool BrPHIToSelect(DominatorTree &DT, BranchInst *BI, PHINode *Merge,
                          Value *&C, Value *&LHS, Value *&RHS) {
  C = BI->getCondition();

  BasicBlockEdge LeftEdge(BI->getParent(), BI->getSuccessor(0));
  BasicBlockEdge RightEdge(BI->getParent(), BI->getSuccessor(1));

  if (!LeftEdge.isSingleEdge())
    return false;

  assert(RightEdge.isSingleEdge() && "Follows from LeftEdge.isSingleEdge()");

  Use &LeftUse = Merge->getOperandUse(0);
  Use &RightUse = Merge->getOperandUse(1);

  if (DT.dominates(LeftEdge, LeftUse) && DT.dominates(RightEdge, RightUse)) {
    LHS = LeftUse;
    RHS = RightUse;
    return true;
  }

  if (DT.dominates(LeftEdge, RightUse) && DT.dominates(RightEdge, LeftUse)) {
    LHS = RightUse;
    RHS = LeftUse;
    return true;
  }

  return false;
}

const SCEV *ScalarEvolution::createNodeFromSelectLikePHI(PHINode *PN) {
  auto IsReachable =
      [&](BasicBlock *BB) { return DT.isReachableFromEntry(BB); };
  if (PN->getNumIncomingValues() == 2 && all_of(PN->blocks(), IsReachable)) {
    const Loop *L = LI.getLoopFor(PN->getParent());

    // We don't want to break LCSSA, even in a SCEV expression tree.
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      if (LI.getLoopFor(PN->getIncomingBlock(i)) != L)
        return nullptr;

    // Try to match
    //
    //  br %cond, label %left, label %right
    // left:
    //  br label %merge
    // right:
    //  br label %merge
    // merge:
    //  V = phi [ %x, %left ], [ %y, %right ]
    //
    // as "select %cond, %x, %y"

    BasicBlock *IDom = DT[PN->getParent()]->getIDom()->getBlock();
    assert(IDom && "At least the entry block should dominate PN");

    auto *BI = dyn_cast<BranchInst>(IDom->getTerminator());
    Value *Cond = nullptr, *LHS = nullptr, *RHS = nullptr;

    if (BI && BI->isConditional() &&
        BrPHIToSelect(DT, BI, PN, Cond, LHS, RHS) &&
        IsAvailableOnEntry(L, DT, getSCEV(LHS), PN->getParent()) &&
        IsAvailableOnEntry(L, DT, getSCEV(RHS), PN->getParent()))
      return createNodeForSelectOrPHI(PN, Cond, LHS, RHS);
  }

  return nullptr;
}

const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
  if (const SCEV *S = createAddRecFromPHI(PN))
    return S;

  if (const SCEV *S = createNodeFromSelectLikePHI(PN))
    return S;

  // If the PHI has a single incoming value, follow that value, unless the
  // PHI's incoming blocks are in a different loop, in which case doing so
  // risks breaking LCSSA form. Instcombine would normally zap these, but
  // it doesn't have DominatorTree information, so it may miss cases.
  if (Value *V = SimplifyInstruction(PN, {getDataLayout(), &TLI, &DT, &AC}))
    if (LI.replacementPreservesLCSSAForm(PN, V))
      return getSCEV(V);

  // If it's not a loop phi, we can't handle it yet.
  return getUnknown(PN);
}

const SCEV *ScalarEvolution::createNodeForSelectOrPHI(Instruction *I,
                                                      Value *Cond,
                                                      Value *TrueVal,
                                                      Value *FalseVal) {
  // Handle "constant" branch or select. This can occur for instance when a
  // loop pass transforms an inner loop and moves on to process the outer loop.
  if (auto *CI = dyn_cast<ConstantInt>(Cond))
    return getSCEV(CI->isOne() ? TrueVal : FalseVal);

  // Try to match some simple smax or umax patterns.
  auto *ICI = dyn_cast<ICmpInst>(Cond);
  if (!ICI)
    return getUnknown(I);

  Value *LHS = ICI->getOperand(0);
  Value *RHS = ICI->getOperand(1);

  switch (ICI->getPredicate()) {
  case ICmpInst::ICMP_SLT:
  case ICmpInst::ICMP_SLE:
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ICmpInst::ICMP_SGT:
  case ICmpInst::ICMP_SGE:
    // a >s b ? a+x : b+x  ->  smax(a, b)+x
    // a >s b ? b+x : a+x  ->  smin(a, b)+x
    if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType())) {
      const SCEV *LS = getNoopOrSignExtend(getSCEV(LHS), I->getType());
      const SCEV *RS = getNoopOrSignExtend(getSCEV(RHS), I->getType());
      const SCEV *LA = getSCEV(TrueVal);
      const SCEV *RA = getSCEV(FalseVal);
      const SCEV *LDiff = getMinusSCEV(LA, LS);
      const SCEV *RDiff = getMinusSCEV(RA, RS);
      if (LDiff == RDiff)
        return getAddExpr(getSMaxExpr(LS, RS), LDiff);
      LDiff = getMinusSCEV(LA, RS);
      RDiff = getMinusSCEV(RA, LS);
      if (LDiff == RDiff)
        return getAddExpr(getSMinExpr(LS, RS), LDiff);
    }
    break;
  case ICmpInst::ICMP_ULT:
  case ICmpInst::ICMP_ULE:
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ICmpInst::ICMP_UGT:
  case ICmpInst::ICMP_UGE:
    // a >u b ? a+x : b+x  ->  umax(a, b)+x
    // a >u b ? b+x : a+x  ->  umin(a, b)+x
    if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType())) {
      const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType());
      const SCEV *RS = getNoopOrZeroExtend(getSCEV(RHS), I->getType());
      const SCEV *LA = getSCEV(TrueVal);
      const SCEV *RA = getSCEV(FalseVal);
      const SCEV *LDiff = getMinusSCEV(LA, LS);
      const SCEV *RDiff = getMinusSCEV(RA, RS);
      if (LDiff == RDiff)
        return getAddExpr(getUMaxExpr(LS, RS), LDiff);
      LDiff = getMinusSCEV(LA, RS);
      RDiff = getMinusSCEV(RA, LS);
      if (LDiff == RDiff)
        return getAddExpr(getUMinExpr(LS, RS), LDiff);
    }
    break;
  case ICmpInst::ICMP_NE:
    // n != 0 ? n+x : 1+x  ->  umax(n, 1)+x
    if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType()) &&
        isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
      const SCEV *One = getOne(I->getType());
      const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType());
      const SCEV *LA = getSCEV(TrueVal);
      const SCEV *RA = getSCEV(FalseVal);
      const SCEV *LDiff = getMinusSCEV(LA, LS);
      const SCEV *RDiff = getMinusSCEV(RA, One);
      if (LDiff == RDiff)
        return getAddExpr(getUMaxExpr(One, LS), LDiff);
    }
    break;
  case ICmpInst::ICMP_EQ:
    // n == 0 ? 1+x : n+x  ->  umax(n, 1)+x
    if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType()) &&
        isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
      const SCEV *One = getOne(I->getType());
      const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType());
      const SCEV *LA = getSCEV(TrueVal);
      const SCEV *RA = getSCEV(FalseVal);
      const SCEV *LDiff = getMinusSCEV(LA, One);
      const SCEV *RDiff = getMinusSCEV(RA, LS);
      if (LDiff == RDiff)
        return getAddExpr(getUMaxExpr(One, LS), LDiff);
    }
    break;
  default:
    break;
  }

  return getUnknown(I);
}

/// Expand GEP instructions into add and multiply operations. This allows them
/// to be analyzed by regular SCEV code.
const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
  // Don't attempt to analyze GEPs over unsized objects.
  if (!GEP->getSourceElementType()->isSized())
    return getUnknown(GEP);

  SmallVector<const SCEV *, 4> IndexExprs;
  for (auto Index = GEP->idx_begin(); Index != GEP->idx_end(); ++Index)
    IndexExprs.push_back(getSCEV(*Index));
  return getGEPExpr(GEP, IndexExprs);
}

uint32_t ScalarEvolution::GetMinTrailingZerosImpl(const SCEV *S) {
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
    return C->getAPInt().countTrailingZeros();

  if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(S))
    return std::min(GetMinTrailingZeros(T->getOperand()),
                    (uint32_t)getTypeSizeInBits(T->getType()));

  if (const SCEVZeroExtendExpr *E = dyn_cast<SCEVZeroExtendExpr>(S)) {
    uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
    return OpRes == getTypeSizeInBits(E->getOperand()->getType())
               ? getTypeSizeInBits(E->getType())
               : OpRes;
  }

  if (const SCEVSignExtendExpr *E = dyn_cast<SCEVSignExtendExpr>(S)) {
    uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
    return OpRes == getTypeSizeInBits(E->getOperand()->getType())
               ? getTypeSizeInBits(E->getType())
               : OpRes;
  }

  if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
    // The result is the min of all operands results.
    uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
    for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
      MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
    return MinOpRes;
  }

  if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
    // The result is the sum of all operands results.
    uint32_t SumOpRes = GetMinTrailingZeros(M->getOperand(0));
    uint32_t BitWidth = getTypeSizeInBits(M->getType());
    for (unsigned i = 1, e = M->getNumOperands();
         SumOpRes != BitWidth && i != e; ++i)
      SumOpRes =
          std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i)), BitWidth);
    return SumOpRes;
  }

  if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
    // The result is the min of all operands results.
    uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
    for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
      MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
    return MinOpRes;
  }

  if (const SCEVSMaxExpr *M = dyn_cast<SCEVSMaxExpr>(S)) {
    // The result is the min of all operands results.
    uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
    for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
      MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
    return MinOpRes;
  }

  if (const SCEVUMaxExpr *M = dyn_cast<SCEVUMaxExpr>(S)) {
    // The result is the min of all operands results.
    uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
    for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
      MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
    return MinOpRes;
  }

  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
    // For a SCEVUnknown, ask ValueTracking.
    KnownBits Known = computeKnownBits(U->getValue(), getDataLayout(), 0, &AC,
                                       nullptr, &DT);
    return Known.countMinTrailingZeros();
  }

  // SCEVUDivExpr
  return 0;
}

uint32_t ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
  auto I = MinTrailingZerosCache.find(S);
  if (I != MinTrailingZerosCache.end())
    return I->second;

  uint32_t Result = GetMinTrailingZerosImpl(S);
  auto InsertPair = MinTrailingZerosCache.insert({S, Result});
  assert(InsertPair.second && "Should insert a new key");
  return InsertPair.first->second;
}

/// Helper method to assign a range to V from metadata present in the IR.
static Optional<ConstantRange> GetRangeFromMetadata(Value *V) {
  if (Instruction *I = dyn_cast<Instruction>(V))
    if (MDNode *MD = I->getMetadata(LLVMContext::MD_range))
      return getConstantRangeFromMetadata(*MD);

  return None;
}

/// Determine the range for a particular SCEV.  If SignHint is
/// HINT_RANGE_UNSIGNED (resp. HINT_RANGE_SIGNED) then getRange prefers ranges
/// with a "cleaner" unsigned (resp. signed) representation.
const ConstantRange &
ScalarEvolution::getRangeRef(const SCEV *S,
                             ScalarEvolution::RangeSignHint SignHint) {
  DenseMap<const SCEV *, ConstantRange> &Cache =
      SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED ? UnsignedRanges
                                                       : SignedRanges;

  // See if we've computed this range already.
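  // Note that ranges are memoized per sign hint: getUnsignedRange and
  // getSignedRange of the same SCEV may cache two different ConstantRanges,
  // one per preferred representation.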
  DenseMap<const SCEV *, ConstantRange>::iterator I = Cache.find(S);
  if (I != Cache.end())
    return I->second;

  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
    return setRange(C, SignHint, ConstantRange(C->getAPInt()));

  unsigned BitWidth = getTypeSizeInBits(S->getType());
  ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);

  // If the value has known zeros, the maximum value will have those known
  // zeros as well.
  uint32_t TZ = GetMinTrailingZeros(S);
  if (TZ != 0) {
    if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED)
      ConservativeResult =
          ConstantRange(APInt::getMinValue(BitWidth),
                        APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ) + 1);
    else
      ConservativeResult = ConstantRange(
          APInt::getSignedMinValue(BitWidth),
          APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1);
  }

  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    ConstantRange X = getRangeRef(Add->getOperand(0), SignHint);
    for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
      X = X.add(getRangeRef(Add->getOperand(i), SignHint));
    return setRange(Add, SignHint, ConservativeResult.intersectWith(X));
  }

  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
    ConstantRange X = getRangeRef(Mul->getOperand(0), SignHint);
    for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
      X = X.multiply(getRangeRef(Mul->getOperand(i), SignHint));
    return setRange(Mul, SignHint, ConservativeResult.intersectWith(X));
  }

  if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
    ConstantRange X = getRangeRef(SMax->getOperand(0), SignHint);
    for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
      X = X.smax(getRangeRef(SMax->getOperand(i), SignHint));
    return setRange(SMax, SignHint, ConservativeResult.intersectWith(X));
  }

  if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
    ConstantRange X = getRangeRef(UMax->getOperand(0), SignHint);
    for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
      X = X.umax(getRangeRef(UMax->getOperand(i), SignHint));
    return setRange(UMax, SignHint, ConservativeResult.intersectWith(X));
  }

  if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
    ConstantRange X = getRangeRef(UDiv->getLHS(), SignHint);
    ConstantRange Y = getRangeRef(UDiv->getRHS(), SignHint);
    return setRange(UDiv, SignHint,
                    ConservativeResult.intersectWith(X.udiv(Y)));
  }

  if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
    ConstantRange X = getRangeRef(ZExt->getOperand(), SignHint);
    return setRange(ZExt, SignHint,
                    ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
  }

  if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
    ConstantRange X = getRangeRef(SExt->getOperand(), SignHint);
    return setRange(SExt, SignHint,
                    ConservativeResult.intersectWith(X.signExtend(BitWidth)));
  }

  if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
    ConstantRange X = getRangeRef(Trunc->getOperand(), SignHint);
    return setRange(Trunc, SignHint,
                    ConservativeResult.intersectWith(X.truncate(BitWidth)));
  }

  if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
    // If there's no unsigned wrap, the value will never be less than its
    // initial value.
    if (AddRec->hasNoUnsignedWrap())
      if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart()))
        if (!C->getValue()->isZero())
          ConservativeResult = ConservativeResult.intersectWith(
              ConstantRange(C->getAPInt(), APInt(BitWidth, 0)));

    // If there's no signed wrap, and all the operands have the same sign or
    // zero, the value won't ever change sign.
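    // E.g. a {1,+,2}<nsw> addrec has only non-negative operands, so all of
    // its values stay within [0, SINT_MAX]; the intersection below records
    // exactly that half-range.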
    if (AddRec->hasNoSignedWrap()) {
      bool AllNonNeg = true;
      bool AllNonPos = true;
      for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
        if (!isKnownNonNegative(AddRec->getOperand(i)))
          AllNonNeg = false;
        if (!isKnownNonPositive(AddRec->getOperand(i)))
          AllNonPos = false;
      }
      if (AllNonNeg)
        ConservativeResult = ConservativeResult.intersectWith(
            ConstantRange(APInt(BitWidth, 0),
                          APInt::getSignedMinValue(BitWidth)));
      else if (AllNonPos)
        ConservativeResult = ConservativeResult.intersectWith(
            ConstantRange(APInt::getSignedMinValue(BitWidth),
                          APInt(BitWidth, 1)));
    }

    // TODO: non-affine addrec
    if (AddRec->isAffine()) {
      const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
      if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
          getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
        auto RangeFromAffine = getRangeForAffineAR(
            AddRec->getStart(), AddRec->getStepRecurrence(*this), MaxBECount,
            BitWidth);
        if (!RangeFromAffine.isFullSet())
          ConservativeResult =
              ConservativeResult.intersectWith(RangeFromAffine);

        auto RangeFromFactoring = getRangeViaFactoring(
            AddRec->getStart(), AddRec->getStepRecurrence(*this), MaxBECount,
            BitWidth);
        if (!RangeFromFactoring.isFullSet())
          ConservativeResult =
              ConservativeResult.intersectWith(RangeFromFactoring);
      }
    }

    return setRange(AddRec, SignHint, std::move(ConservativeResult));
  }

  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
    // Check if the IR explicitly contains !range metadata.
    Optional<ConstantRange> MDRange = GetRangeFromMetadata(U->getValue());
    if (MDRange.hasValue())
      ConservativeResult =
          ConservativeResult.intersectWith(MDRange.getValue());

    // Split here to avoid paying the compile-time cost of calling both
    // computeKnownBits and ComputeNumSignBits.  This restriction can be
    // lifted if needed.
    const DataLayout &DL = getDataLayout();
    if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) {
      // For a SCEVUnknown, ask ValueTracking.
      KnownBits Known =
          computeKnownBits(U->getValue(), DL, 0, &AC, nullptr, &DT);
      if (Known.One != ~Known.Zero + 1)
        ConservativeResult = ConservativeResult.intersectWith(
            ConstantRange(Known.One, ~Known.Zero + 1));
    } else {
      assert(SignHint == ScalarEvolution::HINT_RANGE_SIGNED &&
             "generalize as needed!");
      unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, &AC, nullptr,
                                       &DT);
      if (NS > 1)
        ConservativeResult = ConservativeResult.intersectWith(
            ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
                          APInt::getSignedMaxValue(BitWidth).ashr(NS - 1) + 1));
    }

    return setRange(U, SignHint, std::move(ConservativeResult));
  }

  return setRange(S, SignHint, std::move(ConservativeResult));
}

// Given a StartRange, Step and MaxBECount for an expression compute a range of
// values that the expression can take. Initially, the expression has a value
// from StartRange and then is changed by Step up to MaxBECount times. Signed
// argument defines if we treat Step as signed or unsigned.
static ConstantRange getRangeForAffineARHelper(APInt Step,
                                               const ConstantRange &StartRange,
                                               const APInt &MaxBECount,
                                               unsigned BitWidth, bool Signed) {
  // If either Step or MaxBECount is 0, then the expression won't change, and
  // we just need to return the initial range.
  if (Step == 0 || MaxBECount == 0)
    return StartRange;

  // If we don't know anything about the initial value (i.e. StartRange is
  // FullRange), then we don't know anything about the final range either.
  // Return FullRange.
  if (StartRange.isFullSet())
    return ConstantRange(BitWidth, /* isFullSet = */ true);

  // If Step is signed and negative, then we use its absolute value, but we
  // also note that we're moving in the opposite direction.
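  // Worked example (i8, hypothetical values): StartRange = [10, 21),
  // Step = -3, MaxBECount = 4. Then Descending is true, Step.abs() = 3,
  // Offset = 12, and the moved lower bound is 10 - 12 = -2, so the result
  // computed below is the wrapped range [-2, 21), i.e. [254, 21) unsigned.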
  bool Descending = Signed && Step.isNegative();

  if (Signed)
    // This is correct even for INT_SMIN. Let's look at i8 to illustrate this:
    //   abs(INT_SMIN) = abs(-128) = abs(0x80) = -0x80 = 0x80 = 128.
    // These equations hold true due to the well-defined wrap-around behavior
    // of APInt.
    Step = Step.abs();

  // Check if Offset is more than full span of BitWidth. If it is, the
  // expression is guaranteed to overflow.
  if (APInt::getMaxValue(StartRange.getBitWidth()).udiv(Step).ult(MaxBECount))
    return ConstantRange(BitWidth, /* isFullSet = */ true);

  // Offset is by how much the expression can change. Checks above guarantee
  // no overflow here.
  APInt Offset = Step * MaxBECount;

  // Minimum value of the final range will match the minimal value of
  // StartRange if the expression is increasing and will be decreased by
  // Offset otherwise.
  // Maximum value of the final range will match the maximal value of
  // StartRange if the expression is decreasing and will be increased by
  // Offset otherwise.
  APInt StartLower = StartRange.getLower();
  APInt StartUpper = StartRange.getUpper() - 1;
  APInt MovedBoundary = Descending ? (StartLower - std::move(Offset))
                                   : (StartUpper + std::move(Offset));

  // It's possible that the new minimum/maximum value will fall into the
  // initial range (due to wrap around). This means that the expression can
  // take any value in this bitwidth, and we have to return full range.
  if (StartRange.contains(MovedBoundary))
    return ConstantRange(BitWidth, /* isFullSet = */ true);

  APInt NewLower =
      Descending ? std::move(MovedBoundary) : std::move(StartLower);
  APInt NewUpper =
      Descending ? std::move(StartUpper) : std::move(MovedBoundary);
  NewUpper += 1;

  // If we end up with full range, return a proper full range.
  if (NewLower == NewUpper)
    return ConstantRange(BitWidth, /* isFullSet = */ true);

  // No overflow detected, return [StartLower, StartUpper + Offset + 1) range.
  return ConstantRange(std::move(NewLower), std::move(NewUpper));
}

ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start,
                                                   const SCEV *Step,
                                                   const SCEV *MaxBECount,
                                                   unsigned BitWidth) {
  assert(!isa<SCEVCouldNotCompute>(MaxBECount) &&
         getTypeSizeInBits(MaxBECount->getType()) <= BitWidth &&
         "Precondition!");

  MaxBECount = getNoopOrZeroExtend(MaxBECount, Start->getType());
  APInt MaxBECountValue = getUnsignedRangeMax(MaxBECount);

  // First, consider step signed.
  ConstantRange StartSRange = getSignedRange(Start);
  ConstantRange StepSRange = getSignedRange(Step);

  // If Step can be both positive and negative, we need to find ranges for the
  // maximum absolute step values in both directions and union them.
  ConstantRange SR =
      getRangeForAffineARHelper(StepSRange.getSignedMin(), StartSRange,
                                MaxBECountValue, BitWidth, /* Signed = */ true);
  SR = SR.unionWith(getRangeForAffineARHelper(StepSRange.getSignedMax(),
                                              StartSRange, MaxBECountValue,
                                              BitWidth, /* Signed = */ true));

  // Next, consider step unsigned.
  ConstantRange UR = getRangeForAffineARHelper(
      getUnsignedRangeMax(Step), getUnsignedRange(Start),
      MaxBECountValue, BitWidth, /* Signed = */ false);

  // Finally, intersect signed and unsigned ranges.
  return SR.intersectWith(UR);
}

ConstantRange ScalarEvolution::getRangeViaFactoring(const SCEV *Start,
                                                    const SCEV *Step,
                                                    const SCEV *MaxBECount,
                                                    unsigned BitWidth) {
  //    RangeOf({C?A:B,+,C?P:Q}) == RangeOf(C?{A,+,P}:{B,+,Q})
  // == RangeOf({A,+,P}) union RangeOf({B,+,Q})

  struct SelectPattern {
    Value *Condition = nullptr;
    APInt TrueValue;
    APInt FalseValue;

    explicit SelectPattern(ScalarEvolution &SE, unsigned BitWidth,
                           const SCEV *S) {
      Optional<unsigned> CastOp;
      APInt Offset(BitWidth, 0);

      assert(SE.getTypeSizeInBits(S->getType()) == BitWidth &&
             "Should be!");

      // Peel off a constant offset:
      if (auto *SA = dyn_cast<SCEVAddExpr>(S)) {
        // In the future we could consider being smarter here and handle
        // {Start+Step,+,Step} too.
        if (SA->getNumOperands() != 2 || !isa<SCEVConstant>(SA->getOperand(0)))
          return;

        Offset = cast<SCEVConstant>(SA->getOperand(0))->getAPInt();
        S = SA->getOperand(1);
      }

      // Peel off a cast operation
      if (auto *SCast = dyn_cast<SCEVCastExpr>(S)) {
        CastOp = SCast->getSCEVType();
        S = SCast->getOperand();
      }

      using namespace llvm::PatternMatch;

      auto *SU = dyn_cast<SCEVUnknown>(S);
      const APInt *TrueVal, *FalseVal;
      if (!SU ||
          !match(SU->getValue(), m_Select(m_Value(Condition), m_APInt(TrueVal),
                                          m_APInt(FalseVal)))) {
        Condition = nullptr;
        return;
      }

      TrueValue = *TrueVal;
      FalseValue = *FalseVal;

      // Re-apply the cast we peeled off earlier
      if (CastOp.hasValue())
        switch (*CastOp) {
        default:
          llvm_unreachable("Unknown SCEV cast type!");

        case scTruncate:
          TrueValue = TrueValue.trunc(BitWidth);
          FalseValue = FalseValue.trunc(BitWidth);
          break;
        case scZeroExtend:
          TrueValue = TrueValue.zext(BitWidth);
          FalseValue = FalseValue.zext(BitWidth);
          break;
        case scSignExtend:
          TrueValue = TrueValue.sext(BitWidth);
          FalseValue = FalseValue.sext(BitWidth);
          break;
        }

      // Re-apply the constant offset we peeled off earlier
      TrueValue += Offset;
      FalseValue += Offset;
    }

    bool isRecognized() { return Condition != nullptr; }
  };

  SelectPattern StartPattern(*this, BitWidth, Start);
  if (!StartPattern.isRecognized())
    return ConstantRange(BitWidth, /* isFullSet = */ true);

  SelectPattern StepPattern(*this, BitWidth, Step);
  if (!StepPattern.isRecognized())
    return ConstantRange(BitWidth, /* isFullSet = */ true);

  if (StartPattern.Condition != StepPattern.Condition) {
    // We don't handle this case today; but we could, by considering four
    // possibilities below instead of two. I'm not sure if there are cases
    // where that will help over what getRange already does, though.
    return ConstantRange(BitWidth, /* isFullSet = */ true);
  }

  // NB! Calling ScalarEvolution::getConstant is fine, but we should not try
  // to construct arbitrary general SCEV expressions here.  This function is
  // called from deep in the call stack, and calling getSCEV (on a sext
  // instruction, say) can end up caching a suboptimal value.

  // FIXME: without the explicit `this` receiver below, MSVC errors out with
  // C2352 and C2512 (otherwise it isn't needed).
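  // For instance (hypothetical select %c): with Start = (%c ? 0 : 10) and
  // Step = (%c ? 1 : 2), the code below computes the ranges of {0,+,1} and
  // {10,+,2} separately and returns their union.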
const SCEV *TrueStart = this->getConstant(StartPattern.TrueValue); const SCEV *TrueStep = this->getConstant(StepPattern.TrueValue); const SCEV *FalseStart = this->getConstant(StartPattern.FalseValue); const SCEV *FalseStep = this->getConstant(StepPattern.FalseValue); ConstantRange TrueRange = this->getRangeForAffineAR(TrueStart, TrueStep, MaxBECount, BitWidth); ConstantRange FalseRange = this->getRangeForAffineAR(FalseStart, FalseStep, MaxBECount, BitWidth); return TrueRange.unionWith(FalseRange); } SCEV::NoWrapFlags ScalarEvolution::getNoWrapFlagsFromUB(const Value *V) { if (isa(V)) return SCEV::FlagAnyWrap; const BinaryOperator *BinOp = cast(V); // Return early if there are no flags to propagate to the SCEV. SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; if (BinOp->hasNoUnsignedWrap()) Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW); if (BinOp->hasNoSignedWrap()) Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW); if (Flags == SCEV::FlagAnyWrap) return SCEV::FlagAnyWrap; return isSCEVExprNeverPoison(BinOp) ? Flags : SCEV::FlagAnyWrap; } bool ScalarEvolution::isSCEVExprNeverPoison(const Instruction *I) { // Here we check that I is in the header of the innermost loop containing I, // since we only deal with instructions in the loop header. The actual loop we // need to check later will come from an add recurrence, but getting that // requires computing the SCEV of the operands, which can be expensive. This // check we can do cheaply to rule out some cases early. Loop *InnermostContainingLoop = LI.getLoopFor(I->getParent()); if (InnermostContainingLoop == nullptr || InnermostContainingLoop->getHeader() != I->getParent()) return false; // Only proceed if we can prove that I does not yield poison. if (!programUndefinedIfFullPoison(I)) return false; // At this point we know that if I is executed, then it does not wrap // according to at least one of NSW or NUW. If I is not executed, then we do // not know if the calculation that I represents would wrap. Multiple // instructions can map to the same SCEV. If we apply NSW or NUW from I to // the SCEV, we must guarantee no wrapping for that SCEV also when it is // derived from other instructions that map to the same SCEV. We cannot make // that guarantee for cases where I is not executed. So we need to find the // loop that I is considered in relation to and prove that I is executed for // every iteration of that loop. That implies that the value that I // calculates does not wrap anywhere in the loop, so then we can apply the // flags to the SCEV. // // We check isLoopInvariant to disambiguate in case we are adding recurrences // from different loops, so that we know which loop to prove that I is // executed in. for (unsigned OpIndex = 0; OpIndex < I->getNumOperands(); ++OpIndex) { // I could be an extractvalue from a call to an overflow intrinsic. // TODO: We can do better here in some cases. 
    if (!isSCEVable(I->getOperand(OpIndex)->getType()))
      return false;

    const SCEV *Op = getSCEV(I->getOperand(OpIndex));
    if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
      bool AllOtherOpsLoopInvariant = true;
      for (unsigned OtherOpIndex = 0; OtherOpIndex < I->getNumOperands();
           ++OtherOpIndex) {
        if (OtherOpIndex != OpIndex) {
          const SCEV *OtherOp = getSCEV(I->getOperand(OtherOpIndex));
          if (!isLoopInvariant(OtherOp, AddRec->getLoop())) {
            AllOtherOpsLoopInvariant = false;
            break;
          }
        }
      }
      if (AllOtherOpsLoopInvariant &&
          isGuaranteedToExecuteForEveryIteration(I, AddRec->getLoop()))
        return true;
    }
  }
  return false;
}

bool ScalarEvolution::isAddRecNeverPoison(const Instruction *I, const Loop *L) {
  // If we know that \c I can never be poison period, then that's enough.
  if (isSCEVExprNeverPoison(I))
    return true;

  // For an add recurrence specifically, we assume that infinite loops without
  // side effects are undefined behavior, and then reason as follows:
  //
  // If the add recurrence is poison in any iteration, it is poison on all
  // future iterations (since incrementing poison yields poison). If the result
  // of the add recurrence is fed into the loop latch condition and the loop
  // does not contain any throws or exiting blocks other than the latch, we now
  // have the ability to "choose" whether the backedge is taken or not (by
  // choosing a sufficiently evil value for the poison feeding into the branch)
  // for every iteration including and after the one in which \p I first became
  // poison. There are two possibilities (let's call the iteration in which \p
  // I first became poison as K):
  //
  // 1. In the set of iterations including and after K, the loop body executes
  //    no side effects. In this case executing the backedge an infinite
  //    number of times will yield undefined behavior.
  //
  // 2. In the set of iterations including and after K, the loop body executes
  //    at least one side effect. In this case, that specific instance of side
  //    effect is control dependent on poison, which also yields undefined
  //    behavior.

  auto *ExitingBB = L->getExitingBlock();
  auto *LatchBB = L->getLoopLatch();
  if (!ExitingBB || !LatchBB || ExitingBB != LatchBB)
    return false;

  SmallPtrSet<const Instruction *, 16> Pushed;
  SmallVector<const Instruction *, 8> PoisonStack;

  // We start by assuming \c I, the post-inc add recurrence, is poison. Only
  // things that are known to be fully poison under that assumption go on the
  // PoisonStack.
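// ---- Illustrative aside (not part of this change) ----
// A minimal, self-contained sketch (hypothetical Node type, not the LLVM
// API) of the worklist walk that follows: flood from one assumed-poison
// value through users that propagate poison, and report whether the latch
// branch is reached.
#if 0
#include <set>
#include <vector>

struct Node {
  bool PropagatesPoison = true; // plays the role of propagatesFullPoison(I)
  bool IsLatchBranch = false;   // a conditional branch in the latch block
  std::vector<Node *> Users;
};

static bool latchDependsOnPoison(Node *Root) {
  std::set<Node *> Pushed{Root};
  std::vector<Node *> Stack{Root};
  while (!Stack.empty()) {
    Node *Poison = Stack.back();
    Stack.pop_back();
    for (Node *U : Poison->Users) {
      if (U->IsLatchBranch)
        return true; // the latch condition is control dependent on poison
      if (U->PropagatesPoison && Pushed.insert(U).second)
        Stack.push_back(U); // visit each propagating user once
    }
  }
  return false;
}
#endif
// ---- end aside ----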
Pushed.insert(I); PoisonStack.push_back(I); bool LatchControlDependentOnPoison = false; while (!PoisonStack.empty() && !LatchControlDependentOnPoison) { const Instruction *Poison = PoisonStack.pop_back_val(); for (auto *PoisonUser : Poison->users()) { if (propagatesFullPoison(cast(PoisonUser))) { if (Pushed.insert(cast(PoisonUser)).second) PoisonStack.push_back(cast(PoisonUser)); } else if (auto *BI = dyn_cast(PoisonUser)) { assert(BI->isConditional() && "Only possibility!"); if (BI->getParent() == LatchBB) { LatchControlDependentOnPoison = true; break; } } } } return LatchControlDependentOnPoison && loopHasNoAbnormalExits(L); } ScalarEvolution::LoopProperties ScalarEvolution::getLoopProperties(const Loop *L) { using LoopProperties = ScalarEvolution::LoopProperties; auto Itr = LoopPropertiesCache.find(L); if (Itr == LoopPropertiesCache.end()) { auto HasSideEffects = [](Instruction *I) { if (auto *SI = dyn_cast(I)) return !SI->isSimple(); return I->mayHaveSideEffects(); }; LoopProperties LP = {/* HasNoAbnormalExits */ true, /*HasNoSideEffects*/ true}; for (auto *BB : L->getBlocks()) for (auto &I : *BB) { if (!isGuaranteedToTransferExecutionToSuccessor(&I)) LP.HasNoAbnormalExits = false; if (HasSideEffects(&I)) LP.HasNoSideEffects = false; if (!LP.HasNoAbnormalExits && !LP.HasNoSideEffects) break; // We're already as pessimistic as we can get. } auto InsertPair = LoopPropertiesCache.insert({L, LP}); assert(InsertPair.second && "We just checked!"); Itr = InsertPair.first; } return Itr->second; } const SCEV *ScalarEvolution::createSCEV(Value *V) { if (!isSCEVable(V->getType())) return getUnknown(V); if (Instruction *I = dyn_cast(V)) { // Don't attempt to analyze instructions in blocks that aren't // reachable. Such instructions don't matter, and they aren't required // to obey basic rules for definitions dominating uses which this // analysis depends on. if (!DT.isReachableFromEntry(I->getParent())) return getUnknown(V); } else if (ConstantInt *CI = dyn_cast(V)) return getConstant(CI); else if (isa(V)) return getZero(V->getType()); else if (GlobalAlias *GA = dyn_cast(V)) return GA->isInterposable() ? getUnknown(V) : getSCEV(GA->getAliasee()); else if (!isa(V)) return getUnknown(V); Operator *U = cast(V); if (auto BO = MatchBinaryOp(U, DT)) { switch (BO->Opcode) { case Instruction::Add: { // The simple thing to do would be to just call getSCEV on both operands // and call getAddExpr with the result. However if we're looking at a // bunch of things all added together, this can be quite inefficient, // because it leads to N-1 getAddExpr calls for N ultimate operands. // Instead, gather up all the operands and make a single getAddExpr call. // LLVM IR canonical form means we need only traverse the left operands. SmallVector AddOps; do { if (BO->Op) { if (auto *OpSCEV = getExistingSCEV(BO->Op)) { AddOps.push_back(OpSCEV); break; } // If a NUW or NSW flag can be applied to the SCEV for this // addition, then compute the SCEV for this addition by itself // with a separate call to getAddExpr. We need to do that // instead of pushing the operands of the addition onto AddOps, // since the flags are only known to apply to this particular // addition - they may not apply to other additions that can be // formed with operands from AddOps. 
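// ---- Illustrative aside (not part of this change) ----
// Hypothetical sketch of the gathering strategy described above. Because
// LLVM IR canonicalizes a chain a+b+c+d as (((a+b)+c)+d), collecting all
// addends only requires walking down the left spine, so one getAddExpr
// call with N operands replaces N-1 nested calls.
#if 0
#include <vector>

struct Expr {
  bool IsAdd = false;
  Expr *LHS = nullptr, *RHS = nullptr;
};

static void gatherAddOps(Expr *E, std::vector<Expr *> &Ops) {
  while (E->IsAdd) {       // peel additions off the left spine
    Ops.push_back(E->RHS); // the right operand of each add is a leaf here
    E = E->LHS;
  }
  Ops.push_back(E);        // the final non-add leaf
}
#endif
// ---- end aside ----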
const SCEV *RHS = getSCEV(BO->RHS); SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(BO->Op); if (Flags != SCEV::FlagAnyWrap) { const SCEV *LHS = getSCEV(BO->LHS); if (BO->Opcode == Instruction::Sub) AddOps.push_back(getMinusSCEV(LHS, RHS, Flags)); else AddOps.push_back(getAddExpr(LHS, RHS, Flags)); break; } } if (BO->Opcode == Instruction::Sub) AddOps.push_back(getNegativeSCEV(getSCEV(BO->RHS))); else AddOps.push_back(getSCEV(BO->RHS)); auto NewBO = MatchBinaryOp(BO->LHS, DT); if (!NewBO || (NewBO->Opcode != Instruction::Add && NewBO->Opcode != Instruction::Sub)) { AddOps.push_back(getSCEV(BO->LHS)); break; } BO = NewBO; } while (true); return getAddExpr(AddOps); } case Instruction::Mul: { SmallVector MulOps; do { if (BO->Op) { if (auto *OpSCEV = getExistingSCEV(BO->Op)) { MulOps.push_back(OpSCEV); break; } SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(BO->Op); if (Flags != SCEV::FlagAnyWrap) { MulOps.push_back( getMulExpr(getSCEV(BO->LHS), getSCEV(BO->RHS), Flags)); break; } } MulOps.push_back(getSCEV(BO->RHS)); auto NewBO = MatchBinaryOp(BO->LHS, DT); if (!NewBO || NewBO->Opcode != Instruction::Mul) { MulOps.push_back(getSCEV(BO->LHS)); break; } BO = NewBO; } while (true); return getMulExpr(MulOps); } case Instruction::UDiv: return getUDivExpr(getSCEV(BO->LHS), getSCEV(BO->RHS)); case Instruction::URem: return getURemExpr(getSCEV(BO->LHS), getSCEV(BO->RHS)); case Instruction::Sub: { SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; if (BO->Op) Flags = getNoWrapFlagsFromUB(BO->Op); return getMinusSCEV(getSCEV(BO->LHS), getSCEV(BO->RHS), Flags); } case Instruction::And: // For an expression like x&255 that merely masks off the high bits, // use zext(trunc(x)) as the SCEV expression. if (ConstantInt *CI = dyn_cast(BO->RHS)) { if (CI->isZero()) return getSCEV(BO->RHS); if (CI->isMinusOne()) return getSCEV(BO->LHS); const APInt &A = CI->getValue(); // Instcombine's ShrinkDemandedConstant may strip bits out of // constants, obscuring what would otherwise be a low-bits mask. // Use computeKnownBits to compute what ShrinkDemandedConstant // knew about to reconstruct a low-bits mask value. unsigned LZ = A.countLeadingZeros(); unsigned TZ = A.countTrailingZeros(); unsigned BitWidth = A.getBitWidth(); KnownBits Known(BitWidth); computeKnownBits(BO->LHS, Known, getDataLayout(), 0, &AC, nullptr, &DT); APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ); if ((LZ != 0 || TZ != 0) && !((~A & ~Known.Zero) & EffectiveMask)) { const SCEV *MulCount = getConstant(APInt::getOneBitSet(BitWidth, TZ)); const SCEV *LHS = getSCEV(BO->LHS); const SCEV *ShiftedLHS = nullptr; if (auto *LHSMul = dyn_cast(LHS)) { if (auto *OpC = dyn_cast(LHSMul->getOperand(0))) { // For an expression like (x * 8) & 8, simplify the multiply. unsigned MulZeros = OpC->getAPInt().countTrailingZeros(); unsigned GCD = std::min(MulZeros, TZ); APInt DivAmt = APInt::getOneBitSet(BitWidth, TZ - GCD); SmallVector MulOps; MulOps.push_back(getConstant(OpC->getAPInt().lshr(GCD))); MulOps.append(LHSMul->op_begin() + 1, LHSMul->op_end()); auto *NewMul = getMulExpr(MulOps, LHSMul->getNoWrapFlags()); ShiftedLHS = getUDivExpr(NewMul, getConstant(DivAmt)); } } if (!ShiftedLHS) ShiftedLHS = getUDivExpr(LHS, MulCount); return getMulExpr( getZeroExtendExpr( getTruncateExpr(ShiftedLHS, IntegerType::get(getContext(), BitWidth - LZ - TZ)), BO->LHS->getType()), MulCount); } } break; case Instruction::Or: // If the RHS of the Or is a constant, we may have something like: // X*4+1 which got turned into X*4|1. 
Handle this as an Add so loop // optimizations will transparently handle this case. // // In order for this transformation to be safe, the LHS must be of the // form X*(2^n) and the Or constant must be less than 2^n. if (ConstantInt *CI = dyn_cast(BO->RHS)) { const SCEV *LHS = getSCEV(BO->LHS); const APInt &CIVal = CI->getValue(); if (GetMinTrailingZeros(LHS) >= (CIVal.getBitWidth() - CIVal.countLeadingZeros())) { // Build a plain add SCEV. const SCEV *S = getAddExpr(LHS, getSCEV(CI)); // If the LHS of the add was an addrec and it has no-wrap flags, // transfer the no-wrap flags, since an or won't introduce a wrap. if (const SCEVAddRecExpr *NewAR = dyn_cast(S)) { const SCEVAddRecExpr *OldAR = cast(LHS); const_cast(NewAR)->setNoWrapFlags( OldAR->getNoWrapFlags()); } return S; } } break; case Instruction::Xor: if (ConstantInt *CI = dyn_cast(BO->RHS)) { // If the RHS of xor is -1, then this is a not operation. if (CI->isMinusOne()) return getNotSCEV(getSCEV(BO->LHS)); // Model xor(and(x, C), C) as and(~x, C), if C is a low-bits mask. // This is a variant of the check for xor with -1, and it handles // the case where instcombine has trimmed non-demanded bits out // of an xor with -1. if (auto *LBO = dyn_cast(BO->LHS)) if (ConstantInt *LCI = dyn_cast(LBO->getOperand(1))) if (LBO->getOpcode() == Instruction::And && LCI->getValue() == CI->getValue()) if (const SCEVZeroExtendExpr *Z = dyn_cast(getSCEV(BO->LHS))) { Type *UTy = BO->LHS->getType(); const SCEV *Z0 = Z->getOperand(); Type *Z0Ty = Z0->getType(); unsigned Z0TySize = getTypeSizeInBits(Z0Ty); // If C is a low-bits mask, the zero extend is serving to // mask off the high bits. Complement the operand and // re-apply the zext. if (CI->getValue().isMask(Z0TySize)) return getZeroExtendExpr(getNotSCEV(Z0), UTy); // If C is a single bit, it may be in the sign-bit position // before the zero-extend. In this case, represent the xor // using an add, which is equivalent, and re-apply the zext. APInt Trunc = CI->getValue().trunc(Z0TySize); if (Trunc.zext(getTypeSizeInBits(UTy)) == CI->getValue() && Trunc.isSignMask()) return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)), UTy); } } break; case Instruction::Shl: // Turn shift left of a constant amount into a multiply. if (ConstantInt *SA = dyn_cast(BO->RHS)) { uint32_t BitWidth = cast(SA->getType())->getBitWidth(); // If the shift count is not less than the bitwidth, the result of // the shift is undefined. Don't try to analyze it, because the // resolution chosen here may differ from the resolution chosen in // other parts of the compiler. if (SA->getValue().uge(BitWidth)) break; // It is currently not resolved how to interpret NSW for left // shift by BitWidth - 1, so we avoid applying flags in that // case. Remove this check (or this comment) once the situation // is resolved. See // http://lists.llvm.org/pipermail/llvm-dev/2015-April/084195.html // and http://reviews.llvm.org/D8890 . auto Flags = SCEV::FlagAnyWrap; if (BO->Op && SA->getValue().ult(BitWidth - 1)) Flags = getNoWrapFlagsFromUB(BO->Op); Constant *X = ConstantInt::get(getContext(), APInt::getOneBitSet(BitWidth, SA->getZExtValue())); return getMulExpr(getSCEV(BO->LHS), getSCEV(X), Flags); } break; case Instruction::AShr: { // AShr X, C, where C is a constant. ConstantInt *CI = dyn_cast(BO->RHS); if (!CI) break; Type *OuterTy = BO->LHS->getType(); uint64_t BitWidth = getTypeSizeInBits(OuterTy); // If the shift count is not less than the bitwidth, the result of // the shift is undefined. 
Don't try to analyze it, because the // resolution chosen here may differ from the resolution chosen in // other parts of the compiler. if (CI->getValue().uge(BitWidth)) break; if (CI->isZero()) return getSCEV(BO->LHS); // shift by zero --> noop uint64_t AShrAmt = CI->getZExtValue(); Type *TruncTy = IntegerType::get(getContext(), BitWidth - AShrAmt); Operator *L = dyn_cast(BO->LHS); if (L && L->getOpcode() == Instruction::Shl) { // X = Shl A, n // Y = AShr X, m // Both n and m are constant. const SCEV *ShlOp0SCEV = getSCEV(L->getOperand(0)); if (L->getOperand(1) == BO->RHS) // For a two-shift sext-inreg, i.e. n = m, // use sext(trunc(x)) as the SCEV expression. return getSignExtendExpr( getTruncateExpr(ShlOp0SCEV, TruncTy), OuterTy); ConstantInt *ShlAmtCI = dyn_cast(L->getOperand(1)); if (ShlAmtCI && ShlAmtCI->getValue().ult(BitWidth)) { uint64_t ShlAmt = ShlAmtCI->getZExtValue(); if (ShlAmt > AShrAmt) { // When n > m, use sext(mul(trunc(x), 2^(n-m)))) as the SCEV // expression. We already checked that ShlAmt < BitWidth, so // the multiplier, 1 << (ShlAmt - AShrAmt), fits into TruncTy as // ShlAmt - AShrAmt < Amt. APInt Mul = APInt::getOneBitSet(BitWidth - AShrAmt, ShlAmt - AShrAmt); return getSignExtendExpr( getMulExpr(getTruncateExpr(ShlOp0SCEV, TruncTy), getConstant(Mul)), OuterTy); } } } break; } } } switch (U->getOpcode()) { case Instruction::Trunc: return getTruncateExpr(getSCEV(U->getOperand(0)), U->getType()); case Instruction::ZExt: return getZeroExtendExpr(getSCEV(U->getOperand(0)), U->getType()); case Instruction::SExt: if (auto BO = MatchBinaryOp(U->getOperand(0), DT)) { // The NSW flag of a subtract does not always survive the conversion to // A + (-1)*B. By pushing sign extension onto its operands we are much // more likely to preserve NSW and allow later AddRec optimisations. // // NOTE: This is effectively duplicating this logic from getSignExtend: // sext((A + B + ...)) --> (sext(A) + sext(B) + ...) // but by that point the NSW information has potentially been lost. if (BO->Opcode == Instruction::Sub && BO->IsNSW) { Type *Ty = U->getType(); auto *V1 = getSignExtendExpr(getSCEV(BO->LHS), Ty); auto *V2 = getSignExtendExpr(getSCEV(BO->RHS), Ty); return getMinusSCEV(V1, V2, SCEV::FlagNSW); } } return getSignExtendExpr(getSCEV(U->getOperand(0)), U->getType()); case Instruction::BitCast: // BitCasts are no-op casts so we just eliminate the cast. if (isSCEVable(U->getType()) && isSCEVable(U->getOperand(0)->getType())) return getSCEV(U->getOperand(0)); break; // It's tempting to handle inttoptr and ptrtoint as no-ops, however this can // lead to pointer expressions which cannot safely be expanded to GEPs, // because ScalarEvolution doesn't respect the GEP aliasing rules when // simplifying integer expressions. case Instruction::GetElementPtr: return createNodeForGEP(cast(U)); case Instruction::PHI: return createNodeForPHI(cast(U)); case Instruction::Select: // U can also be a select constant expr, which let fall through. Since // createNodeForSelect only works for a condition that is an `ICmpInst`, and // constant expressions cannot have instructions as operands, we'd have // returned getUnknown for a select constant expressions anyway. 
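// ---- Illustrative aside (not part of this change) ----
// The two-shift sext-inreg identity used above, checked exhaustively on i8
// with n == m == 3: (x shl 3) ashr 3 equals sign-extending the low 5 bits,
// i.e. sext(trunc(x)). Hypothetical example code; assumes the usual
// two's-complement, arithmetic-right-shift behavior of the host compiler.
#if 0
#include <cassert>
#include <cstdint>

int main() {
  for (int V = -128; V <= 127; ++V) {
    int8_t X = static_cast<int8_t>(V);
    int8_t ShlAshr = static_cast<int8_t>(
        static_cast<int8_t>(static_cast<uint8_t>(X) << 3) >> 3);
    int Low5 = static_cast<uint8_t>(X) & 0x1F;     // trunc to i5
    int Sext = (Low5 & 0x10) ? (Low5 - 32) : Low5; // sext i5 -> int
    assert(ShlAshr == static_cast<int8_t>(Sext));
  }
  return 0;
}
#endif
// ---- end aside ----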
if (isa(U)) return createNodeForSelectOrPHI(cast(U), U->getOperand(0), U->getOperand(1), U->getOperand(2)); break; case Instruction::Call: case Instruction::Invoke: if (Value *RV = CallSite(U).getReturnedArgOperand()) return getSCEV(RV); break; } return getUnknown(V); } //===----------------------------------------------------------------------===// // Iteration Count Computation Code // static unsigned getConstantTripCount(const SCEVConstant *ExitCount) { if (!ExitCount) return 0; ConstantInt *ExitConst = ExitCount->getValue(); // Guard against huge trip counts. if (ExitConst->getValue().getActiveBits() > 32) return 0; // In case of integer overflow, this returns 0, which is correct. return ((unsigned)ExitConst->getZExtValue()) + 1; } unsigned ScalarEvolution::getSmallConstantTripCount(const Loop *L) { if (BasicBlock *ExitingBB = L->getExitingBlock()) return getSmallConstantTripCount(L, ExitingBB); // No trip count information for multiple exits. return 0; } unsigned ScalarEvolution::getSmallConstantTripCount(const Loop *L, BasicBlock *ExitingBlock) { assert(ExitingBlock && "Must pass a non-null exiting block!"); assert(L->isLoopExiting(ExitingBlock) && "Exiting block must actually branch out of the loop!"); const SCEVConstant *ExitCount = dyn_cast(getExitCount(L, ExitingBlock)); return getConstantTripCount(ExitCount); } unsigned ScalarEvolution::getSmallConstantMaxTripCount(const Loop *L) { const auto *MaxExitCount = dyn_cast(getMaxBackedgeTakenCount(L)); return getConstantTripCount(MaxExitCount); } unsigned ScalarEvolution::getSmallConstantTripMultiple(const Loop *L) { if (BasicBlock *ExitingBB = L->getExitingBlock()) return getSmallConstantTripMultiple(L, ExitingBB); // No trip multiple information for multiple exits. return 0; } /// Returns the largest constant divisor of the trip count of this loop as a /// normal unsigned value, if possible. This means that the actual trip count is /// always a multiple of the returned value (don't forget the trip count could /// very well be zero as well!). /// /// Returns 1 if the trip count is unknown or not guaranteed to be the /// multiple of a constant (which is also the case if the trip count is simply /// constant, use getSmallConstantTripCount for that case), Will also return 1 /// if the trip count is very large (>= 2^32). /// /// As explained in the comments for getSmallConstantTripCount, this assumes /// that control exits the loop via ExitingBlock. unsigned ScalarEvolution::getSmallConstantTripMultiple(const Loop *L, BasicBlock *ExitingBlock) { assert(ExitingBlock && "Must pass a non-null exiting block!"); assert(L->isLoopExiting(ExitingBlock) && "Exiting block must actually branch out of the loop!"); const SCEV *ExitCount = getExitCount(L, ExitingBlock); if (ExitCount == getCouldNotCompute()) return 1; // Get the trip count from the BE count by adding 1. const SCEV *TCExpr = getAddExpr(ExitCount, getOne(ExitCount->getType())); const SCEVConstant *TC = dyn_cast(TCExpr); if (!TC) // Attempt to factor more general cases. Returns the greatest power of // two divisor. If overflow happens, the trip count expression is still // divisible by the greatest power of 2 divisor returned. return 1U << std::min((uint32_t)31, GetMinTrailingZeros(TCExpr)); ConstantInt *Result = TC->getValue(); // Guard against huge trip counts (this requires checking // for zero to handle the case where the trip count == -1 and the // addition wraps). 
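// ---- Illustrative aside (not part of this change) ----
// The power-of-two fallback above, on plain unsigned values: if a trip
// count has k known trailing zero bits, it is divisible by 2^k even when
// its exact value is unknown. Hypothetical example; __builtin_ctz is the
// GCC/Clang builtin.
#if 0
#include <cassert>

int main() {
  unsigned TC = 40;                // some trip count: 40 = 2^3 * 5
  unsigned TZ = __builtin_ctz(TC); // 3 trailing zero bits
  assert(TC % (1u << TZ) == 0);    // divisible by 2^3 = 8
  return 0;
}
#endif
// ---- end aside ----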
if (!Result || Result->getValue().getActiveBits() > 32 || Result->getValue().getActiveBits() == 0) return 1; return (unsigned)Result->getZExtValue(); } /// Get the expression for the number of loop iterations for which this loop is /// guaranteed not to exit via ExitingBlock. Otherwise return /// SCEVCouldNotCompute. const SCEV *ScalarEvolution::getExitCount(const Loop *L, BasicBlock *ExitingBlock) { return getBackedgeTakenInfo(L).getExact(ExitingBlock, this); } const SCEV * ScalarEvolution::getPredicatedBackedgeTakenCount(const Loop *L, SCEVUnionPredicate &Preds) { return getPredicatedBackedgeTakenInfo(L).getExact(this, &Preds); } const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L) { return getBackedgeTakenInfo(L).getExact(this); } /// Similar to getBackedgeTakenCount, except return the least SCEV value that is /// known never to be less than the actual backedge taken count. const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) { return getBackedgeTakenInfo(L).getMax(this); } bool ScalarEvolution::isBackedgeTakenCountMaxOrZero(const Loop *L) { return getBackedgeTakenInfo(L).isMaxOrZero(this); } /// Push PHI nodes in the header of the given loop onto the given Worklist. static void PushLoopPHIs(const Loop *L, SmallVectorImpl &Worklist) { BasicBlock *Header = L->getHeader(); // Push all Loop-header PHIs onto the Worklist stack. for (PHINode &PN : Header->phis()) Worklist.push_back(&PN); } const ScalarEvolution::BackedgeTakenInfo & ScalarEvolution::getPredicatedBackedgeTakenInfo(const Loop *L) { auto &BTI = getBackedgeTakenInfo(L); if (BTI.hasFullInfo()) return BTI; auto Pair = PredicatedBackedgeTakenCounts.insert({L, BackedgeTakenInfo()}); if (!Pair.second) return Pair.first->second; BackedgeTakenInfo Result = computeBackedgeTakenCount(L, /*AllowPredicates=*/true); return PredicatedBackedgeTakenCounts.find(L)->second = std::move(Result); } const ScalarEvolution::BackedgeTakenInfo & ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { // Initially insert an invalid entry for this loop. If the insertion // succeeds, proceed to actually compute a backedge-taken count and // update the value. The temporary CouldNotCompute value tells SCEV // code elsewhere that it shouldn't attempt to request a new // backedge-taken count, which could result in infinite recursion. std::pair::iterator, bool> Pair = BackedgeTakenCounts.insert({L, BackedgeTakenInfo()}); if (!Pair.second) return Pair.first->second; // computeBackedgeTakenCount may allocate memory for its result. Inserting it // into the BackedgeTakenCounts map transfers ownership. Otherwise, the result // must be cleared in this scope. BackedgeTakenInfo Result = computeBackedgeTakenCount(L); if (Result.getExact(this) != getCouldNotCompute()) { assert(isLoopInvariant(Result.getExact(this), L) && isLoopInvariant(Result.getMax(this), L) && "Computed backedge-taken count isn't loop invariant for loop!"); ++NumTripCountsComputed; } else if (Result.getMax(this) == getCouldNotCompute() && isa(L->getHeader()->begin())) { // Only count loops that have phi nodes as not being computable. ++NumTripCountsNotComputed; } // Now that we know more about the trip count for this loop, forget any // existing SCEV values for PHI nodes in this loop since they are only // conservative estimates made without the benefit of trip count // information. This is similar to the code in forgetLoop, except that // it handles SCEVUnknown PHI nodes specially. 
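// ---- Illustrative aside (not part of this change) ----
// Hypothetical sketch of the insert-first caching pattern used by
// getBackedgeTakenInfo above: insert a sentinel before computing so that a
// recursive re-entry on the same key sees the sentinel instead of recursing
// forever, and re-look-up the slot afterwards because the computation may
// have inserted into the map and, for a DenseMap-style container, have
// invalidated the earlier iterator.
#if 0
#include <map>

static std::map<int, int> Cache;

static int compute(int Key) { return Key * 2; } // stand-in for the real work

static int memoized(int Key) {
  auto Pair = Cache.insert({Key, -1}); // -1 plays the CouldNotCompute role
  if (!Pair.second)
    return Pair.first->second;         // cache hit (possibly the sentinel)
  int Result = compute(Key);           // may re-enter memoized()
  return Cache[Key] = Result;          // re-lookup instead of reusing Pair
}
#endif
// ---- end aside ----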
  if (Result.hasAnyInfo()) {
    SmallVector<Instruction *, 16> Worklist;
    PushLoopPHIs(L, Worklist);

    SmallPtrSet<Instruction *, 8> Discovered;
    while (!Worklist.empty()) {
      Instruction *I = Worklist.pop_back_val();

      ValueExprMapType::iterator It =
          ValueExprMap.find_as(static_cast<Value *>(I));
      if (It != ValueExprMap.end()) {
        const SCEV *Old = It->second;

        // SCEVUnknown for a PHI either means that it has an unrecognized
        // structure, or it's a PHI that's in the process of being computed
        // by createNodeForPHI. In the former case, additional loop trip
        // count information isn't going to change anything. In the latter
        // case, createNodeForPHI will perform the necessary updates on its
        // own when it gets to that point.
        if (!isa<PHINode>(I) || !isa<SCEVUnknown>(Old)) {
          eraseValueFromMap(It->first);
          forgetMemoizedResults(Old);
        }
        if (PHINode *PN = dyn_cast<PHINode>(I))
          ConstantEvolutionLoopExitValue.erase(PN);
      }

      // Since we don't need to invalidate anything for correctness and we're
      // only invalidating to make SCEV's results more precise, we get to stop
      // early to avoid invalidating too much. This is especially important in
      // cases like:
      //
      //   %v = f(pn0, pn1) // pn0 and pn1 used through some other phi node
      // loop0:
      //   %pn0 = phi
      //   ...
      // loop1:
      //   %pn1 = phi
      //   ...
      //
      // where both loop0 and loop1's backedge taken count uses the SCEV
      // expression for %v. If we don't have the early stop below then in
      // cases like the above, getBackedgeTakenInfo(loop1) will clear out the
      // trip count for loop0 and getBackedgeTakenInfo(loop0) will clear out
      // the trip count for loop1, effectively nullifying SCEV's trip count
      // cache.
      for (auto *U : I->users())
        if (auto *I = dyn_cast<Instruction>(U)) {
          auto *LoopForUser = LI.getLoopFor(I->getParent());
          if (LoopForUser && L->contains(LoopForUser) &&
              Discovered.insert(I).second)
            Worklist.push_back(I);
        }
    }
  }

  // Re-lookup the insert position, since the call to
  // computeBackedgeTakenCount above could result in a
  // recursive call to getBackedgeTakenInfo (on a different
  // loop), which would invalidate the iterator computed
  // earlier.
  return BackedgeTakenCounts.find(L)->second = std::move(Result);
}

void ScalarEvolution::forgetLoop(const Loop *L) {
  // Drop any stored trip count value.
  auto RemoveLoopFromBackedgeMap =
      [](DenseMap<const Loop *, BackedgeTakenInfo> &Map, const Loop *L) {
        auto BTCPos = Map.find(L);
        if (BTCPos != Map.end()) {
          BTCPos->second.clear();
          Map.erase(BTCPos);
        }
      };

  SmallVector<const Loop *, 16> LoopWorklist(1, L);
  SmallVector<Instruction *, 32> Worklist;
  SmallPtrSet<Instruction *, 16> Visited;

  // Iterate over all the loops and sub-loops to drop SCEV information.
  while (!LoopWorklist.empty()) {
    auto *CurrL = LoopWorklist.pop_back_val();

    RemoveLoopFromBackedgeMap(BackedgeTakenCounts, CurrL);
    RemoveLoopFromBackedgeMap(PredicatedBackedgeTakenCounts, CurrL);

    // Drop information about predicated SCEV rewrites for this loop.
    for (auto I = PredicatedSCEVRewrites.begin();
         I != PredicatedSCEVRewrites.end();) {
      std::pair<const SCEV *, const Loop *> Entry = I->first;
      if (Entry.second == CurrL)
        PredicatedSCEVRewrites.erase(I++);
      else
        ++I;
    }

    auto LoopUsersItr = LoopUsers.find(CurrL);
    if (LoopUsersItr != LoopUsers.end()) {
      for (auto *S : LoopUsersItr->second)
        forgetMemoizedResults(S);
      LoopUsers.erase(LoopUsersItr);
    }

    // Drop information about expressions based on loop-header PHIs.
PushLoopPHIs(CurrL, Worklist); while (!Worklist.empty()) { Instruction *I = Worklist.pop_back_val(); if (!Visited.insert(I).second) continue; ValueExprMapType::iterator It = ValueExprMap.find_as(static_cast(I)); if (It != ValueExprMap.end()) { eraseValueFromMap(It->first); forgetMemoizedResults(It->second); if (PHINode *PN = dyn_cast(I)) ConstantEvolutionLoopExitValue.erase(PN); } PushDefUseChildren(I, Worklist); } LoopPropertiesCache.erase(CurrL); // Forget all contained loops too, to avoid dangling entries in the // ValuesAtScopes map. LoopWorklist.append(CurrL->begin(), CurrL->end()); } } void ScalarEvolution::forgetValue(Value *V) { Instruction *I = dyn_cast(V); if (!I) return; // Drop information about expressions based on loop-header PHIs. SmallVector Worklist; Worklist.push_back(I); SmallPtrSet Visited; while (!Worklist.empty()) { I = Worklist.pop_back_val(); if (!Visited.insert(I).second) continue; ValueExprMapType::iterator It = ValueExprMap.find_as(static_cast(I)); if (It != ValueExprMap.end()) { eraseValueFromMap(It->first); forgetMemoizedResults(It->second); if (PHINode *PN = dyn_cast(I)) ConstantEvolutionLoopExitValue.erase(PN); } PushDefUseChildren(I, Worklist); } } /// Get the exact loop backedge taken count considering all loop exits. A /// computable result can only be returned for loops with a single exit. /// Returning the minimum taken count among all exits is incorrect because one /// of the loop's exit limit's may have been skipped. howFarToZero assumes that /// the limit of each loop test is never skipped. This is a valid assumption as /// long as the loop exits via that test. For precise results, it is the /// caller's responsibility to specify the relevant loop exit using /// getExact(ExitingBlock, SE). const SCEV * ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE, SCEVUnionPredicate *Preds) const { // If any exits were not computable, the loop is not computable. if (!isComplete() || ExitNotTaken.empty()) return SE->getCouldNotCompute(); const SCEV *BECount = nullptr; for (auto &ENT : ExitNotTaken) { assert(ENT.ExactNotTaken != SE->getCouldNotCompute() && "bad exit SCEV"); if (!BECount) BECount = ENT.ExactNotTaken; else if (BECount != ENT.ExactNotTaken) return SE->getCouldNotCompute(); if (Preds && !ENT.hasAlwaysTruePredicate()) Preds->add(ENT.Predicate.get()); assert((Preds || ENT.hasAlwaysTruePredicate()) && "Predicate should be always true!"); } assert(BECount && "Invalid not taken count for loop exit"); return BECount; } /// Get the exact not taken count for this loop exit. const SCEV * ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock, ScalarEvolution *SE) const { for (auto &ENT : ExitNotTaken) if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePredicate()) return ENT.ExactNotTaken; return SE->getCouldNotCompute(); } /// getMax - Get the max backedge taken count for the loop. 
const SCEV * ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const { auto PredicateNotAlwaysTrue = [](const ExitNotTakenInfo &ENT) { return !ENT.hasAlwaysTruePredicate(); }; if (any_of(ExitNotTaken, PredicateNotAlwaysTrue) || !getMax()) return SE->getCouldNotCompute(); assert((isa(getMax()) || isa(getMax())) && "No point in having a non-constant max backedge taken count!"); return getMax(); } bool ScalarEvolution::BackedgeTakenInfo::isMaxOrZero(ScalarEvolution *SE) const { auto PredicateNotAlwaysTrue = [](const ExitNotTakenInfo &ENT) { return !ENT.hasAlwaysTruePredicate(); }; return MaxOrZero && !any_of(ExitNotTaken, PredicateNotAlwaysTrue); } bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S, ScalarEvolution *SE) const { if (getMax() && getMax() != SE->getCouldNotCompute() && SE->hasOperand(getMax(), S)) return true; for (auto &ENT : ExitNotTaken) if (ENT.ExactNotTaken != SE->getCouldNotCompute() && SE->hasOperand(ENT.ExactNotTaken, S)) return true; return false; } ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E) : ExactNotTaken(E), MaxNotTaken(E) { assert((isa(MaxNotTaken) || isa(MaxNotTaken)) && "No point in having a non-constant max backedge taken count!"); } ScalarEvolution::ExitLimit::ExitLimit( const SCEV *E, const SCEV *M, bool MaxOrZero, ArrayRef *> PredSetList) : ExactNotTaken(E), MaxNotTaken(M), MaxOrZero(MaxOrZero) { assert((isa(ExactNotTaken) || !isa(MaxNotTaken)) && "Exact is not allowed to be less precise than Max"); assert((isa(MaxNotTaken) || isa(MaxNotTaken)) && "No point in having a non-constant max backedge taken count!"); for (auto *PredSet : PredSetList) for (auto *P : *PredSet) addPredicate(P); } ScalarEvolution::ExitLimit::ExitLimit( const SCEV *E, const SCEV *M, bool MaxOrZero, const SmallPtrSetImpl &PredSet) : ExitLimit(E, M, MaxOrZero, {&PredSet}) { assert((isa(MaxNotTaken) || isa(MaxNotTaken)) && "No point in having a non-constant max backedge taken count!"); } ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E, const SCEV *M, bool MaxOrZero) : ExitLimit(E, M, MaxOrZero, None) { assert((isa(MaxNotTaken) || isa(MaxNotTaken)) && "No point in having a non-constant max backedge taken count!"); } /// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each /// computable exit into a persistent ExitNotTakenInfo array. ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo( SmallVectorImpl &&ExitCounts, bool Complete, const SCEV *MaxCount, bool MaxOrZero) : MaxAndComplete(MaxCount, Complete), MaxOrZero(MaxOrZero) { using EdgeExitInfo = ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo; ExitNotTaken.reserve(ExitCounts.size()); std::transform( ExitCounts.begin(), ExitCounts.end(), std::back_inserter(ExitNotTaken), [&](const EdgeExitInfo &EEI) { BasicBlock *ExitBB = EEI.first; const ExitLimit &EL = EEI.second; if (EL.Predicates.empty()) return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, nullptr); std::unique_ptr Predicate(new SCEVUnionPredicate); for (auto *Pred : EL.Predicates) Predicate->add(Pred); return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, std::move(Predicate)); }); assert((isa(MaxCount) || isa(MaxCount)) && "No point in having a non-constant max backedge taken count!"); } /// Invalidate this result and free the ExitNotTakenInfo array. void ScalarEvolution::BackedgeTakenInfo::clear() { ExitNotTaken.clear(); } /// Compute the number of times the backedge of the specified loop will execute. 
ScalarEvolution::BackedgeTakenInfo ScalarEvolution::computeBackedgeTakenCount(const Loop *L, bool AllowPredicates) { SmallVector ExitingBlocks; L->getExitingBlocks(ExitingBlocks); using EdgeExitInfo = ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo; SmallVector ExitCounts; bool CouldComputeBECount = true; BasicBlock *Latch = L->getLoopLatch(); // may be NULL. const SCEV *MustExitMaxBECount = nullptr; const SCEV *MayExitMaxBECount = nullptr; bool MustExitMaxOrZero = false; // Compute the ExitLimit for each loop exit. Use this to populate ExitCounts // and compute maxBECount. // Do a union of all the predicates here. for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { BasicBlock *ExitBB = ExitingBlocks[i]; ExitLimit EL = computeExitLimit(L, ExitBB, AllowPredicates); assert((AllowPredicates || EL.Predicates.empty()) && "Predicated exit limit when predicates are not allowed!"); // 1. For each exit that can be computed, add an entry to ExitCounts. // CouldComputeBECount is true only if all exits can be computed. if (EL.ExactNotTaken == getCouldNotCompute()) // We couldn't compute an exact value for this exit, so // we won't be able to compute an exact value for the loop. CouldComputeBECount = false; else ExitCounts.emplace_back(ExitBB, EL); // 2. Derive the loop's MaxBECount from each exit's max number of // non-exiting iterations. Partition the loop exits into two kinds: // LoopMustExits and LoopMayExits. // // If the exit dominates the loop latch, it is a LoopMustExit otherwise it // is a LoopMayExit. If any computable LoopMustExit is found, then // MaxBECount is the minimum EL.MaxNotTaken of computable // LoopMustExits. Otherwise, MaxBECount is conservatively the maximum // EL.MaxNotTaken, where CouldNotCompute is considered greater than any // computable EL.MaxNotTaken. if (EL.MaxNotTaken != getCouldNotCompute() && Latch && DT.dominates(ExitBB, Latch)) { if (!MustExitMaxBECount) { MustExitMaxBECount = EL.MaxNotTaken; MustExitMaxOrZero = EL.MaxOrZero; } else { MustExitMaxBECount = getUMinFromMismatchedTypes(MustExitMaxBECount, EL.MaxNotTaken); } } else if (MayExitMaxBECount != getCouldNotCompute()) { if (!MayExitMaxBECount || EL.MaxNotTaken == getCouldNotCompute()) MayExitMaxBECount = EL.MaxNotTaken; else { MayExitMaxBECount = getUMaxFromMismatchedTypes(MayExitMaxBECount, EL.MaxNotTaken); } } } const SCEV *MaxBECount = MustExitMaxBECount ? MustExitMaxBECount : (MayExitMaxBECount ? MayExitMaxBECount : getCouldNotCompute()); // The loop backedge will be taken the maximum or zero times if there's // a single exit that must be taken the maximum or zero times. bool MaxOrZero = (MustExitMaxOrZero && ExitingBlocks.size() == 1); return BackedgeTakenInfo(std::move(ExitCounts), CouldComputeBECount, MaxBECount, MaxOrZero); } ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock, bool AllowPredicates) { // Okay, we've chosen an exiting block. See what condition causes us to exit // at this block and remember the exit block and whether all other targets // lead to the loop header. bool MustExecuteLoopHeader = true; BasicBlock *Exit = nullptr; for (auto *SBB : successors(ExitingBlock)) if (!L->contains(SBB)) { if (Exit) // Multiple exit successors. return getCouldNotCompute(); Exit = SBB; } else if (SBB != L->getHeader()) { MustExecuteLoopHeader = false; } // At this point, we know we have a conditional branch that determines whether // the loop is exited. However, we don't know if the branch is executed each // time through the loop. 
If not, then the execution count of the branch will // not be equal to the trip count of the loop. // // Currently we check for this by checking to see if the Exit branch goes to // the loop header. If so, we know it will always execute the same number of // times as the loop. We also handle the case where the exit block *is* the // loop header. This is common for un-rotated loops. // // If both of those tests fail, walk up the unique predecessor chain to the // header, stopping if there is an edge that doesn't exit the loop. If the // header is reached, the execution count of the branch will be equal to the // trip count of the loop. // // More extensive analysis could be done to handle more cases here. // if (!MustExecuteLoopHeader && ExitingBlock != L->getHeader()) { // The simple checks failed, try climbing the unique predecessor chain // up to the header. bool Ok = false; for (BasicBlock *BB = ExitingBlock; BB; ) { BasicBlock *Pred = BB->getUniquePredecessor(); if (!Pred) return getCouldNotCompute(); TerminatorInst *PredTerm = Pred->getTerminator(); for (const BasicBlock *PredSucc : PredTerm->successors()) { if (PredSucc == BB) continue; // If the predecessor has a successor that isn't BB and isn't // outside the loop, assume the worst. if (L->contains(PredSucc)) return getCouldNotCompute(); } if (Pred == L->getHeader()) { Ok = true; break; } BB = Pred; } if (!Ok) return getCouldNotCompute(); } bool IsOnlyExit = (L->getExitingBlock() != nullptr); TerminatorInst *Term = ExitingBlock->getTerminator(); if (BranchInst *BI = dyn_cast(Term)) { assert(BI->isConditional() && "If unconditional, it can't be in loop!"); // Proceed to the next level to examine the exit condition expression. return computeExitLimitFromCond( L, BI->getCondition(), BI->getSuccessor(0), BI->getSuccessor(1), /*ControlsExit=*/IsOnlyExit, AllowPredicates); } if (SwitchInst *SI = dyn_cast(Term)) return computeExitLimitFromSingleExitSwitch(L, SI, Exit, /*ControlsExit=*/IsOnlyExit); return getCouldNotCompute(); } ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCond( const Loop *L, Value *ExitCond, BasicBlock *TBB, BasicBlock *FBB, bool ControlsExit, bool AllowPredicates) { ScalarEvolution::ExitLimitCacheTy Cache(L, TBB, FBB, AllowPredicates); return computeExitLimitFromCondCached(Cache, L, ExitCond, TBB, FBB, ControlsExit, AllowPredicates); } Optional ScalarEvolution::ExitLimitCache::find(const Loop *L, Value *ExitCond, BasicBlock *TBB, BasicBlock *FBB, bool ControlsExit, bool AllowPredicates) { (void)this->L; (void)this->TBB; (void)this->FBB; (void)this->AllowPredicates; assert(this->L == L && this->TBB == TBB && this->FBB == FBB && this->AllowPredicates == AllowPredicates && "Variance in assumed invariant key components!"); auto Itr = TripCountMap.find({ExitCond, ControlsExit}); if (Itr == TripCountMap.end()) return None; return Itr->second; } void ScalarEvolution::ExitLimitCache::insert(const Loop *L, Value *ExitCond, BasicBlock *TBB, BasicBlock *FBB, bool ControlsExit, bool AllowPredicates, const ExitLimit &EL) { assert(this->L == L && this->TBB == TBB && this->FBB == FBB && this->AllowPredicates == AllowPredicates && "Variance in assumed invariant key components!"); auto InsertResult = TripCountMap.insert({{ExitCond, ControlsExit}, EL}); assert(InsertResult.second && "Expected successful insertion!"); (void)InsertResult; } ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondCached( ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, BasicBlock *TBB, BasicBlock *FBB, bool 
ControlsExit, bool AllowPredicates) { if (auto MaybeEL = Cache.find(L, ExitCond, TBB, FBB, ControlsExit, AllowPredicates)) return *MaybeEL; ExitLimit EL = computeExitLimitFromCondImpl(Cache, L, ExitCond, TBB, FBB, ControlsExit, AllowPredicates); Cache.insert(L, ExitCond, TBB, FBB, ControlsExit, AllowPredicates, EL); return EL; } ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl( ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, BasicBlock *TBB, BasicBlock *FBB, bool ControlsExit, bool AllowPredicates) { // Check if the controlling expression for this loop is an And or Or. if (BinaryOperator *BO = dyn_cast(ExitCond)) { if (BO->getOpcode() == Instruction::And) { // Recurse on the operands of the and. bool EitherMayExit = L->contains(TBB); ExitLimit EL0 = computeExitLimitFromCondCached( Cache, L, BO->getOperand(0), TBB, FBB, ControlsExit && !EitherMayExit, AllowPredicates); ExitLimit EL1 = computeExitLimitFromCondCached( Cache, L, BO->getOperand(1), TBB, FBB, ControlsExit && !EitherMayExit, AllowPredicates); const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); if (EitherMayExit) { // Both conditions must be true for the loop to continue executing. // Choose the less conservative count. if (EL0.ExactNotTaken == getCouldNotCompute() || EL1.ExactNotTaken == getCouldNotCompute()) BECount = getCouldNotCompute(); else BECount = getUMinFromMismatchedTypes(EL0.ExactNotTaken, EL1.ExactNotTaken); if (EL0.MaxNotTaken == getCouldNotCompute()) MaxBECount = EL1.MaxNotTaken; else if (EL1.MaxNotTaken == getCouldNotCompute()) MaxBECount = EL0.MaxNotTaken; else MaxBECount = getUMinFromMismatchedTypes(EL0.MaxNotTaken, EL1.MaxNotTaken); } else { // Both conditions must be true at the same time for the loop to exit. // For now, be conservative. assert(L->contains(FBB) && "Loop block has no successor in loop!"); if (EL0.MaxNotTaken == EL1.MaxNotTaken) MaxBECount = EL0.MaxNotTaken; if (EL0.ExactNotTaken == EL1.ExactNotTaken) BECount = EL0.ExactNotTaken; } // There are cases (e.g. PR26207) where computeExitLimitFromCond is able // to be more aggressive when computing BECount than when computing // MaxBECount. In these cases it is possible for EL0.ExactNotTaken and // EL1.ExactNotTaken to match, but for EL0.MaxNotTaken and EL1.MaxNotTaken // to not. if (isa(MaxBECount) && !isa(BECount)) MaxBECount = getConstant(getUnsignedRangeMax(BECount)); return ExitLimit(BECount, MaxBECount, false, {&EL0.Predicates, &EL1.Predicates}); } if (BO->getOpcode() == Instruction::Or) { // Recurse on the operands of the or. bool EitherMayExit = L->contains(FBB); ExitLimit EL0 = computeExitLimitFromCondCached( Cache, L, BO->getOperand(0), TBB, FBB, ControlsExit && !EitherMayExit, AllowPredicates); ExitLimit EL1 = computeExitLimitFromCondCached( Cache, L, BO->getOperand(1), TBB, FBB, ControlsExit && !EitherMayExit, AllowPredicates); const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); if (EitherMayExit) { // Both conditions must be false for the loop to continue executing. // Choose the less conservative count. 
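// ---- Illustrative aside (not part of this change) ----
// The "less conservative count" choice above, on concrete numbers. For an
// exit controlled by (A || B), the loop keeps running only while both
// conditions are false, so the loop body executes min(TA, TB) times, where
// TA and TB are the first iterations at which A resp. B become true.
// Hypothetical example code.
#if 0
#include <algorithm>
#include <cassert>

int main() {
  const unsigned TA = 10, TB = 7;
  unsigned Trips = 0;
  for (unsigned I = 0;; ++I) {
    bool A = I >= TA, B = I >= TB;
    if (A || B)
      break; // the loop exits as soon as either condition fires
    ++Trips;
  }
  assert(Trips == std::min(TA, TB)); // 7: the smaller of the two counts
  return 0;
}
#endif
// ---- end aside ----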
if (EL0.ExactNotTaken == getCouldNotCompute() || EL1.ExactNotTaken == getCouldNotCompute()) BECount = getCouldNotCompute(); else BECount = getUMinFromMismatchedTypes(EL0.ExactNotTaken, EL1.ExactNotTaken); if (EL0.MaxNotTaken == getCouldNotCompute()) MaxBECount = EL1.MaxNotTaken; else if (EL1.MaxNotTaken == getCouldNotCompute()) MaxBECount = EL0.MaxNotTaken; else MaxBECount = getUMinFromMismatchedTypes(EL0.MaxNotTaken, EL1.MaxNotTaken); } else { // Both conditions must be false at the same time for the loop to exit. // For now, be conservative. assert(L->contains(TBB) && "Loop block has no successor in loop!"); if (EL0.MaxNotTaken == EL1.MaxNotTaken) MaxBECount = EL0.MaxNotTaken; if (EL0.ExactNotTaken == EL1.ExactNotTaken) BECount = EL0.ExactNotTaken; } return ExitLimit(BECount, MaxBECount, false, {&EL0.Predicates, &EL1.Predicates}); } } // With an icmp, it may be feasible to compute an exact backedge-taken count. // Proceed to the next level to examine the icmp. if (ICmpInst *ExitCondICmp = dyn_cast(ExitCond)) { ExitLimit EL = computeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit); if (EL.hasFullInfo() || !AllowPredicates) return EL; // Try again, but use SCEV predicates this time. return computeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit, /*AllowPredicates=*/true); } // Check for a constant condition. These are normally stripped out by // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to // preserve the CFG and is temporarily leaving constant conditions // in place. if (ConstantInt *CI = dyn_cast(ExitCond)) { if (L->contains(FBB) == !CI->getZExtValue()) // The backedge is always taken. return getCouldNotCompute(); else // The backedge is never taken. return getZero(CI->getType()); } // If it's not an integer or pointer comparison then compute it the hard way. return computeExitCountExhaustively(L, ExitCond, !L->contains(TBB)); } ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromICmp(const Loop *L, ICmpInst *ExitCond, BasicBlock *TBB, BasicBlock *FBB, bool ControlsExit, bool AllowPredicates) { // If the condition was exit on true, convert the condition to exit on false ICmpInst::Predicate Pred; if (!L->contains(FBB)) Pred = ExitCond->getPredicate(); else Pred = ExitCond->getInversePredicate(); const ICmpInst::Predicate OriginalPred = Pred; // Handle common loops like: for (X = "string"; *X; ++X) if (LoadInst *LI = dyn_cast(ExitCond->getOperand(0))) if (Constant *RHS = dyn_cast(ExitCond->getOperand(1))) { ExitLimit ItCnt = computeLoadConstantCompareExitLimit(LI, RHS, L, Pred); if (ItCnt.hasAnyInfo()) return ItCnt; } const SCEV *LHS = getSCEV(ExitCond->getOperand(0)); const SCEV *RHS = getSCEV(ExitCond->getOperand(1)); // Try to evaluate any dependencies out of the loop. LHS = getSCEVAtScope(LHS, L); RHS = getSCEVAtScope(RHS, L); // At this point, we would like to compute how many iterations of the // loop the predicate will return true for these inputs. if (isLoopInvariant(LHS, L) && !isLoopInvariant(RHS, L)) { // If there is a loop-invariant, force it into the RHS. std::swap(LHS, RHS); Pred = ICmpInst::getSwappedPredicate(Pred); } // Simplify the operands before analyzing them. (void)SimplifyICmpOperands(Pred, LHS, RHS); // If we have a comparison of a chrec against a constant, try to use value // ranges to answer this query. if (const SCEVConstant *RHSC = dyn_cast(RHS)) if (const SCEVAddRecExpr *AddRec = dyn_cast(LHS)) if (AddRec->getLoop() == L) { // Form the constant range. 
ConstantRange CompRange = ConstantRange::makeExactICmpRegion(Pred, RHSC->getAPInt()); const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this); if (!isa(Ret)) return Ret; } switch (Pred) { case ICmpInst::ICMP_NE: { // while (X != Y) // Convert to: while (X-Y != 0) ExitLimit EL = howFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit, AllowPredicates); if (EL.hasAnyInfo()) return EL; break; } case ICmpInst::ICMP_EQ: { // while (X == Y) // Convert to: while (X-Y == 0) ExitLimit EL = howFarToNonZero(getMinusSCEV(LHS, RHS), L); if (EL.hasAnyInfo()) return EL; break; } case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_ULT: { // while (X < Y) bool IsSigned = Pred == ICmpInst::ICMP_SLT; ExitLimit EL = howManyLessThans(LHS, RHS, L, IsSigned, ControlsExit, AllowPredicates); if (EL.hasAnyInfo()) return EL; break; } case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_UGT: { // while (X > Y) bool IsSigned = Pred == ICmpInst::ICMP_SGT; ExitLimit EL = howManyGreaterThans(LHS, RHS, L, IsSigned, ControlsExit, AllowPredicates); if (EL.hasAnyInfo()) return EL; break; } default: break; } auto *ExhaustiveCount = computeExitCountExhaustively(L, ExitCond, !L->contains(TBB)); if (!isa(ExhaustiveCount)) return ExhaustiveCount; return computeShiftCompareExitLimit(ExitCond->getOperand(0), ExitCond->getOperand(1), L, OriginalPred); } ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromSingleExitSwitch(const Loop *L, SwitchInst *Switch, BasicBlock *ExitingBlock, bool ControlsExit) { assert(!L->contains(ExitingBlock) && "Not an exiting block!"); // Give up if the exit is the default dest of a switch. if (Switch->getDefaultDest() == ExitingBlock) return getCouldNotCompute(); assert(L->contains(Switch->getDefaultDest()) && "Default case must not exit the loop!"); const SCEV *LHS = getSCEVAtScope(Switch->getCondition(), L); const SCEV *RHS = getConstant(Switch->findCaseDest(ExitingBlock)); // while (X != Y) --> while (X-Y != 0) ExitLimit EL = howFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit); if (EL.hasAnyInfo()) return EL; return getCouldNotCompute(); } static ConstantInt * EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C, ScalarEvolution &SE) { const SCEV *InVal = SE.getConstant(C); const SCEV *Val = AddRec->evaluateAtIteration(InVal, SE); assert(isa(Val) && "Evaluation of SCEV at constant didn't fold correctly?"); return cast(Val)->getValue(); } /// Given an exit condition of 'icmp op load X, cst', try to see if we can /// compute the backedge execution count. ScalarEvolution::ExitLimit ScalarEvolution::computeLoadConstantCompareExitLimit( LoadInst *LI, Constant *RHS, const Loop *L, ICmpInst::Predicate predicate) { if (LI->isVolatile()) return getCouldNotCompute(); // Check to see if the loaded pointer is a getelementptr of a global. // TODO: Use SCEV instead of manually grubbing with GEPs. GetElementPtrInst *GEP = dyn_cast(LI->getOperand(0)); if (!GEP) return getCouldNotCompute(); // Make sure that it is really a constant global we are gepping, with an // initializer, and make sure the first IDX is really 0. GlobalVariable *GV = dyn_cast(GEP->getOperand(0)); if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() || GEP->getNumOperands() < 3 || !isa(GEP->getOperand(1)) || !cast(GEP->getOperand(1))->isNullValue()) return getCouldNotCompute(); // Okay, we allow one non-constant index into the GEP instruction. 
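// ---- Illustrative aside (not part of this change) ----
// The brute-force evaluation performed below, reduced to plain C++: for a
// loop like `for (X = "string"; *X; ++X)` over a constant global, the exit
// count falls out of folding the load and compare at each iteration.
// Hypothetical example code.
#if 0
#include <cassert>

int main() {
  static const char Glbl[] = "string"; // constant global with an initializer
  unsigned It = 0;
  while (Glbl[It] != '\0')             // fold load + compare per iteration
    ++It;
  assert(It == 6);                     // terminating iteration found
  return 0;
}
#endif
// ---- end aside ----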
Value *VarIdx = nullptr; std::vector Indexes; unsigned VarIdxNum = 0; for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i) if (ConstantInt *CI = dyn_cast(GEP->getOperand(i))) { Indexes.push_back(CI); } else if (!isa(GEP->getOperand(i))) { if (VarIdx) return getCouldNotCompute(); // Multiple non-constant idx's. VarIdx = GEP->getOperand(i); VarIdxNum = i-2; Indexes.push_back(nullptr); } // Loop-invariant loads may be a byproduct of loop optimization. Skip them. if (!VarIdx) return getCouldNotCompute(); // Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant. // Check to see if X is a loop variant variable value now. const SCEV *Idx = getSCEV(VarIdx); Idx = getSCEVAtScope(Idx, L); // We can only recognize very limited forms of loop index expressions, in // particular, only affine AddRec's like {C1,+,C2}. const SCEVAddRecExpr *IdxExpr = dyn_cast(Idx); if (!IdxExpr || !IdxExpr->isAffine() || isLoopInvariant(IdxExpr, L) || !isa(IdxExpr->getOperand(0)) || !isa(IdxExpr->getOperand(1))) return getCouldNotCompute(); unsigned MaxSteps = MaxBruteForceIterations; for (unsigned IterationNum = 0; IterationNum != MaxSteps; ++IterationNum) { ConstantInt *ItCst = ConstantInt::get( cast(IdxExpr->getType()), IterationNum); ConstantInt *Val = EvaluateConstantChrecAtConstant(IdxExpr, ItCst, *this); // Form the GEP offset. Indexes[VarIdxNum] = Val; Constant *Result = ConstantFoldLoadThroughGEPIndices(GV->getInitializer(), Indexes); if (!Result) break; // Cannot compute! // Evaluate the condition for this iteration. Result = ConstantExpr::getICmp(predicate, Result, RHS); if (!isa(Result)) break; // Couldn't decide for sure if (cast(Result)->getValue().isMinValue()) { ++NumArrayLenItCounts; return getConstant(ItCst); // Found terminating iteration! } } return getCouldNotCompute(); } ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit( Value *LHS, Value *RHSV, const Loop *L, ICmpInst::Predicate Pred) { ConstantInt *RHS = dyn_cast(RHSV); if (!RHS) return getCouldNotCompute(); const BasicBlock *Latch = L->getLoopLatch(); if (!Latch) return getCouldNotCompute(); const BasicBlock *Predecessor = L->getLoopPredecessor(); if (!Predecessor) return getCouldNotCompute(); // Return true if V is of the form "LHS `shift_op` ". // Return LHS in OutLHS and shift_opt in OutOpCode. auto MatchPositiveShift = [](Value *V, Value *&OutLHS, Instruction::BinaryOps &OutOpCode) { using namespace PatternMatch; ConstantInt *ShiftAmt; if (match(V, m_LShr(m_Value(OutLHS), m_ConstantInt(ShiftAmt)))) OutOpCode = Instruction::LShr; else if (match(V, m_AShr(m_Value(OutLHS), m_ConstantInt(ShiftAmt)))) OutOpCode = Instruction::AShr; else if (match(V, m_Shl(m_Value(OutLHS), m_ConstantInt(ShiftAmt)))) OutOpCode = Instruction::Shl; else return false; return ShiftAmt->getValue().isStrictlyPositive(); }; // Recognize a "shift recurrence" either of the form %iv or of %iv.shifted in // // loop: // %iv = phi i32 [ %iv.shifted, %loop ], [ %val, %preheader ] // %iv.shifted = lshr i32 %iv, // // Return true on a successful match. Return the corresponding PHI node (%iv // above) in PNOut and the opcode of the shift operation in OpCodeOut. auto MatchShiftRecurrence = [&](Value *V, PHINode *&PNOut, Instruction::BinaryOps &OpCodeOut) { Optional PostShiftOpCode; { Instruction::BinaryOps OpC; Value *V; // If we encounter a shift instruction, "peel off" the shift operation, // and remember that we did so. 
Later when we inspect %iv's backedge // value, we will make sure that the backedge value uses the same // operation. // // Note: the peeled shift operation does not have to be the same // instruction as the one feeding into the PHI's backedge value. We only // really care about it being the same *kind* of shift instruction -- // that's all that is required for our later inferences to hold. if (MatchPositiveShift(LHS, V, OpC)) { PostShiftOpCode = OpC; LHS = V; } } PNOut = dyn_cast(LHS); if (!PNOut || PNOut->getParent() != L->getHeader()) return false; Value *BEValue = PNOut->getIncomingValueForBlock(Latch); Value *OpLHS; return // The backedge value for the PHI node must be a shift by a positive // amount MatchPositiveShift(BEValue, OpLHS, OpCodeOut) && // of the PHI node itself OpLHS == PNOut && // and the kind of shift should be match the kind of shift we peeled // off, if any. (!PostShiftOpCode.hasValue() || *PostShiftOpCode == OpCodeOut); }; PHINode *PN; Instruction::BinaryOps OpCode; if (!MatchShiftRecurrence(LHS, PN, OpCode)) return getCouldNotCompute(); const DataLayout &DL = getDataLayout(); // The key rationale for this optimization is that for some kinds of shift // recurrences, the value of the recurrence "stabilizes" to either 0 or -1 // within a finite number of iterations. If the condition guarding the // backedge (in the sense that the backedge is taken if the condition is true) // is false for the value the shift recurrence stabilizes to, then we know // that the backedge is taken only a finite number of times. ConstantInt *StableValue = nullptr; switch (OpCode) { default: llvm_unreachable("Impossible case!"); case Instruction::AShr: { // {K,ashr,} stabilizes to signum(K) in at most // bitwidth(K) iterations. Value *FirstValue = PN->getIncomingValueForBlock(Predecessor); KnownBits Known = computeKnownBits(FirstValue, DL, 0, nullptr, Predecessor->getTerminator(), &DT); auto *Ty = cast(RHS->getType()); if (Known.isNonNegative()) StableValue = ConstantInt::get(Ty, 0); else if (Known.isNegative()) StableValue = ConstantInt::get(Ty, -1, true); else return getCouldNotCompute(); break; } case Instruction::LShr: case Instruction::Shl: // Both {K,lshr,} and {K,shl,} // stabilize to 0 in at most bitwidth(K) iterations. StableValue = ConstantInt::get(cast(RHS->getType()), 0); break; } auto *Result = ConstantFoldCompareInstOperands(Pred, StableValue, RHS, DL, &TLI); assert(Result->getType()->isIntegerTy(1) && "Otherwise cannot be an operand to a branch instruction"); if (Result->isZeroValue()) { unsigned BitWidth = getTypeSizeInBits(RHS->getType()); const SCEV *UpperBound = getConstant(getEffectiveSCEVType(RHS->getType()), BitWidth); return ExitLimit(getCouldNotCompute(), UpperBound, false); } return getCouldNotCompute(); } /// Return true if we can constant fold an instruction of the specified type, /// assuming that all operands were constants. static bool CanConstantFold(const Instruction *I) { if (isa(I) || isa(I) || isa(I) || isa(I) || isa(I) || isa(I)) return true; if (const CallInst *CI = dyn_cast(I)) if (const Function *F = CI->getCalledFunction()) return canConstantFoldCallTo(CI, F); return false; } /// Determine whether this instruction can constant evolve within this loop /// assuming its operands can all constant evolve. static bool canConstantEvolve(Instruction *I, const Loop *L) { // An instruction outside of the loop can't be derived from a loop PHI. 
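// ---- Illustrative aside (not part of this change) ----
// The stabilization fact used by computeShiftCompareExitLimit above: an
// AShr recurrence {K,ashr,s} with s > 0 reaches 0 (K >= 0) or -1 (K < 0)
// within bitwidth(K) steps and then stops changing. Hypothetical example;
// assumes the host compiler's arithmetic right shift of signed values.
#if 0
#include <cassert>
#include <cstdint>

int main() {
  for (int Start : {37, -37, 127, -128, 0}) {
    int8_t V = static_cast<int8_t>(Start);
    for (int I = 0; I < 8; ++I) // at most bitwidth(i8) iterations
      V >>= 1;                  // arithmetic shift on a signed type
    assert(V == (Start < 0 ? -1 : 0)); // 0 if non-negative, -1 if negative
    assert((V >> 1) == V);             // and it is a fixed point
  }
  return 0;
}
#endif
// ---- end aside ----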
  ConstantInt *StableValue = nullptr;
  switch (OpCode) {
  default:
    llvm_unreachable("Impossible case!");

  case Instruction::AShr: {
    // {K,ashr,<positive-constant>} stabilizes to signum(K) in at most
    // bitwidth(K) iterations.
    Value *FirstValue = PN->getIncomingValueForBlock(Predecessor);
    KnownBits Known = computeKnownBits(FirstValue, DL, 0, nullptr,
                                       Predecessor->getTerminator(), &DT);
    auto *Ty = cast<IntegerType>(RHS->getType());
    if (Known.isNonNegative())
      StableValue = ConstantInt::get(Ty, 0);
    else if (Known.isNegative())
      StableValue = ConstantInt::get(Ty, -1, true);
    else
      return getCouldNotCompute();

    break;
  }
  case Instruction::LShr:
  case Instruction::Shl:
    // Both {K,lshr,<positive-constant>} and {K,shl,<positive-constant>}
    // stabilize to 0 in at most bitwidth(K) iterations.
    StableValue = ConstantInt::get(cast<IntegerType>(RHS->getType()), 0);
    break;
  }

  auto *Result =
      ConstantFoldCompareInstOperands(Pred, StableValue, RHS, DL, &TLI);
  assert(Result->getType()->isIntegerTy(1) &&
         "Otherwise cannot be an operand to a branch instruction");

  if (Result->isZeroValue()) {
    unsigned BitWidth = getTypeSizeInBits(RHS->getType());
    const SCEV *UpperBound =
        getConstant(getEffectiveSCEVType(RHS->getType()), BitWidth);
    return ExitLimit(getCouldNotCompute(), UpperBound, false);
  }

  return getCouldNotCompute();
}

/// Return true if we can constant fold an instruction of the specified type,
/// assuming that all operands were constants.
static bool CanConstantFold(const Instruction *I) {
  if (isa<BinaryOperator>(I) || isa<CmpInst>(I) || isa<SelectInst>(I) ||
      isa<CastInst>(I) || isa<GetElementPtrInst>(I) || isa<LoadInst>(I))
    return true;

  if (const CallInst *CI = dyn_cast<CallInst>(I))
    if (const Function *F = CI->getCalledFunction())
      return canConstantFoldCallTo(CI, F);
  return false;
}

/// Determine whether this instruction can constant evolve within this loop
/// assuming its operands can all constant evolve.
static bool canConstantEvolve(Instruction *I, const Loop *L) {
  // An instruction outside of the loop can't be derived from a loop PHI.
  if (!L->contains(I)) return false;

  if (isa<PHINode>(I)) {
    // We don't currently keep track of the control flow needed to evaluate
    // PHIs, so we cannot handle PHIs inside of loops.
    return L->getHeader() == I->getParent();
  }

  // If we won't be able to constant fold this expression even if the operands
  // are constants, bail early.
  return CanConstantFold(I);
}

/// getConstantEvolvingPHIOperands - Implement getConstantEvolvingPHI by
/// recursing through each instruction operand until reaching a loop header phi.
static PHINode *
getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L,
                               DenseMap<Instruction *, PHINode *> &PHIMap,
                               unsigned Depth) {
  if (Depth > MaxConstantEvolvingDepth)
    return nullptr;

  // Otherwise, we can evaluate this instruction if all of its operands are
  // constant or derived from a PHI node themselves.
  PHINode *PHI = nullptr;
  for (Value *Op : UseInst->operands()) {
    if (isa<Constant>(Op)) continue;

    Instruction *OpInst = dyn_cast<Instruction>(Op);
    if (!OpInst || !canConstantEvolve(OpInst, L)) return nullptr;

    PHINode *P = dyn_cast<PHINode>(OpInst);
    if (!P)
      // If this operand is already visited, reuse the prior result.
      // We may have P != PHI if this is the deepest point at which the
      // inconsistent paths meet.
      P = PHIMap.lookup(OpInst);
    if (!P) {
      // Recurse and memoize the results, whether a phi is found or not.
      // This recursive call invalidates pointers into PHIMap.
      P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap, Depth + 1);
      PHIMap[OpInst] = P;
    }
    if (!P)
      return nullptr;  // Not evolving from PHI
    if (PHI && PHI != P)
      return nullptr;  // Evolving from multiple different PHIs.
    PHI = P;
  }
  // This is an expression evolving from a constant PHI!
  return PHI;
}

/// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node
/// in the loop that V is derived from.  We allow arbitrary operations along the
/// way, but the operands of an operation must either be constants or a value
/// derived from a constant PHI.  If this expression does not fit with these
/// constraints, return null.
static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I || !canConstantEvolve(I, L)) return nullptr;

  if (PHINode *PN = dyn_cast<PHINode>(I))
    return PN;

  // Record non-constant instructions contained by the loop.
  DenseMap<Instruction *, PHINode *> PHIMap;
  return getConstantEvolvingPHIOperands(I, L, PHIMap, 0);
}
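// For example (illustrative), in
//
//   %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
//   %i.next = add i32 %i, 1
//   %cmp = icmp eq i32 %i.next, 10
//
// getConstantEvolvingPHI(%cmp, L) returns %i: every operand of %cmp is either
// a constant or derived from %i alone.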
/// EvaluateExpression - Given an expression that passes the
/// getConstantEvolvingPHI predicate, evaluate its value assuming the PHI node
/// in the loop has the value PHIVal.  If we can't fold this expression for some
/// reason, return null.
static Constant *EvaluateExpression(Value *V, const Loop *L,
                                    DenseMap<Instruction *, Constant *> &Vals,
                                    const DataLayout &DL,
                                    const TargetLibraryInfo *TLI) {
  // Convenient constant check, but redundant for recursive calls.
  if (Constant *C = dyn_cast<Constant>(V)) return C;
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) return nullptr;

  if (Constant *C = Vals.lookup(I)) return C;

  // An instruction inside the loop depends on a value outside the loop that we
  // weren't given a mapping for, or a value such as a call inside the loop.
  if (!canConstantEvolve(I, L)) return nullptr;

  // An unmapped PHI can be due to a branch or another loop inside this loop,
  // or due to this not being the initial iteration through a loop where we
  // couldn't compute the evolution of this particular PHI last time.
  if (isa<PHINode>(I)) return nullptr;

  std::vector<Constant*> Operands(I->getNumOperands());

  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
    Instruction *Operand = dyn_cast<Instruction>(I->getOperand(i));
    if (!Operand) {
      Operands[i] = dyn_cast<Constant>(I->getOperand(i));
      if (!Operands[i]) return nullptr;
      continue;
    }
    Constant *C = EvaluateExpression(Operand, L, Vals, DL, TLI);
    Vals[Operand] = C;
    if (!C) return nullptr;
    Operands[i] = C;
  }

  if (CmpInst *CI = dyn_cast<CmpInst>(I))
    return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
                                           Operands[1], DL, TLI);
  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    if (!LI->isVolatile())
      return ConstantFoldLoadFromConstPtr(Operands[0], LI->getType(), DL);
  }
  return ConstantFoldInstOperands(I, Operands, DL, TLI);
}

// If every incoming value to PN except the one for BB is a specific Constant,
// return that, else return nullptr.
static Constant *getOtherIncomingValue(PHINode *PN, BasicBlock *BB) {
  Constant *IncomingVal = nullptr;

  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
    if (PN->getIncomingBlock(i) == BB)
      continue;

    auto *CurrentVal = dyn_cast<Constant>(PN->getIncomingValue(i));
    if (!CurrentVal)
      return nullptr;

    if (IncomingVal != CurrentVal) {
      if (IncomingVal)
        return nullptr;
      IncomingVal = CurrentVal;
    }
  }

  return IncomingVal;
}
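// For example (illustrative): for "x = 1; for (i = 0; i != 10; ++i) x *= 2;"
// the routine below executes the recurrence symbolically -- 1, 2, 4, ... --
// and returns 1024 as the exit value of x, provided the backedge-taken count
// (10) does not exceed MaxBruteForceIterations.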
/// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
/// in the header of its containing loop, we know the loop executes a
/// constant number of times, and the PHI node is just a recurrence
/// involving constants, fold it.
Constant *
ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
                                                   const APInt &BEs,
                                                   const Loop *L) {
  auto I = ConstantEvolutionLoopExitValue.find(PN);
  if (I != ConstantEvolutionLoopExitValue.end())
    return I->second;

  if (BEs.ugt(MaxBruteForceIterations))
    return ConstantEvolutionLoopExitValue[PN] = nullptr;  // Not going to evaluate it.

  Constant *&RetVal = ConstantEvolutionLoopExitValue[PN];

  DenseMap<Instruction *, Constant *> CurrentIterVals;
  BasicBlock *Header = L->getHeader();
  assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");

  BasicBlock *Latch = L->getLoopLatch();
  if (!Latch)
    return nullptr;

  for (PHINode &PHI : Header->phis()) {
    if (auto *StartCST = getOtherIncomingValue(&PHI, Latch))
      CurrentIterVals[&PHI] = StartCST;
  }
  if (!CurrentIterVals.count(PN))
    return RetVal = nullptr;

  Value *BEValue = PN->getIncomingValueForBlock(Latch);

  // Execute the loop symbolically to determine the exit value.
  assert(BEs.getActiveBits() < CHAR_BIT * sizeof(unsigned) &&
         "BEs is <= MaxBruteForceIterations which is an 'unsigned'!");

  unsigned NumIterations = BEs.getZExtValue(); // must be in range
  unsigned IterationNum = 0;
  const DataLayout &DL = getDataLayout();
  for (; ; ++IterationNum) {
    if (IterationNum == NumIterations)
      return RetVal = CurrentIterVals[PN];  // Got exit value!

    // Compute the value of the PHIs for the next iteration.
    // EvaluateExpression adds non-phi values to the CurrentIterVals map.
    DenseMap<Instruction *, Constant *> NextIterVals;
    Constant *NextPHI =
        EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
    if (!NextPHI)
      return nullptr;        // Couldn't evaluate!
    NextIterVals[PN] = NextPHI;

    bool StoppedEvolving = NextPHI == CurrentIterVals[PN];

    // Also evaluate the other PHI nodes.  However, we don't get to stop if we
    // cease to be able to evaluate one of them or if they stop evolving,
    // because that doesn't necessarily prevent us from computing PN.
    SmallVector<std::pair<PHINode *, Constant *>, 8> PHIsToCompute;
    for (const auto &I : CurrentIterVals) {
      PHINode *PHI = dyn_cast<PHINode>(I.first);
      if (!PHI || PHI == PN || PHI->getParent() != Header) continue;
      PHIsToCompute.emplace_back(PHI, I.second);
    }
    // We use two distinct loops because EvaluateExpression may invalidate any
    // iterators into CurrentIterVals.
    for (const auto &I : PHIsToCompute) {
      PHINode *PHI = I.first;
      Constant *&NextPHI = NextIterVals[PHI];
      if (!NextPHI) {   // Not already computed.
        Value *BEValue = PHI->getIncomingValueForBlock(Latch);
        NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
      }
      if (NextPHI != I.second)
        StoppedEvolving = false;
    }

    // If all entries in CurrentIterVals == NextIterVals then we can stop
    // iterating, the loop can't continue to change.
    if (StoppedEvolving)
      return RetVal = CurrentIterVals[PN];

    CurrentIterVals.swap(NextIterVals);
  }
}

const SCEV *ScalarEvolution::computeExitCountExhaustively(const Loop *L,
                                                          Value *Cond,
                                                          bool ExitWhen) {
  PHINode *PN = getConstantEvolvingPHI(Cond, L);
  if (!PN) return getCouldNotCompute();

  // If the loop is canonicalized, the PHI will have exactly two entries.
  // That's the only form we support here.
  if (PN->getNumIncomingValues() != 2) return getCouldNotCompute();

  DenseMap<Instruction *, Constant *> CurrentIterVals;
  BasicBlock *Header = L->getHeader();
  assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");

  BasicBlock *Latch = L->getLoopLatch();
  assert(Latch && "Should follow from NumIncomingValues == 2!");

  for (PHINode &PHI : Header->phis()) {
    if (auto *StartCST = getOtherIncomingValue(&PHI, Latch))
      CurrentIterVals[&PHI] = StartCST;
  }
  if (!CurrentIterVals.count(PN))
    return getCouldNotCompute();

  // Okay, we found a PHI node that defines the trip count of this loop.
  // Execute the loop symbolically to determine when the condition gets a
  // value of "ExitWhen".
  unsigned MaxIterations = MaxBruteForceIterations;   // Limit analysis.
  const DataLayout &DL = getDataLayout();
  for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){
    auto *CondVal = dyn_cast_or_null<ConstantInt>(
        EvaluateExpression(Cond, L, CurrentIterVals, DL, &TLI));

    // Couldn't symbolically evaluate.
    if (!CondVal) return getCouldNotCompute();

    if (CondVal->getValue() == uint64_t(ExitWhen)) {
      ++NumBruteForceTripCountsComputed;
      return getConstant(Type::getInt32Ty(getContext()), IterationNum);
    }

    // Update all the PHI nodes for the next iteration.
    DenseMap<Instruction *, Constant *> NextIterVals;

    // Create a list of which PHIs we need to compute. We want to do this before
    // calling EvaluateExpression on them because that may invalidate iterators
    // into CurrentIterVals.
    SmallVector<PHINode *, 8> PHIsToCompute;
    for (const auto &I : CurrentIterVals) {
      PHINode *PHI = dyn_cast<PHINode>(I.first);
      if (!PHI || PHI->getParent() != Header) continue;
      PHIsToCompute.push_back(PHI);
    }
    for (PHINode *PHI : PHIsToCompute) {
      Constant *&NextPHI = NextIterVals[PHI];
      if (NextPHI) continue;    // Already computed!

      Value *BEValue = PHI->getIncomingValueForBlock(Latch);
      NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
    }
    CurrentIterVals.swap(NextIterVals);
  }

  // Too many iterations were needed to evaluate.
  return getCouldNotCompute();
}
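// Note (illustrative): for "i = 0; do { i += 3; } while (i != 9)" the loop
// above evaluates the exit condition on i = 0, 3, 6, 9 in turn and returns
// the first iteration number at which the condition equals ExitWhen.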
const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
  SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values =
      ValuesAtScopes[V];
  // Check to see if we've folded this expression at this loop before.
  for (auto &LS : Values)
    if (LS.first == L)
      return LS.second ? LS.second : V;

  Values.emplace_back(L, nullptr);

  // Otherwise compute it.
  const SCEV *C = computeSCEVAtScope(V, L);
  for (auto &LS : reverse(ValuesAtScopes[V]))
    if (LS.first == L) {
      LS.second = C;
      break;
    }
  return C;
}

/// This builds up a Constant using the ConstantExpr interface.  That way, we
/// will return Constants for objects which aren't represented by a
/// SCEVConstant, because SCEVConstant is restricted to ConstantInt.
/// Returns NULL if the SCEV isn't representable as a Constant.
static Constant *BuildConstantFromSCEV(const SCEV *V) {
  switch (static_cast<SCEVTypes>(V->getSCEVType())) {
    case scCouldNotCompute:
    case scAddRecExpr:
      break;
    case scConstant:
      return cast<SCEVConstant>(V)->getValue();
    case scUnknown:
      return dyn_cast<Constant>(cast<SCEVUnknown>(V)->getValue());
    case scSignExtend: {
      const SCEVSignExtendExpr *SS = cast<SCEVSignExtendExpr>(V);
      if (Constant *CastOp = BuildConstantFromSCEV(SS->getOperand()))
        return ConstantExpr::getSExt(CastOp, SS->getType());
      break;
    }
    case scZeroExtend: {
      const SCEVZeroExtendExpr *SZ = cast<SCEVZeroExtendExpr>(V);
      if (Constant *CastOp = BuildConstantFromSCEV(SZ->getOperand()))
        return ConstantExpr::getZExt(CastOp, SZ->getType());
      break;
    }
    case scTruncate: {
      const SCEVTruncateExpr *ST = cast<SCEVTruncateExpr>(V);
      if (Constant *CastOp = BuildConstantFromSCEV(ST->getOperand()))
        return ConstantExpr::getTrunc(CastOp, ST->getType());
      break;
    }
    case scAddExpr: {
      const SCEVAddExpr *SA = cast<SCEVAddExpr>(V);
      if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) {
        if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
          unsigned AS = PTy->getAddressSpace();
          Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
          C = ConstantExpr::getBitCast(C, DestPtrTy);
        }
        for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) {
          Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i));
          if (!C2) return nullptr;

          // First pointer!
          if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) {
            unsigned AS = C2->getType()->getPointerAddressSpace();
            std::swap(C, C2);
            Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
            // The offsets have been converted to bytes.  We can add bytes to an
            // i8* by GEP with the byte count in the first index.
            C = ConstantExpr::getBitCast(C, DestPtrTy);
          }

          // Don't bother trying to sum two pointers. We probably can't
          // statically compute a load that results from it anyway.
          if (C2->getType()->isPointerTy())
            return nullptr;

          if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
            if (PTy->getElementType()->isStructTy())
              C2 = ConstantExpr::getIntegerCast(
                  C2, Type::getInt32Ty(C->getContext()), true);
            C = ConstantExpr::getGetElementPtr(PTy->getElementType(), C, C2);
          } else
            C = ConstantExpr::getAdd(C, C2);
        }
        return C;
      }
      break;
    }
    case scMulExpr: {
      const SCEVMulExpr *SM = cast<SCEVMulExpr>(V);
      if (Constant *C = BuildConstantFromSCEV(SM->getOperand(0))) {
        // Don't bother with pointers at all.
        if (C->getType()->isPointerTy()) return nullptr;
        for (unsigned i = 1, e = SM->getNumOperands(); i != e; ++i) {
          Constant *C2 = BuildConstantFromSCEV(SM->getOperand(i));
          if (!C2 || C2->getType()->isPointerTy()) return nullptr;
          C = ConstantExpr::getMul(C, C2);
        }
        return C;
      }
      break;
    }
    case scUDivExpr: {
      const SCEVUDivExpr *SU = cast<SCEVUDivExpr>(V);
      if (Constant *LHS = BuildConstantFromSCEV(SU->getLHS()))
        if (Constant *RHS = BuildConstantFromSCEV(SU->getRHS()))
          if (LHS->getType() == RHS->getType())
            return ConstantExpr::getUDiv(LHS, RHS);
      break;
    }
    case scSMaxExpr:
    case scUMaxExpr:
      break; // TODO: smax, umax.
  }
  return nullptr;
}
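// For example (illustrative): the SCEV (8 + @g), an scAddExpr of a constant
// and a global pointer, is rebuilt above as a bitcast-to-i8* plus GEP
// constant expression, which a later constant-folded load can see through.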
const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
  if (isa<SCEVConstant>(V)) return V;

  // If this instruction is evolved from a constant-evolving PHI, compute the
  // exit value from the loop without using SCEVs.
  if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) {
    if (Instruction *I = dyn_cast<Instruction>(SU->getValue())) {
      const Loop *LI = this->LI[I->getParent()];
      if (LI && LI->getParentLoop() == L)  // Looking for loop exit value.
        if (PHINode *PN = dyn_cast<PHINode>(I))
          if (PN->getParent() == LI->getHeader()) {
            // Okay, there is no closed form solution for the PHI node.  Check
            // to see if the loop that contains it has a known backedge-taken
            // count.  If so, we may be able to force computation of the exit
            // value.
            const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI);
            if (const SCEVConstant *BTCC =
                    dyn_cast<SCEVConstant>(BackedgeTakenCount)) {

              // This trivial case can show up in some degenerate cases where
              // the incoming IR has not yet been fully simplified.
              if (BTCC->getValue()->isZero()) {
                Value *InitValue = nullptr;
                bool MultipleInitValues = false;
                for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
                  if (!LI->contains(PN->getIncomingBlock(i))) {
                    if (!InitValue)
                      InitValue = PN->getIncomingValue(i);
                    else if (InitValue != PN->getIncomingValue(i)) {
                      MultipleInitValues = true;
                      break;
                    }
                  }
                  if (!MultipleInitValues && InitValue)
                    return getSCEV(InitValue);
                }
              }
              // Okay, we know how many times the containing loop executes.  If
              // this is a constant evolving PHI node, get the final value at
              // the specified iteration number.
              Constant *RV =
                  getConstantEvolutionLoopExitValue(PN, BTCC->getAPInt(), LI);
              if (RV) return getSCEV(RV);
            }
          }

      // Okay, this is an expression that we cannot symbolically evaluate
      // into a SCEV.  Check to see if it's possible to symbolically evaluate
      // the arguments into constants, and if so, try to constant propagate the
      // result.  This is particularly useful for computing loop exit values.
      if (CanConstantFold(I)) {
        SmallVector<Constant *, 4> Operands;
        bool MadeImprovement = false;
        for (Value *Op : I->operands()) {
          if (Constant *C = dyn_cast<Constant>(Op)) {
            Operands.push_back(C);
            continue;
          }

          // If any of the operands is non-constant and if they are
          // non-integer and non-pointer, don't even try to analyze them
          // with scev techniques.
          if (!isSCEVable(Op->getType()))
            return V;

          const SCEV *OrigV = getSCEV(Op);
          const SCEV *OpV = getSCEVAtScope(OrigV, L);
          MadeImprovement |= OrigV != OpV;

          Constant *C = BuildConstantFromSCEV(OpV);
          if (!C) return V;
          if (C->getType() != Op->getType())
            C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
                                                              Op->getType(),
                                                              false),
                                      C, Op->getType());
          Operands.push_back(C);
        }

        // Check to see if getSCEVAtScope actually made an improvement.
        if (MadeImprovement) {
          Constant *C = nullptr;
          const DataLayout &DL = getDataLayout();
          if (const CmpInst *CI = dyn_cast<CmpInst>(I))
            C = ConstantFoldCompareInstOperands(CI->getPredicate(),
                                                Operands[0], Operands[1], DL,
                                                &TLI);
          else if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
            if (!LI->isVolatile())
              C = ConstantFoldLoadFromConstPtr(Operands[0], LI->getType(), DL);
          } else
            C = ConstantFoldInstOperands(I, Operands, DL, &TLI);
          if (!C) return V;
          return getSCEV(C);
        }
      }
    }

    // This is some other type of SCEVUnknown, just return it.
    return V;
  }
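  // The remaining cases recurse into the operands of composite expressions.
  // For example (illustrative): getSCEVAtScope((%n + {0,+,1}<L>), Outer)
  // first folds the addrec to its exit value and then rebuilds the add.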
  if (const SCEVCommutativeExpr *Comm = dyn_cast<SCEVCommutativeExpr>(V)) {
    // Avoid performing the look-up in the common case where the specified
    // expression has no loop-variant portions.
    for (unsigned i = 0, e = Comm->getNumOperands(); i != e; ++i) {
      const SCEV *OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
      if (OpAtScope != Comm->getOperand(i)) {
        // Okay, at least one of these operands is loop variant but might be
        // foldable.  Build a new instance of the folded commutative expression.
        SmallVector<const SCEV *, 8> NewOps(Comm->op_begin(),
                                            Comm->op_begin()+i);
        NewOps.push_back(OpAtScope);

        for (++i; i != e; ++i) {
          OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
          NewOps.push_back(OpAtScope);
        }
        if (isa<SCEVAddExpr>(Comm))
          return getAddExpr(NewOps);
        if (isa<SCEVMulExpr>(Comm))
          return getMulExpr(NewOps);
        if (isa<SCEVSMaxExpr>(Comm))
          return getSMaxExpr(NewOps);
        if (isa<SCEVUMaxExpr>(Comm))
          return getUMaxExpr(NewOps);
        llvm_unreachable("Unknown commutative SCEV type!");
      }
    }
    // If we got here, all operands are loop invariant.
    return Comm;
  }

  if (const SCEVUDivExpr *Div = dyn_cast<SCEVUDivExpr>(V)) {
    const SCEV *LHS = getSCEVAtScope(Div->getLHS(), L);
    const SCEV *RHS = getSCEVAtScope(Div->getRHS(), L);
    if (LHS == Div->getLHS() && RHS == Div->getRHS())
      return Div;   // must be loop invariant
    return getUDivExpr(LHS, RHS);
  }

  // If this is a loop recurrence for a loop that does not contain L, then we
  // are dealing with the final value computed by the loop.
  if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V)) {
    // First, attempt to evaluate each operand.
    // Avoid performing the look-up in the common case where the specified
    // expression has no loop-variant portions.
    for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
      const SCEV *OpAtScope = getSCEVAtScope(AddRec->getOperand(i), L);
      if (OpAtScope == AddRec->getOperand(i))
        continue;

      // Okay, at least one of these operands is loop variant but might be
      // foldable.  Build a new instance of the folded commutative expression.
      SmallVector<const SCEV *, 8> NewOps(AddRec->op_begin(),
                                          AddRec->op_begin()+i);
      NewOps.push_back(OpAtScope);
      for (++i; i != e; ++i)
        NewOps.push_back(getSCEVAtScope(AddRec->getOperand(i), L));

      const SCEV *FoldedRec =
          getAddRecExpr(NewOps, AddRec->getLoop(),
                        AddRec->getNoWrapFlags(SCEV::FlagNW));
      AddRec = dyn_cast<SCEVAddRecExpr>(FoldedRec);
      // The addrec may be folded to a nonrecurrence, for example, if the
      // induction variable is multiplied by zero after constant folding. Go
      // ahead and return the folded value.
      if (!AddRec)
        return FoldedRec;
      break;
    }

    // If the scope is outside the addrec's loop, evaluate it by using the
    // loop exit value of the addrec.
    if (!AddRec->getLoop()->contains(L)) {
      // To evaluate this recurrence, we need to know how many times the AddRec
      // loop iterates.  Compute this now.
      const SCEV *BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop());
      if (BackedgeTakenCount == getCouldNotCompute()) return AddRec;

      // Then, evaluate the AddRec.
      return AddRec->evaluateAtIteration(BackedgeTakenCount, *this);
    }

    return AddRec;
  }

  if (const SCEVZeroExtendExpr *Cast = dyn_cast<SCEVZeroExtendExpr>(V)) {
    const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
    if (Op == Cast->getOperand())
      return Cast;  // must be loop invariant
    return getZeroExtendExpr(Op, Cast->getType());
  }

  if (const SCEVSignExtendExpr *Cast = dyn_cast<SCEVSignExtendExpr>(V)) {
    const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
    if (Op == Cast->getOperand())
      return Cast;  // must be loop invariant
    return getSignExtendExpr(Op, Cast->getType());
  }

  if (const SCEVTruncateExpr *Cast = dyn_cast<SCEVTruncateExpr>(V)) {
    const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
    if (Op == Cast->getOperand())
      return Cast;  // must be loop invariant
    return getTruncateExpr(Op, Cast->getType());
  }

  llvm_unreachable("Unknown SCEV type!");
}

const SCEV *ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) {
  return getSCEVAtScope(getSCEV(V), L);
}
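// Worked example for SolveLinEquationWithOverflow below (illustrative):
// solving 4*X == 12 (mod 256): D = gcd(4, 256) = 4 (Mult2 == 2), B == 12 is
// divisible by 4, and since A/D == 1 is its own multiplicative inverse the
// minimum unsigned root is (1 * 12 mod 256) / 4 == 3; indeed 4 * 3 == 12.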
/// Finds the minimum unsigned root of the following equation:
///
///     A * X = B (mod N)
///
/// where N = 2^BW and BW is the common bit width of A and B. The signedness of
/// A and B isn't important.
///
/// If the equation does not have a solution, SCEVCouldNotCompute is returned.
static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const SCEV *B,
                                                ScalarEvolution &SE) {
  uint32_t BW = A.getBitWidth();
  assert(BW == SE.getTypeSizeInBits(B->getType()));
  assert(A != 0 && "A must be non-zero.");

  // 1. D = gcd(A, N)
  //
  // The gcd of A and N may have only one prime factor: 2. The number of
  // trailing zeros in A is its multiplicity
  uint32_t Mult2 = A.countTrailingZeros();
  // D = 2^Mult2

  // 2. Check if B is divisible by D.
  //
  // B is divisible by D if and only if the multiplicity of prime factor 2 for B
  // is not less than multiplicity of this prime factor for D.
  if (SE.GetMinTrailingZeros(B) < Mult2)
    return SE.getCouldNotCompute();

  // 3. Compute I: the multiplicative inverse of (A / D) in arithmetic
  // modulo (N / D).
  //
  // If D == 1, (N / D) == N == 2^BW, so we need one extra bit to represent
  // (N / D) in general. The inverse itself always fits into BW bits, though,
  // so we immediately truncate it.
  APInt AD = A.lshr(Mult2).zext(BW + 1);  // AD = A / D
  APInt Mod(BW + 1, 0);
  Mod.setBit(BW - Mult2);  // Mod = N / D
  APInt I = AD.multiplicativeInverse(Mod).trunc(BW);

  // 4. Compute the minimum unsigned root of the equation:
  // I * (B / D) mod (N / D)
  // To simplify the computation, we factor out the divide by D:
  // (I * B mod N) / D
  const SCEV *D = SE.getConstant(APInt::getOneBitSet(BW, Mult2));
  return SE.getUDivExactExpr(SE.getMulExpr(B, SE.getConstant(I)), D);
}

/// Find the roots of the quadratic equation for the given quadratic chrec
/// {L,+,M,+,N}.  This returns either the two roots (which might be the same) or
/// two SCEVCouldNotCompute objects.
static Optional<std::pair<const SCEVConstant *, const SCEVConstant *>>
SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
  assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!");
  const SCEVConstant *LC = dyn_cast<SCEVConstant>(AddRec->getOperand(0));
  const SCEVConstant *MC = dyn_cast<SCEVConstant>(AddRec->getOperand(1));
  const SCEVConstant *NC = dyn_cast<SCEVConstant>(AddRec->getOperand(2));

  // We currently can only solve this if the coefficients are constants.
  if (!LC || !MC || !NC)
    return None;

  uint32_t BitWidth = LC->getAPInt().getBitWidth();
  const APInt &L = LC->getAPInt();
  const APInt &M = MC->getAPInt();
  const APInt &N = NC->getAPInt();
  APInt Two(BitWidth, 2);

  // Convert from chrec coefficients to polynomial coefficients AX^2+BX+C

  // The A coefficient is N/2
  APInt A = N.sdiv(Two);

  // The B coefficient is M-N/2
  APInt B = M;
  B -= A; // A is the same as N/2.

  // The C coefficient is L.
  const APInt& C = L;

  // Compute the B^2-4ac term.
  APInt SqrtTerm = B;
  SqrtTerm *= B;
  SqrtTerm -= 4 * (A * C);

  if (SqrtTerm.isNegative()) {
    // The loop is provably infinite.
    return None;
  }

  // Compute sqrt(B^2-4ac). This is guaranteed to be the nearest
  // integer value or else APInt::sqrt() will assert.
  APInt SqrtVal = SqrtTerm.sqrt();

  // Compute the two solutions for the quadratic formula.
  // The divisions must be performed as signed divisions.
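  // For instance (illustrative): the chrec {-12,+,2,+,2} expands to
  // X^2 + X - 12 (A = 1, B = 1, C = -12), whose integer roots are 3 and -4.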
  APInt NegB = -std::move(B);
  APInt TwoA = std::move(A);
  TwoA <<= 1;
  if (TwoA.isNullValue())
    return None;

  LLVMContext &Context = SE.getContext();

  ConstantInt *Solution1 =
      ConstantInt::get(Context, (NegB + SqrtVal).sdiv(TwoA));
  ConstantInt *Solution2 =
      ConstantInt::get(Context, (NegB - SqrtVal).sdiv(TwoA));

  return std::make_pair(cast<SCEVConstant>(SE.getConstant(Solution1)),
                        cast<SCEVConstant>(SE.getConstant(Solution2)));
}

ScalarEvolution::ExitLimit
ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
                              bool AllowPredicates) {

  // This is only used for loops with a "x != y" exit test. The exit condition
  // is now expressed as a single expression, V = x-y. So the exit test is
  // effectively V != 0.  We know and take advantage of the fact that this
  // expression is only used in a comparison-with-zero context.

  SmallPtrSet<const SCEVPredicate *, 4> Predicates;
  // If the value is a constant
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
    // If the value is already zero, the branch will execute zero times.
    if (C->getValue()->isZero()) return C;
    return getCouldNotCompute();  // Otherwise it will loop infinitely.
  }

  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V);
  if (!AddRec && AllowPredicates)
    // Try to make this an AddRec using runtime tests, in the first X
    // iterations of this loop, where X is the SCEV expression found by the
    // algorithm below.
    AddRec = convertSCEVToAddRecWithPredicates(V, L, Predicates);

  if (!AddRec || AddRec->getLoop() != L)
    return getCouldNotCompute();

  // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of
  // the quadratic equation to solve it.
  if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) {
    if (auto Roots = SolveQuadraticEquation(AddRec, *this)) {
      const SCEVConstant *R1 = Roots->first;
      const SCEVConstant *R2 = Roots->second;
      // Pick the smallest positive root value.
      if (ConstantInt *CB = dyn_cast<ConstantInt>(ConstantExpr::getICmp(
              CmpInst::ICMP_ULT, R1->getValue(), R2->getValue()))) {
        if (!CB->getZExtValue())
          std::swap(R1, R2); // R1 is the minimum root now.

        // We can only use this value if the chrec ends up with an exact zero
        // value at this index.  When solving for "X*X != 5", for example, we
        // should not accept a root of 2.
        const SCEV *Val = AddRec->evaluateAtIteration(R1, *this);
        if (Val->isZero())
          // We found a quadratic root!
          return ExitLimit(R1, R1, false, Predicates);
      }
    }
    return getCouldNotCompute();
  }

  // Otherwise we can only handle this if it is affine.
  if (!AddRec->isAffine())
    return getCouldNotCompute();

  // If this is an affine expression, the execution count of this branch is
  // the minimum unsigned root of the following equation:
  //
  //     Start + Step*N = 0 (mod 2^BW)
  //
  // equivalent to:
  //
  //             Step*N = -Start (mod 2^BW)
  //
  // where BW is the common bit width of Start and Step.

  // Get the initial value for the loop.
  const SCEV *Start = getSCEVAtScope(AddRec->getStart(), L->getParentLoop());
  const SCEV *Step = getSCEVAtScope(AddRec->getOperand(1), L->getParentLoop());

  // For now we handle only constant steps.
  //
  // TODO: Handle a nonconstant Step given AddRec<NUW>. If the
  // AddRec is NUW, then (in an unsigned sense) it cannot be counting up to wrap
  // to 0, it must be counting down to equal 0. Consequently, N = Start / -Step.
  // We have not yet seen any such cases.
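  //
  // For example (illustrative): for i8 Start == 4 and Step == 2, solving
  // 2*N == -4 (mod 256) yields the minimum root N == 126, and indeed
  // 4 + 2*126 == 256 == 0 (mod 2^8): the IV wraps around to zero.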
  const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step);
  if (!StepC || StepC->getValue()->isZero())
    return getCouldNotCompute();

  // For positive steps (counting up until unsigned overflow):
  //   N = -Start/Step (as unsigned)
  // For negative steps (counting down to zero):
  //   N = Start/-Step
  // First compute the unsigned distance from zero in the direction of Step.
  bool CountDown = StepC->getAPInt().isNegative();
  const SCEV *Distance = CountDown ? Start : getNegativeSCEV(Start);

  // Handle unitary steps, which cannot wrap around.
  // 1*N = -Start; -1*N = Start (mod 2^BW), so:
  //   N = Distance (as unsigned)
  if (StepC->getValue()->isOne() || StepC->getValue()->isMinusOne()) {
    APInt MaxBECount = getUnsignedRangeMax(Distance);

    // When a loop like "for (int i = 0; i != n; ++i) { /* body */ }" is
    // rotated, we end up with a loop whose backedge-taken count is n - 1.
    // Detect this case, and see if we can improve the bound.
    //
    // Explicitly handling this here is necessary because getUnsignedRange
    // isn't context-sensitive; it doesn't know that we only care about the
    // range inside the loop.
    const SCEV *Zero = getZero(Distance->getType());
    const SCEV *One = getOne(Distance->getType());
    const SCEV *DistancePlusOne = getAddExpr(Distance, One);
    if (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, DistancePlusOne, Zero)) {
      // If Distance + 1 doesn't overflow, we can compute the maximum distance
      // as "unsigned_max(Distance + 1) - 1".
      ConstantRange CR = getUnsignedRange(DistancePlusOne);
      MaxBECount = APIntOps::umin(MaxBECount, CR.getUnsignedMax() - 1);
    }

    return ExitLimit(Distance, getConstant(MaxBECount), false, Predicates);
  }

  // If the condition controls loop exit (the loop exits only if the expression
  // is true) and the addition is no-wrap we can use unsigned divide to
  // compute the backedge count.  In this case, the step may not divide the
  // distance, but we don't care because if the condition is "missed" the loop
  // will have undefined behavior due to wrapping.
  if (ControlsExit && AddRec->hasNoSelfWrap() &&
      loopHasNoAbnormalExits(AddRec->getLoop())) {
    const SCEV *Exact =
        getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
    const SCEV *Max =
        Exact == getCouldNotCompute()
            ? Exact
            : getConstant(getUnsignedRangeMax(Exact));
    return ExitLimit(Exact, Max, false, Predicates);
  }

  // Solve the general equation.
  const SCEV *E = SolveLinEquationWithOverflow(StepC->getAPInt(),
                                               getNegativeSCEV(Start), *this);
  const SCEV *M = E == getCouldNotCompute()
                      ? E
                      : getConstant(getUnsignedRangeMax(E));
  return ExitLimit(E, M, false, Predicates);
}

ScalarEvolution::ExitLimit
ScalarEvolution::howFarToNonZero(const SCEV *V, const Loop *L) {
  // Loops that look like: while (X == 0) are very strange indeed.  We don't
  // handle them yet except for the trivial case.  This could be expanded in the
  // future as needed.

  // If the value is a constant, check to see if it is known to be non-zero
  // already.  If so, the backedge will execute zero times.
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
    if (!C->getValue()->isZero())
      return getZero(C->getType());
    return getCouldNotCompute();  // Otherwise it will loop infinitely.
  }

  // We could implement others, but I really doubt anyone writes loops like
  // this, and if they did, they would already be constant folded.
  return getCouldNotCompute();
}

std::pair<BasicBlock *, BasicBlock *>
ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) {
  // If the block has a unique predecessor, then there is no path from the
  // predecessor to the block that does not go through the direct edge
  // from the predecessor to the block.
  if (BasicBlock *Pred = BB->getSinglePredecessor())
    return {Pred, BB};

  // A loop's header is defined to be a block that dominates the loop.
  // If the header has a unique predecessor outside the loop, it must be
  // a block that has exactly one successor that can reach the loop.
  if (Loop *L = LI.getLoopFor(BB))
    return {L->getLoopPredecessor(), L->getHeader()};

  return {nullptr, nullptr};
}

/// SCEV structural equivalence is usually sufficient for testing whether two
/// expressions are equal, however for the purposes of looking for a condition
/// guarding a loop, it can be useful to be a little more general, since a
/// front-end may have replicated the controlling expression.
static bool HasSameValue(const SCEV *A, const SCEV *B) {
  // Quick check to see if they are the same SCEV.
  if (A == B) return true;

  auto ComputesEqualValues = [](const Instruction *A, const Instruction *B) {
    // Not all instructions that are "identical" compute the same value.  For
    // instance, two distinct alloca instructions allocating the same type are
    // identical and do not read memory; but compute distinct values.
    return A->isIdenticalTo(B) &&
           (isa<BinaryOperator>(A) || isa<GetElementPtrInst>(A));
  };

  // Otherwise, if they're both SCEVUnknown, it's possible that they hold
  // two different instructions with the same value. Check for this case.
  if (const SCEVUnknown *AU = dyn_cast<SCEVUnknown>(A))
    if (const SCEVUnknown *BU = dyn_cast<SCEVUnknown>(B))
      if (const Instruction *AI = dyn_cast<Instruction>(AU->getValue()))
        if (const Instruction *BI = dyn_cast<Instruction>(BU->getValue()))
          if (ComputesEqualValues(AI, BI))
            return true;

  // Otherwise assume they may have a different value.
  return false;
}

bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
                                           const SCEV *&LHS, const SCEV *&RHS,
                                           unsigned Depth) {
  bool Changed = false;
  // If we hit the max recursion limit bail out.
  if (Depth >= 3)
    return false;

  // Canonicalize a constant to the right side.
  if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
    // Check for both operands constant.
    if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
      if (ConstantExpr::getICmp(Pred,
                                LHSC->getValue(),
                                RHSC->getValue())->isNullValue())
        goto trivially_false;
      else
        goto trivially_true;
    }
    // Otherwise swap the operands to put the constant on the right.
    std::swap(LHS, RHS);
    Pred = ICmpInst::getSwappedPredicate(Pred);
    Changed = true;
  }

  // If we're comparing an addrec with a value which is loop-invariant in the
  // addrec's loop, put the addrec on the left. Also make a dominance check,
  // as both operands could be addrecs loop-invariant in each other's loop.
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS)) {
    const Loop *L = AR->getLoop();
    if (isLoopInvariant(LHS, L) && properlyDominates(LHS, L->getHeader())) {
      std::swap(LHS, RHS);
      Pred = ICmpInst::getSwappedPredicate(Pred);
      Changed = true;
    }
  }

  // If there's a constant operand, canonicalize comparisons with boundary
  // cases, and canonicalize *-or-equal comparisons to regular comparisons.
  if (const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS)) {
    const APInt &RA = RC->getAPInt();

    bool SimplifiedByConstantRange = false;

    if (!ICmpInst::isEquality(Pred)) {
      ConstantRange ExactCR = ConstantRange::makeExactICmpRegion(Pred, RA);
      if (ExactCR.isFullSet())
        goto trivially_true;
      else if (ExactCR.isEmptySet())
        goto trivially_false;

      APInt NewRHS;
      CmpInst::Predicate NewPred;
      if (ExactCR.getEquivalentICmp(NewPred, NewRHS) &&
          ICmpInst::isEquality(NewPred)) {
        // We were able to convert an inequality to an equality.
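        // (For instance, "x u< 1" has the exact range [0, 1) and becomes
        // "x == 0".)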
        Pred = NewPred;
        RHS = getConstant(NewRHS);
        Changed = SimplifiedByConstantRange = true;
      }
    }

    if (!SimplifiedByConstantRange) {
      switch (Pred) {
      default:
        break;
      case ICmpInst::ICMP_EQ:
      case ICmpInst::ICMP_NE:
        // Fold ((-1) * %a) + %b == 0 (equivalent to %b-%a == 0) into %a == %b.
        if (!RA)
          if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(LHS))
            if (const SCEVMulExpr *ME =
                    dyn_cast<SCEVMulExpr>(AE->getOperand(0)))
              if (AE->getNumOperands() == 2 && ME->getNumOperands() == 2 &&
                  ME->getOperand(0)->isAllOnesValue()) {
                RHS = AE->getOperand(1);
                LHS = ME->getOperand(1);
                Changed = true;
              }
        break;


        // The "Should have been caught earlier!" messages refer to the fact
        // that the ExactCR.isFullSet() or ExactCR.isEmptySet() check above
        // should have fired on the corresponding cases, and canonicalized the
        // check to trivially_true or trivially_false.

      case ICmpInst::ICMP_UGE:
        assert(!RA.isMinValue() && "Should have been caught earlier!");
        Pred = ICmpInst::ICMP_UGT;
        RHS = getConstant(RA - 1);
        Changed = true;
        break;
      case ICmpInst::ICMP_ULE:
        assert(!RA.isMaxValue() && "Should have been caught earlier!");
        Pred = ICmpInst::ICMP_ULT;
        RHS = getConstant(RA + 1);
        Changed = true;
        break;
      case ICmpInst::ICMP_SGE:
        assert(!RA.isMinSignedValue() && "Should have been caught earlier!");
        Pred = ICmpInst::ICMP_SGT;
        RHS = getConstant(RA - 1);
        Changed = true;
        break;
      case ICmpInst::ICMP_SLE:
        assert(!RA.isMaxSignedValue() && "Should have been caught earlier!");
        Pred = ICmpInst::ICMP_SLT;
        RHS = getConstant(RA + 1);
        Changed = true;
        break;
      }
    }
  }

  // Check for obvious equality.
  if (HasSameValue(LHS, RHS)) {
    if (ICmpInst::isTrueWhenEqual(Pred))
      goto trivially_true;
    if (ICmpInst::isFalseWhenEqual(Pred))
      goto trivially_false;
  }

  // If possible, canonicalize GE/LE comparisons to GT/LT comparisons, by
  // adding or subtracting 1 from one of the operands.
  switch (Pred) {
  case ICmpInst::ICMP_SLE:
    if (!getSignedRangeMax(RHS).isMaxSignedValue()) {
      RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
                       SCEV::FlagNSW);
      Pred = ICmpInst::ICMP_SLT;
      Changed = true;
    } else if (!getSignedRangeMin(LHS).isMinSignedValue()) {
      LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS,
                       SCEV::FlagNSW);
      Pred = ICmpInst::ICMP_SLT;
      Changed = true;
    }
    break;
  case ICmpInst::ICMP_SGE:
    if (!getSignedRangeMin(RHS).isMinSignedValue()) {
      RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS,
                       SCEV::FlagNSW);
      Pred = ICmpInst::ICMP_SGT;
      Changed = true;
    } else if (!getSignedRangeMax(LHS).isMaxSignedValue()) {
      LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
                       SCEV::FlagNSW);
      Pred = ICmpInst::ICMP_SGT;
      Changed = true;
    }
    break;
  case ICmpInst::ICMP_ULE:
    if (!getUnsignedRangeMax(RHS).isMaxValue()) {
      RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
                       SCEV::FlagNUW);
      Pred = ICmpInst::ICMP_ULT;
      Changed = true;
    } else if (!getUnsignedRangeMin(LHS).isMinValue()) {
      LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS);
      Pred = ICmpInst::ICMP_ULT;
      Changed = true;
    }
    break;
  case ICmpInst::ICMP_UGE:
    if (!getUnsignedRangeMin(RHS).isMinValue()) {
      RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS);
      Pred = ICmpInst::ICMP_UGT;
      Changed = true;
    } else if (!getUnsignedRangeMax(LHS).isMaxValue()) {
      LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
                       SCEV::FlagNUW);
      Pred = ICmpInst::ICMP_UGT;
      Changed = true;
    }
    break;
  default:
    break;
  }

  // TODO: More simplifications are possible here.

  // Recursively simplify until we either hit a recursion limit or nothing
  // changes.
  if (Changed)
    return SimplifyICmpOperands(Pred, LHS, RHS, Depth+1);

  return Changed;

trivially_true:
  // Return 0 == 0.
  LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
  Pred = ICmpInst::ICMP_EQ;
  return true;

trivially_false:
  // Return 0 != 0.
  LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
  Pred = ICmpInst::ICMP_NE;
  return true;
}

bool ScalarEvolution::isKnownNegative(const SCEV *S) {
  return getSignedRangeMax(S).isNegative();
}

bool ScalarEvolution::isKnownPositive(const SCEV *S) {
  return getSignedRangeMin(S).isStrictlyPositive();
}

bool ScalarEvolution::isKnownNonNegative(const SCEV *S) {
  return !getSignedRangeMin(S).isNegative();
}

bool ScalarEvolution::isKnownNonPositive(const SCEV *S) {
  return !getSignedRangeMax(S).isStrictlyPositive();
}

bool ScalarEvolution::isKnownNonZero(const SCEV *S) {
  return isKnownNegative(S) || isKnownPositive(S);
}

bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred,
                                       const SCEV *LHS, const SCEV *RHS) {
  // Canonicalize the inputs first.
  (void)SimplifyICmpOperands(Pred, LHS, RHS);

  // If LHS or RHS is an addrec, check to see if the condition is true in
  // every iteration of the loop.
  // If LHS and RHS are both addrec, both conditions must be true in
  // every iteration of the loop.
  const SCEVAddRecExpr *LAR = dyn_cast<SCEVAddRecExpr>(LHS);
  const SCEVAddRecExpr *RAR = dyn_cast<SCEVAddRecExpr>(RHS);
  bool LeftGuarded = false;
  bool RightGuarded = false;
  if (LAR) {
    const Loop *L = LAR->getLoop();
    if (isLoopEntryGuardedByCond(L, Pred, LAR->getStart(), RHS) &&
        isLoopBackedgeGuardedByCond(L, Pred, LAR->getPostIncExpr(*this), RHS)) {
      if (!RAR) return true;
      LeftGuarded = true;
    }
  }
  if (RAR) {
    const Loop *L = RAR->getLoop();
    if (isLoopEntryGuardedByCond(L, Pred, LHS, RAR->getStart()) &&
        isLoopBackedgeGuardedByCond(L, Pred, LHS, RAR->getPostIncExpr(*this))) {
      if (!LAR) return true;
      RightGuarded = true;
    }
  }
  if (LeftGuarded && RightGuarded)
    return true;

  if (isKnownPredicateViaSplitting(Pred, LHS, RHS))
    return true;

  // Otherwise see what can be done with known constant ranges.
  return isKnownPredicateViaConstantRanges(Pred, LHS, RHS);
}

bool ScalarEvolution::isMonotonicPredicate(const SCEVAddRecExpr *LHS,
                                           ICmpInst::Predicate Pred,
                                           bool &Increasing) {
  bool Result = isMonotonicPredicateImpl(LHS, Pred, Increasing);

#ifndef NDEBUG
  // Verify an invariant: inverting the predicate should turn a monotonically
  // increasing change to a monotonically decreasing one, and vice versa.
  bool IncreasingSwapped;
  bool ResultSwapped = isMonotonicPredicateImpl(
      LHS, ICmpInst::getSwappedPredicate(Pred), IncreasingSwapped);

  assert(Result == ResultSwapped && "should be able to analyze both!");
  if (ResultSwapped)
    assert(Increasing == !IncreasingSwapped &&
           "monotonicity should flip as we flip the predicate");
#endif

  return Result;
}

bool ScalarEvolution::isMonotonicPredicateImpl(const SCEVAddRecExpr *LHS,
                                               ICmpInst::Predicate Pred,
                                               bool &Increasing) {

  // A zero step value for LHS means the induction variable is essentially a
  // loop invariant value. We don't really depend on the predicate actually
  // flipping from false to true (for increasing predicates, and the other way
  // around for decreasing predicates), all we care about is that *if* the
  // predicate changes then it only changes from false to true.
  //
  // A zero step value in itself is not very useful, but there may be places
  // where SCEV can prove X >= 0 but not prove X > 0, so it is helpful to be
  // as general as possible.
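  //
  // For example (illustrative): with LHS = {0,+,1}<nuw> and Pred = ICMP_ULT,
  // "LHS u< RHS" can only change from true to false as the loop advances, so
  // the analysis below reports Increasing == false.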
  switch (Pred) {
  default:
    return false; // Conservative answer

  case ICmpInst::ICMP_UGT:
  case ICmpInst::ICMP_UGE:
  case ICmpInst::ICMP_ULT:
  case ICmpInst::ICMP_ULE:
    if (!LHS->hasNoUnsignedWrap())
      return false;

    Increasing = Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE;
    return true;

  case ICmpInst::ICMP_SGT:
  case ICmpInst::ICMP_SGE:
  case ICmpInst::ICMP_SLT:
  case ICmpInst::ICMP_SLE: {
    if (!LHS->hasNoSignedWrap())
      return false;

    const SCEV *Step = LHS->getStepRecurrence(*this);

    if (isKnownNonNegative(Step)) {
      Increasing = Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE;
      return true;
    }

    if (isKnownNonPositive(Step)) {
      Increasing = Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE;
      return true;
    }

    return false;
  }
  }

  llvm_unreachable("switch has default clause!");
}

bool ScalarEvolution::isLoopInvariantPredicate(
    ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L,
    ICmpInst::Predicate &InvariantPred, const SCEV *&InvariantLHS,
    const SCEV *&InvariantRHS) {

  // If there is a loop-invariant, force it into the RHS, otherwise bail out.
  if (!isLoopInvariant(RHS, L)) {
    if (!isLoopInvariant(LHS, L))
      return false;

    std::swap(LHS, RHS);
    Pred = ICmpInst::getSwappedPredicate(Pred);
  }

  const SCEVAddRecExpr *ArLHS = dyn_cast<SCEVAddRecExpr>(LHS);
  if (!ArLHS || ArLHS->getLoop() != L)
    return false;

  bool Increasing;
  if (!isMonotonicPredicate(ArLHS, Pred, Increasing))
    return false;

  // If the predicate "ArLHS `Pred` RHS" monotonically increases from false to
  // true as the loop iterates, and the backedge is control dependent on
  // "ArLHS `Pred` RHS" == true then we can reason as follows:
  //
  //   * if the predicate was false in the first iteration then the predicate
  //     is never evaluated again, since the loop exits without taking the
  //     backedge.
  //   * if the predicate was true in the first iteration then it will
  //     continue to be true for all future iterations since it is
  //     monotonically increasing.
  //
  // For both the above possibilities, we can replace the loop varying
  // predicate with its value on the first iteration of the loop (which is
  // loop invariant).
  //
  // A similar reasoning applies for a monotonically decreasing predicate, by
  // replacing true with false and false with true in the above two bullets.

  auto P = Increasing ? Pred : ICmpInst::getInversePredicate(Pred);

  if (!isLoopBackedgeGuardedByCond(L, P, LHS, RHS))
    return false;

  InvariantPred = Pred;
  InvariantLHS = ArLHS->getStart();
  InvariantRHS = RHS;
  return true;
}

bool ScalarEvolution::isKnownPredicateViaConstantRanges(
    ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) {
  if (HasSameValue(LHS, RHS))
    return ICmpInst::isTrueWhenEqual(Pred);

  // This code is split out from isKnownPredicate because it is called from
  // within isLoopEntryGuardedByCond.

  auto CheckRanges =
      [&](const ConstantRange &RangeLHS, const ConstantRange &RangeRHS) {
    return ConstantRange::makeSatisfyingICmpRegion(Pred, RangeRHS)
        .contains(RangeLHS);
  };

  // The check at the top of the function catches the case where the values are
  // known to be equal.
  if (Pred == CmpInst::ICMP_EQ)
    return false;

  if (Pred == CmpInst::ICMP_NE)
    return CheckRanges(getSignedRange(LHS), getSignedRange(RHS)) ||
           CheckRanges(getUnsignedRange(LHS), getUnsignedRange(RHS)) ||
           isKnownNonZero(getMinusSCEV(LHS, RHS));

  if (CmpInst::isSigned(Pred))
    return CheckRanges(getSignedRange(LHS), getSignedRange(RHS));

  return CheckRanges(getUnsignedRange(LHS), getUnsignedRange(RHS));
}

bool ScalarEvolution::isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred,
                                                    const SCEV *LHS,
                                                    const SCEV *RHS) {

  // Match Result to (X + Y)<ExpectedFlags> where Y is a constant integer.
  // Return Y via OutY.
  auto MatchBinaryAddToConst =
      [this](const SCEV *Result, const SCEV *X, APInt &OutY,
             SCEV::NoWrapFlags ExpectedFlags) {
    const SCEV *NonConstOp, *ConstOp;
    SCEV::NoWrapFlags FlagsPresent;

    if (!splitBinaryAdd(Result, ConstOp, NonConstOp, FlagsPresent) ||
        !isa<SCEVConstant>(ConstOp) || NonConstOp != X)
      return false;

    OutY = cast<SCEVConstant>(ConstOp)->getAPInt();
    return (FlagsPresent & ExpectedFlags) == ExpectedFlags;
  };

  APInt C;

  switch (Pred) {
  default:
    break;

  case ICmpInst::ICMP_SGE:
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ICmpInst::ICMP_SLE:
    // X s<= (X + C)<nsw> if C >= 0
    if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNSW) && C.isNonNegative())
      return true;

    // (X + C)<nsw> s<= X if C <= 0
    if (MatchBinaryAddToConst(LHS, RHS, C, SCEV::FlagNSW) &&
        !C.isStrictlyPositive())
      return true;
    break;

  case ICmpInst::ICMP_SGT:
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ICmpInst::ICMP_SLT:
    // X s< (X + C)<nsw> if C > 0
    if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNSW) &&
        C.isStrictlyPositive())
      return true;

    // (X + C)<nsw> s< X if C < 0
    if (MatchBinaryAddToConst(LHS, RHS, C, SCEV::FlagNSW) && C.isNegative())
      return true;
    break;
  }

  return false;
}
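// For instance (illustrative): given RHS == (X + 1)<nsw>, the SLT case above
// proves "X s< X + 1" from the nsw flag alone, with no range information.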
bool ScalarEvolution::isKnownPredicateViaSplitting(ICmpInst::Predicate Pred,
                                                   const SCEV *LHS,
                                                   const SCEV *RHS) {
  if (Pred != ICmpInst::ICMP_ULT || ProvingSplitPredicate)
    return false;

  // Allowing an arbitrary number of activations of
  // isKnownPredicateViaSplitting on the stack can result in exponential time
  // complexity.
  SaveAndRestore<bool> Restore(ProvingSplitPredicate, true);

  // If L >= 0 then I `ult` L <=> I >= 0 && I `slt` L
  //
  // To prove L >= 0 we use isKnownNonNegative whereas to prove I >= 0 we use
  // isKnownPredicate.  isKnownPredicate is more powerful, but also more
  // expensive; and using isKnownNonNegative(RHS) is sufficient for most of the
  // interesting cases seen in practice.  We can consider "upgrading" L >= 0 to
  // use isKnownPredicate later if needed.
  return isKnownNonNegative(RHS) &&
         isKnownPredicate(CmpInst::ICMP_SGE, LHS, getZero(LHS->getType())) &&
         isKnownPredicate(CmpInst::ICMP_SLT, LHS, RHS);
}

bool ScalarEvolution::isImpliedViaGuard(BasicBlock *BB,
                                        ICmpInst::Predicate Pred,
                                        const SCEV *LHS, const SCEV *RHS) {
  // No need to even try if we know the module has no guards.
  if (!HasGuards)
    return false;

  return any_of(*BB, [&](Instruction &I) {
    using namespace llvm::PatternMatch;

    Value *Condition;
    return match(&I, m_Intrinsic<Intrinsic::experimental_guard>(
                         m_Value(Condition))) &&
           isImpliedCond(Pred, LHS, RHS, Condition, false);
  });
}

/// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is
/// protected by a conditional between LHS and RHS.  This is used to
/// eliminate casts.
bool
ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
                                             ICmpInst::Predicate Pred,
                                             const SCEV *LHS, const SCEV *RHS) {
  // Interpret a null as meaning no loop, where there is obviously no guard
  // (interprocedural conditions notwithstanding).
  if (!L) return true;

  if (isKnownPredicateViaConstantRanges(Pred, LHS, RHS))
    return true;

  BasicBlock *Latch = L->getLoopLatch();
  if (!Latch)
    return false;

  BranchInst *LoopContinuePredicate =
      dyn_cast<BranchInst>(Latch->getTerminator());
  if (LoopContinuePredicate && LoopContinuePredicate->isConditional() &&
      isImpliedCond(Pred, LHS, RHS,
                    LoopContinuePredicate->getCondition(),
                    LoopContinuePredicate->getSuccessor(0) != L->getHeader()))
    return true;

  // We don't want more than one activation of the following loops on the stack
  // -- that can lead to O(n!) time complexity.
  if (WalkingBEDominatingConds)
    return false;

  SaveAndRestore<bool> ClearOnExit(WalkingBEDominatingConds, true);

  // See if we can exploit a trip count to prove the predicate.
  const auto &BETakenInfo = getBackedgeTakenInfo(L);
  const SCEV *LatchBECount = BETakenInfo.getExact(Latch, this);
  if (LatchBECount != getCouldNotCompute()) {
    // We know that Latch branches back to the loop header exactly
    // LatchBECount times.  This means the backedge condition at Latch is
    // equivalent to  "{0,+,1} u< LatchBECount".
    Type *Ty = LatchBECount->getType();
    auto NoWrapFlags = SCEV::NoWrapFlags(SCEV::FlagNUW | SCEV::FlagNW);
    const SCEV *LoopCounter =
        getAddRecExpr(getZero(Ty), getOne(Ty), L, NoWrapFlags);
    if (isImpliedCond(Pred, LHS, RHS, ICmpInst::ICMP_ULT, LoopCounter,
                      LatchBECount))
      return true;
  }

  // Check conditions due to any @llvm.assume intrinsics.
  for (auto &AssumeVH : AC.assumptions()) {
    if (!AssumeVH)
      continue;
    auto *CI = cast<CallInst>(AssumeVH);
    if (!DT.dominates(CI, Latch->getTerminator()))
      continue;

    if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false))
      return true;
  }

  // If the loop is not reachable from the entry block, we risk running into an
  // infinite loop as we walk up into the dom tree.  These loops do not matter
  // anyway, so we just return a conservative answer when we see them.
  if (!DT.isReachableFromEntry(L->getHeader()))
    return false;

  if (isImpliedViaGuard(Latch, Pred, LHS, RHS))
    return true;

  for (DomTreeNode *DTN = DT[Latch], *HeaderDTN = DT[L->getHeader()];
       DTN != HeaderDTN; DTN = DTN->getIDom()) {
    assert(DTN && "should reach the loop header before reaching the root!");

    BasicBlock *BB = DTN->getBlock();
    if (isImpliedViaGuard(BB, Pred, LHS, RHS))
      return true;

    BasicBlock *PBB = BB->getSinglePredecessor();
    if (!PBB)
      continue;

    BranchInst *ContinuePredicate = dyn_cast<BranchInst>(PBB->getTerminator());
    if (!ContinuePredicate || !ContinuePredicate->isConditional())
      continue;

    Value *Condition = ContinuePredicate->getCondition();

    // If we have an edge `E` within the loop body that dominates the only
    // latch, the condition guarding `E` also guards the backedge.  This
    // reasoning works only for loops with a single latch.

    BasicBlockEdge DominatingEdge(PBB, BB);
    if (DominatingEdge.isSingleEdge()) {
      // We're constructively (and conservatively) enumerating edges within the
      // loop body that dominate the latch.  The dominator tree better agree
      // with us on this:
      assert(DT.dominates(DominatingEdge, Latch) && "should be!");

      if (isImpliedCond(Pred, LHS, RHS, Condition,
                        BB != ContinuePredicate->getSuccessor(0)))
        return true;
    }
  }

  return false;
}

bool
ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
                                          ICmpInst::Predicate Pred,
                                          const SCEV *LHS, const SCEV *RHS) {
  // Interpret a null as meaning no loop, where there is obviously no guard
  // (interprocedural conditions notwithstanding).
  if (!L) return false;

  if (isKnownPredicateViaConstantRanges(Pred, LHS, RHS))
    return true;

  // Starting at the loop predecessor, climb up the predecessor chain, as long
  // as there are predecessors that can be found that have unique successors
  // leading to the original header.
  for (std::pair<BasicBlock *, BasicBlock *>
         Pair(L->getLoopPredecessor(), L->getHeader());
       Pair.first;
       Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) {

    if (isImpliedViaGuard(Pair.first, Pred, LHS, RHS))
      return true;

    BranchInst *LoopEntryPredicate =
        dyn_cast<BranchInst>(Pair.first->getTerminator());
    if (!LoopEntryPredicate ||
        LoopEntryPredicate->isUnconditional())
      continue;

    if (isImpliedCond(Pred, LHS, RHS,
                      LoopEntryPredicate->getCondition(),
                      LoopEntryPredicate->getSuccessor(0) != Pair.second))
      return true;
  }

  // Check conditions due to any @llvm.assume intrinsics.
  for (auto &AssumeVH : AC.assumptions()) {
    if (!AssumeVH)
      continue;
    auto *CI = cast<CallInst>(AssumeVH);
    if (!DT.dominates(CI, L->getHeader()))
      continue;

    if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false))
      return true;
  }

  return false;
}

bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
                                    const SCEV *LHS, const SCEV *RHS,
                                    Value *FoundCondValue,
                                    bool Inverse) {
  if (!PendingLoopPredicates.insert(FoundCondValue).second)
    return false;

  auto ClearOnExit =
      make_scope_exit([&]() { PendingLoopPredicates.erase(FoundCondValue); });

  // Recursively handle And and Or conditions.
  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FoundCondValue)) {
    if (BO->getOpcode() == Instruction::And) {
      if (!Inverse)
        return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) ||
               isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse);
    } else if (BO->getOpcode() == Instruction::Or) {
      if (Inverse)
        return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) ||
               isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse);
    }
  }

  ICmpInst *ICI = dyn_cast<ICmpInst>(FoundCondValue);
  if (!ICI) return false;

  // Now that we have found a conditional branch that dominates the loop or
  // controls the loop latch, check to see if it is the comparison we are
  // looking for.
  ICmpInst::Predicate FoundPred;
  if (Inverse)
    FoundPred = ICI->getInversePredicate();
  else
    FoundPred = ICI->getPredicate();

  const SCEV *FoundLHS = getSCEV(ICI->getOperand(0));
  const SCEV *FoundRHS = getSCEV(ICI->getOperand(1));

  return isImpliedCond(Pred, LHS, RHS, FoundPred, FoundLHS, FoundRHS);
}

bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
                                    const SCEV *RHS,
                                    ICmpInst::Predicate FoundPred,
                                    const SCEV *FoundLHS,
                                    const SCEV *FoundRHS) {
  // Balance the types.
  if (getTypeSizeInBits(LHS->getType()) <
      getTypeSizeInBits(FoundLHS->getType())) {
    if (CmpInst::isSigned(Pred)) {
      LHS = getSignExtendExpr(LHS, FoundLHS->getType());
      RHS = getSignExtendExpr(RHS, FoundLHS->getType());
    } else {
      LHS = getZeroExtendExpr(LHS, FoundLHS->getType());
      RHS = getZeroExtendExpr(RHS, FoundLHS->getType());
    }
  } else if (getTypeSizeInBits(LHS->getType()) >
      getTypeSizeInBits(FoundLHS->getType())) {
    if (CmpInst::isSigned(FoundPred)) {
      FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType());
      FoundRHS = getSignExtendExpr(FoundRHS, LHS->getType());
    } else {
      FoundLHS = getZeroExtendExpr(FoundLHS, LHS->getType());
      FoundRHS = getZeroExtendExpr(FoundRHS, LHS->getType());
    }
  }

  // Canonicalize the query to match the way instcombine will have
  // canonicalized the comparison.
  if (SimplifyICmpOperands(Pred, LHS, RHS))
    if (LHS == RHS)
      return CmpInst::isTrueWhenEqual(Pred);
  if (SimplifyICmpOperands(FoundPred, FoundLHS, FoundRHS))
    if (FoundLHS == FoundRHS)
      return CmpInst::isFalseWhenEqual(FoundPred);

  // Check to see if we can make the LHS or RHS match.
  if (LHS == FoundRHS || RHS == FoundLHS) {
    if (isa<SCEVConstant>(RHS)) {
      std::swap(FoundLHS, FoundRHS);
      FoundPred = ICmpInst::getSwappedPredicate(FoundPred);
    } else {
      std::swap(LHS, RHS);
      Pred = ICmpInst::getSwappedPredicate(Pred);
    }
  }

  // Check whether the found predicate is the same as the desired predicate.
  if (FoundPred == Pred)
    return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS);

  // Check whether swapping the found predicate makes it the same as the
  // desired predicate.
  if (ICmpInst::getSwappedPredicate(FoundPred) == Pred) {
    if (isa<SCEVConstant>(RHS))
      return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS);
    else
      return isImpliedCondOperands(ICmpInst::getSwappedPredicate(Pred),
                                   RHS, LHS, FoundLHS, FoundRHS);
  }

  // Unsigned comparison is the same as signed comparison when both the
  // operands are non-negative.
  if (CmpInst::isUnsigned(FoundPred) &&
      CmpInst::getSignedPredicate(FoundPred) == Pred &&
      isKnownNonNegative(FoundLHS) && isKnownNonNegative(FoundRHS))
    return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS);

  // Check if we can make progress by sharpening ranges.
  if (FoundPred == ICmpInst::ICMP_NE &&
      (isa<SCEVConstant>(FoundLHS) || isa<SCEVConstant>(FoundRHS))) {

    const SCEVConstant *C = nullptr;
    const SCEV *V = nullptr;

    if (isa<SCEVConstant>(FoundLHS)) {
      C = cast<SCEVConstant>(FoundLHS);
      V = FoundRHS;
    } else {
      C = cast<SCEVConstant>(FoundRHS);
      V = FoundLHS;
    }

    // The guarding predicate tells us that C != V. If the known range
    // of V is [C, t), we can sharpen the range to [C + 1, t).  The
    // range we consider has to correspond to same signedness as the
    // predicate we're interested in folding.

    APInt Min = ICmpInst::isSigned(Pred) ?
        getSignedRangeMin(V) : getUnsignedRangeMin(V);

    if (Min == C->getAPInt()) {
      // Given (V >= Min && V != Min) we conclude V >= (Min + 1).
      // This is true even if (Min + 1) wraps around -- in case of
      // wraparound, (Min + 1) < Min, so (V >= Min => V >= (Min + 1)).

      APInt SharperMin = Min + 1;

      switch (Pred) {
        case ICmpInst::ICMP_SGE:
        case ICmpInst::ICMP_UGE:
          // We know V `Pred` SharperMin.  If this implies LHS `Pred`
          // RHS, we're done.
          if (isImpliedCondOperands(Pred, LHS, RHS, V,
                                    getConstant(SharperMin)))
            return true;
          LLVM_FALLTHROUGH;

        case ICmpInst::ICMP_SGT:
        case ICmpInst::ICMP_UGT:
          // We know from the range information that (V `Pred` Min ||
          // V == Min).  We know from the guarding condition that !(V
          // == Min).  This gives us
          //
          //       V `Pred` Min || V == Min && !(V == Min)
          //   =>  V `Pred` Min
          //
          // If V `Pred` Min implies LHS `Pred` RHS, we're done.

          if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(Min)))
            return true;
          LLVM_FALLTHROUGH;

        default:
          // No change
          break;
      }
    }
  }

  // Check whether the actual condition is beyond sufficient.
  if (FoundPred == ICmpInst::ICMP_EQ)
    if (ICmpInst::isTrueWhenEqual(Pred))
      if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS))
        return true;
  if (Pred == ICmpInst::ICMP_NE)
    if (!ICmpInst::isTrueWhenEqual(FoundPred))
      if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS))
        return true;

  // Otherwise assume the worst.
  // Otherwise assume the worst.
  return false;
}

bool ScalarEvolution::splitBinaryAdd(const SCEV *Expr,
                                     const SCEV *&L, const SCEV *&R,
                                     SCEV::NoWrapFlags &Flags) {
  const auto *AE = dyn_cast<SCEVAddExpr>(Expr);
  if (!AE || AE->getNumOperands() != 2)
    return false;

  L = AE->getOperand(0);
  R = AE->getOperand(1);
  Flags = AE->getNoWrapFlags();
  return true;
}

Optional<APInt> ScalarEvolution::computeConstantDifference(const SCEV *More,
                                                           const SCEV *Less) {
  // We avoid subtracting expressions here because this function is usually
  // fairly deep in the call stack (i.e. is called many times).

  if (isa<SCEVAddRecExpr>(Less) && isa<SCEVAddRecExpr>(More)) {
    const auto *LAR = cast<SCEVAddRecExpr>(Less);
    const auto *MAR = cast<SCEVAddRecExpr>(More);

    if (LAR->getLoop() != MAR->getLoop())
      return None;

    // We look at affine expressions only; not for correctness but to keep
    // getStepRecurrence cheap.
    if (!LAR->isAffine() || !MAR->isAffine())
      return None;

    if (LAR->getStepRecurrence(*this) != MAR->getStepRecurrence(*this))
      return None;

    Less = LAR->getStart();
    More = MAR->getStart();

    // fall through
  }

  if (isa<SCEVConstant>(Less) && isa<SCEVConstant>(More)) {
    const auto &M = cast<SCEVConstant>(More)->getAPInt();
    const auto &L = cast<SCEVConstant>(Less)->getAPInt();
    return M - L;
  }

  const SCEV *L, *R;
  SCEV::NoWrapFlags Flags;
  if (splitBinaryAdd(Less, L, R, Flags))
    if (const auto *LC = dyn_cast<SCEVConstant>(L))
      if (R == More)
        return -(LC->getAPInt());

  if (splitBinaryAdd(More, L, R, Flags))
    if (const auto *LC = dyn_cast<SCEVConstant>(L))
      if (R == Less)
        return LC->getAPInt();

  return None;
}

bool ScalarEvolution::isImpliedCondOperandsViaNoOverflow(
    ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS,
    const SCEV *FoundLHS, const SCEV *FoundRHS) {
  if (Pred != CmpInst::ICMP_SLT && Pred != CmpInst::ICMP_ULT)
    return false;

  const auto *AddRecLHS = dyn_cast<SCEVAddRecExpr>(LHS);
  if (!AddRecLHS)
    return false;

  const auto *AddRecFoundLHS = dyn_cast<SCEVAddRecExpr>(FoundLHS);
  if (!AddRecFoundLHS)
    return false;

  // We'd like to let SCEV reason about control dependencies, so we constrain
  // both the inequalities to be about add recurrences on the same loop.  This
  // way we can use isLoopEntryGuardedByCond later.

  const Loop *L = AddRecFoundLHS->getLoop();
  if (L != AddRecLHS->getLoop())
    return false;

  //  FoundLHS u< FoundRHS u< -C =>  (FoundLHS + C) u< (FoundRHS + C) ... (1)
  //
  //  FoundLHS s< FoundRHS s< INT_MIN - C => (FoundLHS + C) s< (FoundRHS + C)
  //                                                                  ... (2)
  //
  // Informal proof for (2), assuming (1) [*]:
  //
  // We'll also assume (A s< B) <=> ((A + INT_MIN) u< (B + INT_MIN)) ... (3)[**]
  //
  // Then
  //
  //       FoundLHS s< FoundRHS s< INT_MIN - C
  //  <=>  (FoundLHS + INT_MIN) u< (FoundRHS + INT_MIN) u< -C   [ using (3) ]
  //  <=>  (FoundLHS + INT_MIN + C) u< (FoundRHS + INT_MIN + C) [ using (1) ]
  //  <=>  (FoundLHS + INT_MIN + C + INT_MIN) s<
  //       (FoundRHS + INT_MIN + C + INT_MIN)                   [ using (3) ]
  //  <=>  FoundLHS + C s< FoundRHS + C
  //
  // [*]: (1) can be proved by ruling out overflow.
  //
  // [**]: This can be proved by analyzing all the four possibilities:
  //    (A s< 0, B s< 0), (A s< 0, B s>= 0), (A s>= 0, B s< 0) and
  //    (A s>= 0, B s>= 0).
  //
  // Note:
  // Despite (2), "FoundRHS s< INT_MIN - C" does not mean that "FoundRHS + C"
  // will not sign underflow.  For instance, say FoundLHS = (i8 -128), FoundRHS
  // = (i8 -127) and C = (i8 -100).  Then INT_MIN - C = (i8 -28), and FoundRHS
  // s< (INT_MIN - C).  Lack of sign overflow / underflow in "FoundRHS + C" is
  // neither necessary nor sufficient to prove "(FoundLHS + C) s< (FoundRHS +
  // C)".
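  // A concrete 8-bit instance of rule (1), with illustrative values: take
  // C = 10, so -C is 246 in unsigned arithmetic.  With FoundLHS = 5 and
  // FoundRHS = 100 we have 5 u< 100 u< 246, and adding C keeps both sides
  // below 256, so 15 u< 110 still holds:
  //
  //   uint8_t C = 10, FoundLHS = 5, FoundRHS = 100;
  //   assert(FoundLHS < FoundRHS && FoundRHS < uint8_t(-C));   // antecedent
  //   assert(uint8_t(FoundLHS + C) < uint8_t(FoundRHS + C));   // consequent
  //
  // The bound "FoundRHS u< -C" is exactly what rules out the unsigned
  // overflow that could otherwise reorder the two sums.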
  Optional<APInt> LDiff = computeConstantDifference(LHS, FoundLHS);
  Optional<APInt> RDiff = computeConstantDifference(RHS, FoundRHS);

  if (!LDiff || !RDiff || *LDiff != *RDiff)
    return false;

  if (LDiff->isMinValue())
    return true;

  APInt FoundRHSLimit;

  if (Pred == CmpInst::ICMP_ULT) {
    FoundRHSLimit = -(*RDiff);
  } else {
    assert(Pred == CmpInst::ICMP_SLT && "Checked above!");
    FoundRHSLimit =
        APInt::getSignedMinValue(getTypeSizeInBits(RHS->getType())) - *RDiff;
  }

  // Try to prove (1) or (2), as needed.
  return isLoopEntryGuardedByCond(L, Pred, FoundRHS,
                                  getConstant(FoundRHSLimit));
}

bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
                                            const SCEV *LHS, const SCEV *RHS,
                                            const SCEV *FoundLHS,
                                            const SCEV *FoundRHS) {
  if (isImpliedCondOperandsViaRanges(Pred, LHS, RHS, FoundLHS, FoundRHS))
    return true;

  if (isImpliedCondOperandsViaNoOverflow(Pred, LHS, RHS, FoundLHS, FoundRHS))
    return true;

  return isImpliedCondOperandsHelper(Pred, LHS, RHS,
                                     FoundLHS, FoundRHS) ||
         // ~x < ~y --> x > y
         isImpliedCondOperandsHelper(Pred, LHS, RHS,
                                     getNotSCEV(FoundRHS),
                                     getNotSCEV(FoundLHS));
}

/// If Expr computes ~A, return A; otherwise return nullptr.
static const SCEV *MatchNotExpr(const SCEV *Expr) {
  const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Expr);
  if (!Add || Add->getNumOperands() != 2 ||
      !Add->getOperand(0)->isAllOnesValue())
    return nullptr;

  const SCEVMulExpr *AddRHS = dyn_cast<SCEVMulExpr>(Add->getOperand(1));
  if (!AddRHS || AddRHS->getNumOperands() != 2 ||
      !AddRHS->getOperand(0)->isAllOnesValue())
    return nullptr;

  return AddRHS->getOperand(1);
}

/// Is MaybeMaxExpr an SMax or UMax of Candidate and some other values?
template <typename MaxExprType>
static bool IsMaxConsistingOf(const SCEV *MaybeMaxExpr,
                              const SCEV *Candidate) {
  const MaxExprType *MaxExpr = dyn_cast<MaxExprType>(MaybeMaxExpr);
  if (!MaxExpr) return false;

  return find(MaxExpr->operands(), Candidate) != MaxExpr->op_end();
}

/// Is MaybeMinExpr an SMin or UMin of Candidate and some other values?
template <typename MaxExprType>
static bool IsMinConsistingOf(ScalarEvolution &SE,
                              const SCEV *MaybeMinExpr,
                              const SCEV *Candidate) {
  const SCEV *MaybeMaxExpr = MatchNotExpr(MaybeMinExpr);
  if (!MaybeMaxExpr)
    return false;

  return IsMaxConsistingOf<MaxExprType>(MaybeMaxExpr,
                                        SE.getNotSCEV(Candidate));
}

static bool IsKnownPredicateViaAddRecStart(ScalarEvolution &SE,
                                           ICmpInst::Predicate Pred,
                                           const SCEV *LHS, const SCEV *RHS) {
  // If both sides are affine addrecs for the same loop, with equal
  // steps, and we know the recurrences don't wrap, then we only
  // need to check the predicate on the starting values.

  if (!ICmpInst::isRelational(Pred))
    return false;

  const SCEVAddRecExpr *LAR = dyn_cast<SCEVAddRecExpr>(LHS);
  if (!LAR)
    return false;
  const SCEVAddRecExpr *RAR = dyn_cast<SCEVAddRecExpr>(RHS);
  if (!RAR)
    return false;
  if (LAR->getLoop() != RAR->getLoop())
    return false;
  if (!LAR->isAffine() || !RAR->isAffine())
    return false;

  if (LAR->getStepRecurrence(SE) != RAR->getStepRecurrence(SE))
    return false;

  SCEV::NoWrapFlags NW = ICmpInst::isSigned(Pred) ?
                         SCEV::FlagNSW : SCEV::FlagNUW;
  if (!LAR->getNoWrapFlags(NW) || !RAR->getNoWrapFlags(NW))
    return false;

  return SE.isKnownPredicate(Pred, LAR->getStart(), RAR->getStart());
}

/// Is LHS `Pred` RHS true by virtue of LHS or RHS being a Min or Max
/// expression?
static bool IsKnownPredicateViaMinOrMax(ScalarEvolution &SE,
                                        ICmpInst::Predicate Pred,
                                        const SCEV *LHS, const SCEV *RHS) {
  switch (Pred) {
  default:
    return false;

  case ICmpInst::ICMP_SGE:
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ICmpInst::ICMP_SLE:
    return
        // min(A, ...) <= A
        IsMinConsistingOf<SCEVSMaxExpr>(SE, LHS, RHS) ||
        // A <= max(A, ...)
        IsMaxConsistingOf<SCEVSMaxExpr>(RHS, LHS);

  case ICmpInst::ICMP_UGE:
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ICmpInst::ICMP_ULE:
    return
        // min(A, ...) <= A
        IsMinConsistingOf<SCEVUMaxExpr>(SE, LHS, RHS) ||
        // A <= max(A, ...)
        IsMaxConsistingOf<SCEVUMaxExpr>(RHS, LHS);
  }

  llvm_unreachable("covered switch fell through?!");
}

bool ScalarEvolution::isImpliedViaOperations(ICmpInst::Predicate Pred,
                                             const SCEV *LHS, const SCEV *RHS,
                                             const SCEV *FoundLHS,
                                             const SCEV *FoundRHS,
                                             unsigned Depth) {
  assert(getTypeSizeInBits(LHS->getType()) ==
             getTypeSizeInBits(RHS->getType()) &&
         "LHS and RHS have different sizes?");
  assert(getTypeSizeInBits(FoundLHS->getType()) ==
             getTypeSizeInBits(FoundRHS->getType()) &&
         "FoundLHS and FoundRHS have different sizes?");
  // We want to avoid hurting the compile time with analysis of too big trees.
  if (Depth > MaxSCEVOperationsImplicationDepth)
    return false;
  // We only want to work with ICMP_SGT comparison so far.
  // TODO: Extend to ICMP_UGT?
  if (Pred == ICmpInst::ICMP_SLT) {
    Pred = ICmpInst::ICMP_SGT;
    std::swap(LHS, RHS);
    std::swap(FoundLHS, FoundRHS);
  }
  if (Pred != ICmpInst::ICMP_SGT)
    return false;

  auto GetOpFromSExt = [&](const SCEV *S) {
    if (auto *Ext = dyn_cast<SCEVSignExtendExpr>(S))
      return Ext->getOperand();
    // TODO: If S is a SCEVConstant then you can cheaply "strip" the sext off
    // the constant in some cases.
    return S;
  };

  // Acquire values from extensions.
  auto *OrigFoundLHS = FoundLHS;
  LHS = GetOpFromSExt(LHS);
  FoundLHS = GetOpFromSExt(FoundLHS);

  // Checks whether the SGT predicate can be proved trivially or using the
  // found context.
  auto IsSGTViaContext = [&](const SCEV *S1, const SCEV *S2) {
    return isKnownViaSimpleReasoning(ICmpInst::ICMP_SGT, S1, S2) ||
           isImpliedViaOperations(ICmpInst::ICMP_SGT, S1, S2, OrigFoundLHS,
                                  FoundRHS, Depth + 1);
  };

  if (auto *LHSAddExpr = dyn_cast<SCEVAddExpr>(LHS)) {
    // We want to avoid creation of any new non-constant SCEV. Since we are
    // going to compare the operands to RHS, we should be certain that we don't
    // need any size extensions for this. So let's decline all cases when the
    // sizes of types of LHS and RHS do not match.
    // TODO: Maybe try to get RHS from sext to catch more cases?
    if (getTypeSizeInBits(LHS->getType()) != getTypeSizeInBits(RHS->getType()))
      return false;

    // Should not overflow.
    if (!LHSAddExpr->hasNoSignedWrap())
      return false;

    auto *LL = LHSAddExpr->getOperand(0);
    auto *LR = LHSAddExpr->getOperand(1);
    auto *MinusOne = getNegativeSCEV(getOne(RHS->getType()));

    // Checks that S1 >= 0 && S2 > RHS, trivially or using the found context.
    auto IsSumGreaterThanRHS = [&](const SCEV *S1, const SCEV *S2) {
      return IsSGTViaContext(S1, MinusOne) && IsSGTViaContext(S2, RHS);
    };
    // Try to prove the following rule:
    // (LHS = LL + LR) && (LL >= 0) && (LR > RHS) => (LHS > RHS).
    // (LHS = LL + LR) && (LR >= 0) && (LL > RHS) => (LHS > RHS).
    if (IsSumGreaterThanRHS(LL, LR) || IsSumGreaterThanRHS(LR, LL))
      return true;
  } else if (auto *LHSUnknownExpr = dyn_cast<SCEVUnknown>(LHS)) {
    Value *LL, *LR;
    // FIXME: Once we have SDiv implemented, we can get rid of this matching.
    using namespace llvm::PatternMatch;
    if (match(LHSUnknownExpr->getValue(), m_SDiv(m_Value(LL), m_Value(LR)))) {
      // Rules for division.
      // We are going to perform some comparisons with Denominator and its
      // derivative expressions. In the general case, creating a SCEV for it
      // may lead to a complex analysis of the whole graph, and in particular
      // it can request trip count recalculation for the same loop; that
      // recalculation would then be cached as SCEVCouldNotCompute to avoid
      // infinite recursion. To avoid this, we only want to create SCEVs that
      // are constants in this section.
      // So we bail if Denominator is not a constant.
      if (!isa<ConstantInt>(LR))
        return false;

      auto *Denominator = cast<SCEVConstant>(getSCEV(LR));

      // We want to make sure that LHS = FoundLHS / Denominator. If so, then a
      // SCEV for the numerator already exists and matches with FoundLHS.
      auto *Numerator = getExistingSCEV(LL);
      if (!Numerator || Numerator->getType() != FoundLHS->getType())
        return false;

      // Make sure that the numerator matches with FoundLHS and the
      // denominator is positive.
      if (!HasSameValue(Numerator, FoundLHS) || !isKnownPositive(Denominator))
        return false;

      auto *DTy = Denominator->getType();
      auto *FRHSTy = FoundRHS->getType();
      if (DTy->isPointerTy() != FRHSTy->isPointerTy())
        // One of the types is a pointer and the other is not; we cannot
        // extend them properly to a wider type, so reject this case.
        // TODO: Usage of getEffectiveSCEVType for DTy, FRHSTy etc should help
        // to avoid this check.
        return false;

      // Given that:
      // FoundLHS > FoundRHS, LHS = FoundLHS / Denominator, Denominator > 0.
      auto *WTy = getWiderType(DTy, FRHSTy);
      auto *DenominatorExt = getNoopOrSignExtend(Denominator, WTy);
      auto *FoundRHSExt = getNoopOrSignExtend(FoundRHS, WTy);

      // Try to prove the following rule:
      // (FoundRHS > Denominator - 2) && (RHS <= 0) => (LHS > RHS).
      // For example, if FoundRHS = 2 then FoundLHS > 2, i.e. FoundLHS is at
      // least 3, and FoundRHS > Denominator - 2 means Denominator < 4;
      // dividing a value of at least 3 by a positive Denominator < 4 yields
      // at least 1.
      auto *DenomMinusTwo = getMinusSCEV(DenominatorExt, getConstant(WTy, 2));
      if (isKnownNonPositive(RHS) &&
          IsSGTViaContext(FoundRHSExt, DenomMinusTwo))
        return true;

      // Try to prove the following rule:
      // (FoundRHS > -1 - Denominator) && (RHS < 0) => (LHS > RHS).
      // For example, given that FoundLHS > -3, FoundLHS is at least -2.
      // If we divide it by Denominator > 2, then:
      // 1. If FoundLHS is negative, then the result is 0.
      // 2. If FoundLHS is non-negative, then the result is non-negative.
      // Either way, the result is non-negative.
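      // A concrete instance of the rule above (illustrative values):
      // Denominator = 3 and FoundLHS > -3, i.e. FoundLHS >= -2.  Signed
      // division truncates toward zero, so:
      //
      //   -2 / 3 == 0   and   -1 / 3 == 0   // negative numerators give 0
      //    7 / 3 == 2                       // non-negative ones stay >= 0
      //
      // Hence LHS = FoundLHS / 3 is non-negative, and any RHS that is known
      // to be negative satisfies LHS > RHS.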
      auto *MinusOne = getNegativeSCEV(getOne(WTy));
      auto *NegDenomMinusOne = getMinusSCEV(MinusOne, DenominatorExt);
      if (isKnownNegative(RHS) &&
          IsSGTViaContext(FoundRHSExt, NegDenomMinusOne))
        return true;
    }
  }

  return false;
}

bool ScalarEvolution::isKnownViaSimpleReasoning(ICmpInst::Predicate Pred,
                                                const SCEV *LHS,
                                                const SCEV *RHS) {
  return isKnownPredicateViaConstantRanges(Pred, LHS, RHS) ||
         IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS) ||
         IsKnownPredicateViaAddRecStart(*this, Pred, LHS, RHS) ||
         isKnownPredicateViaNoOverflow(Pred, LHS, RHS);
}

bool
ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
                                             const SCEV *LHS, const SCEV *RHS,
                                             const SCEV *FoundLHS,
                                             const SCEV *FoundRHS) {
  switch (Pred) {
  default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
  case ICmpInst::ICMP_EQ:
  case ICmpInst::ICMP_NE:
    if (HasSameValue(LHS, FoundLHS) && HasSameValue(RHS, FoundRHS))
      return true;
    break;
  case ICmpInst::ICMP_SLT:
  case ICmpInst::ICMP_SLE:
    if (isKnownViaSimpleReasoning(ICmpInst::ICMP_SLE, LHS, FoundLHS) &&
        isKnownViaSimpleReasoning(ICmpInst::ICMP_SGE, RHS, FoundRHS))
      return true;
    break;
  case ICmpInst::ICMP_SGT:
  case ICmpInst::ICMP_SGE:
    if (isKnownViaSimpleReasoning(ICmpInst::ICMP_SGE, LHS, FoundLHS) &&
        isKnownViaSimpleReasoning(ICmpInst::ICMP_SLE, RHS, FoundRHS))
      return true;
    break;
  case ICmpInst::ICMP_ULT:
  case ICmpInst::ICMP_ULE:
    if (isKnownViaSimpleReasoning(ICmpInst::ICMP_ULE, LHS, FoundLHS) &&
        isKnownViaSimpleReasoning(ICmpInst::ICMP_UGE, RHS, FoundRHS))
      return true;
    break;
  case ICmpInst::ICMP_UGT:
  case ICmpInst::ICMP_UGE:
    if (isKnownViaSimpleReasoning(ICmpInst::ICMP_UGE, LHS, FoundLHS) &&
        isKnownViaSimpleReasoning(ICmpInst::ICMP_ULE, RHS, FoundRHS))
      return true;
    break;
  }

  // Maybe it can be proved via operations?
  if (isImpliedViaOperations(Pred, LHS, RHS, FoundLHS, FoundRHS))
    return true;

  return false;
}

bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred,
                                                     const SCEV *LHS,
                                                     const SCEV *RHS,
                                                     const SCEV *FoundLHS,
                                                     const SCEV *FoundRHS) {
  if (!isa<SCEVConstant>(RHS) || !isa<SCEVConstant>(FoundRHS))
    // The restriction on `FoundRHS` can be lifted easily -- it exists only to
    // reduce the compile time impact of this optimization.
    return false;

  Optional<APInt> Addend = computeConstantDifference(LHS, FoundLHS);
  if (!Addend)
    return false;

  const APInt &ConstFoundRHS = cast<SCEVConstant>(FoundRHS)->getAPInt();

  // `FoundLHSRange` is the range we know `FoundLHS` to be in by virtue of the
  // antecedent "`FoundLHS` `Pred` `FoundRHS`".
  ConstantRange FoundLHSRange =
      ConstantRange::makeAllowedICmpRegion(Pred, ConstFoundRHS);

  // Since `LHS` is `FoundLHS` + `Addend`, we can compute a range for `LHS`:
  ConstantRange LHSRange = FoundLHSRange.add(ConstantRange(*Addend));

  // We can also compute the range of values for `LHS` that satisfy the
  // consequent, "`LHS` `Pred` `RHS`":
  const APInt &ConstRHS = cast<SCEVConstant>(RHS)->getAPInt();
  ConstantRange SatisfyingLHSRange =
      ConstantRange::makeSatisfyingICmpRegion(Pred, ConstRHS);

  // The antecedent implies the consequent if every value of `LHS` that
  // satisfies the antecedent also satisfies the consequent.
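  // A worked instance with illustrative i8 numbers: the antecedent
  // "FoundLHS u< 10" puts FoundLHS in [0, 10).  With Addend = 100, LHS lies
  // in [100, 110).  For the consequent "LHS u< 120" the satisfying region is
  // [0, 120), which contains [100, 110), so the implication holds and the
  // containment check below returns true for that query.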
return SatisfyingLHSRange.contains(LHSRange); } bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride, bool IsSigned, bool NoWrap) { assert(isKnownPositive(Stride) && "Positive stride expected!"); if (NoWrap) return false; unsigned BitWidth = getTypeSizeInBits(RHS->getType()); const SCEV *One = getOne(Stride->getType()); if (IsSigned) { APInt MaxRHS = getSignedRangeMax(RHS); APInt MaxValue = APInt::getSignedMaxValue(BitWidth); APInt MaxStrideMinusOne = getSignedRangeMax(getMinusSCEV(Stride, One)); // SMaxRHS + SMaxStrideMinusOne > SMaxValue => overflow! return (std::move(MaxValue) - MaxStrideMinusOne).slt(MaxRHS); } APInt MaxRHS = getUnsignedRangeMax(RHS); APInt MaxValue = APInt::getMaxValue(BitWidth); APInt MaxStrideMinusOne = getUnsignedRangeMax(getMinusSCEV(Stride, One)); // UMaxRHS + UMaxStrideMinusOne > UMaxValue => overflow! return (std::move(MaxValue) - MaxStrideMinusOne).ult(MaxRHS); } bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride, bool IsSigned, bool NoWrap) { if (NoWrap) return false; unsigned BitWidth = getTypeSizeInBits(RHS->getType()); const SCEV *One = getOne(Stride->getType()); if (IsSigned) { APInt MinRHS = getSignedRangeMin(RHS); APInt MinValue = APInt::getSignedMinValue(BitWidth); APInt MaxStrideMinusOne = getSignedRangeMax(getMinusSCEV(Stride, One)); // SMinRHS - SMaxStrideMinusOne < SMinValue => overflow! return (std::move(MinValue) + MaxStrideMinusOne).sgt(MinRHS); } APInt MinRHS = getUnsignedRangeMin(RHS); APInt MinValue = APInt::getMinValue(BitWidth); APInt MaxStrideMinusOne = getUnsignedRangeMax(getMinusSCEV(Stride, One)); // UMinRHS - UMaxStrideMinusOne < UMinValue => overflow! return (std::move(MinValue) + MaxStrideMinusOne).ugt(MinRHS); } const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step, bool Equality) { const SCEV *One = getOne(Step->getType()); Delta = Equality ? getAddExpr(Delta, Step) : getAddExpr(Delta, getMinusSCEV(Step, One)); return getUDivExpr(Delta, Step); } const SCEV *ScalarEvolution::computeMaxBECountForLT(const SCEV *Start, const SCEV *Stride, const SCEV *End, unsigned BitWidth, bool IsSigned) { assert(!isKnownNonPositive(Stride) && "Stride is expected strictly positive!"); // Calculate the maximum backedge count based on the range of values // permitted by Start, End, and Stride. const SCEV *MaxBECount; APInt MinStart = IsSigned ? getSignedRangeMin(Start) : getUnsignedRangeMin(Start); APInt StrideForMaxBECount = IsSigned ? getSignedRangeMin(Stride) : getUnsignedRangeMin(Stride); // We already know that the stride is positive, so we paper over conservatism // in our range computation by forcing StrideForMaxBECount to be at least one. // In theory this is unnecessary, but we expect MaxBECount to be a // SCEVConstant, and (udiv 0) is not constant folded by SCEV (there // is nothing to constant fold it to). APInt One(BitWidth, 1, IsSigned); StrideForMaxBECount = APIntOps::smax(One, StrideForMaxBECount); APInt MaxValue = IsSigned ? APInt::getSignedMaxValue(BitWidth) : APInt::getMaxValue(BitWidth); APInt Limit = MaxValue - (StrideForMaxBECount - 1); // Although End can be a MAX expression we estimate MaxEnd considering only // the case End = RHS of the loop termination condition. This is safe because // in the other case (End - Start) is zero, leading to a zero maximum backedge // taken count. APInt MaxEnd = IsSigned ? 
APIntOps::smin(getSignedRangeMax(End), Limit) : APIntOps::umin(getUnsignedRangeMax(End), Limit); MaxBECount = computeBECount(getConstant(MaxEnd - MinStart) /* Delta */, getConstant(StrideForMaxBECount) /* Step */, false /* Equality */); return MaxBECount; } ScalarEvolution::ExitLimit ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, bool IsSigned, bool ControlsExit, bool AllowPredicates) { SmallPtrSet Predicates; const SCEVAddRecExpr *IV = dyn_cast(LHS); bool PredicatedIV = false; if (!IV && AllowPredicates) { // Try to make this an AddRec using runtime tests, in the first X // iterations of this loop, where X is the SCEV expression found by the // algorithm below. IV = convertSCEVToAddRecWithPredicates(LHS, L, Predicates); PredicatedIV = true; } // Avoid weird loops if (!IV || IV->getLoop() != L || !IV->isAffine()) return getCouldNotCompute(); bool NoWrap = ControlsExit && IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW); const SCEV *Stride = IV->getStepRecurrence(*this); bool PositiveStride = isKnownPositive(Stride); // Avoid negative or zero stride values. if (!PositiveStride) { // We can compute the correct backedge taken count for loops with unknown // strides if we can prove that the loop is not an infinite loop with side // effects. Here's the loop structure we are trying to handle - // // i = start // do { // A[i] = i; // i += s; // } while (i < end); // // The backedge taken count for such loops is evaluated as - // (max(end, start + stride) - start - 1) /u stride // // The additional preconditions that we need to check to prove correctness // of the above formula is as follows - // // a) IV is either nuw or nsw depending upon signedness (indicated by the // NoWrap flag). // b) loop is single exit with no side effects. // // // Precondition a) implies that if the stride is negative, this is a single // trip loop. The backedge taken count formula reduces to zero in this case. // // Precondition b) implies that the unknown stride cannot be zero otherwise // we have UB. // // The positive stride case is the same as isKnownPositive(Stride) returning // true (original behavior of the function). // // We want to make sure that the stride is truly unknown as there are edge // cases where ScalarEvolution propagates no wrap flags to the // post-increment/decrement IV even though the increment/decrement operation // itself is wrapping. The computed backedge taken count may be wrong in // such cases. This is prevented by checking that the stride is not known to // be either positive or non-positive. For example, no wrap flags are // propagated to the post-increment IV of this loop with a trip count of 2 - // // unsigned char i; // for(i=127; i<128; i+=129) // A[i] = i; // if (PredicatedIV || !NoWrap || isKnownNonPositive(Stride) || !loopHasNoSideEffects(L)) return getCouldNotCompute(); } else if (!Stride->isOne() && doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap)) // Avoid proven overflow cases: this will ensure that the backedge taken // count will not generate any unsigned overflow. Relaxed no-overflow // conditions exploit NoWrapFlags, allowing to optimize in presence of // undefined behaviors like the case of C language. return getCouldNotCompute(); ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; const SCEV *Start = IV->getStart(); const SCEV *End = RHS; // When the RHS is not invariant, we do not know the end bound of the loop and // cannot calculate the ExactBECount needed by ExitLimit. 
However, we can // calculate the MaxBECount, given the start, stride and max value for the end // bound of the loop (RHS), and the fact that IV does not overflow (which is // checked above). if (!isLoopInvariant(RHS, L)) { const SCEV *MaxBECount = computeMaxBECountForLT( Start, Stride, RHS, getTypeSizeInBits(LHS->getType()), IsSigned); return ExitLimit(getCouldNotCompute() /* ExactNotTaken */, MaxBECount, false /*MaxOrZero*/, Predicates); } // If the backedge is taken at least once, then it will be taken // (End-Start)/Stride times (rounded up to a multiple of Stride), where Start // is the LHS value of the less-than comparison the first time it is evaluated // and End is the RHS. const SCEV *BECountIfBackedgeTaken = computeBECount(getMinusSCEV(End, Start), Stride, false); // If the loop entry is guarded by the result of the backedge test of the // first loop iteration, then we know the backedge will be taken at least // once and so the backedge taken count is as above. If not then we use the // expression (max(End,Start)-Start)/Stride to describe the backedge count, // as if the backedge is taken at least once max(End,Start) is End and so the // result is as above, and if not max(End,Start) is Start so we get a backedge // count of zero. const SCEV *BECount; if (isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS)) BECount = BECountIfBackedgeTaken; else { End = IsSigned ? getSMaxExpr(RHS, Start) : getUMaxExpr(RHS, Start); BECount = computeBECount(getMinusSCEV(End, Start), Stride, false); } const SCEV *MaxBECount; bool MaxOrZero = false; if (isa(BECount)) MaxBECount = BECount; else if (isa(BECountIfBackedgeTaken)) { // If we know exactly how many times the backedge will be taken if it's // taken at least once, then the backedge count will either be that or // zero. MaxBECount = BECountIfBackedgeTaken; MaxOrZero = true; } else { MaxBECount = computeMaxBECountForLT( Start, Stride, RHS, getTypeSizeInBits(LHS->getType()), IsSigned); } if (isa(MaxBECount) && !isa(BECount)) MaxBECount = getConstant(getUnsignedRangeMax(BECount)); return ExitLimit(BECount, MaxBECount, MaxOrZero, Predicates); } ScalarEvolution::ExitLimit ScalarEvolution::howManyGreaterThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, bool IsSigned, bool ControlsExit, bool AllowPredicates) { SmallPtrSet Predicates; // We handle only IV > Invariant if (!isLoopInvariant(RHS, L)) return getCouldNotCompute(); const SCEVAddRecExpr *IV = dyn_cast(LHS); if (!IV && AllowPredicates) // Try to make this an AddRec using runtime tests, in the first X // iterations of this loop, where X is the SCEV expression found by the // algorithm below. IV = convertSCEVToAddRecWithPredicates(LHS, L, Predicates); // Avoid weird loops if (!IV || IV->getLoop() != L || !IV->isAffine()) return getCouldNotCompute(); bool NoWrap = ControlsExit && IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW); const SCEV *Stride = getNegativeSCEV(IV->getStepRecurrence(*this)); // Avoid negative or zero stride values if (!isKnownPositive(Stride)) return getCouldNotCompute(); // Avoid proven overflow cases: this will ensure that the backedge taken count // will not generate any unsigned overflow. Relaxed no-overflow conditions // exploit NoWrapFlags, allowing to optimize in presence of undefined // behaviors like the case of C language. if (!Stride->isOne() && doesIVOverflowOnGT(RHS, Stride, IsSigned, NoWrap)) return getCouldNotCompute(); ICmpInst::Predicate Cond = IsSigned ? 
ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; const SCEV *Start = IV->getStart(); const SCEV *End = RHS; if (!isLoopEntryGuardedByCond(L, Cond, getAddExpr(Start, Stride), RHS)) End = IsSigned ? getSMinExpr(RHS, Start) : getUMinExpr(RHS, Start); const SCEV *BECount = computeBECount(getMinusSCEV(Start, End), Stride, false); APInt MaxStart = IsSigned ? getSignedRangeMax(Start) : getUnsignedRangeMax(Start); APInt MinStride = IsSigned ? getSignedRangeMin(Stride) : getUnsignedRangeMin(Stride); unsigned BitWidth = getTypeSizeInBits(LHS->getType()); APInt Limit = IsSigned ? APInt::getSignedMinValue(BitWidth) + (MinStride - 1) : APInt::getMinValue(BitWidth) + (MinStride - 1); // Although End can be a MIN expression we estimate MinEnd considering only // the case End = RHS. This is safe because in the other case (Start - End) // is zero, leading to a zero maximum backedge taken count. APInt MinEnd = IsSigned ? APIntOps::smax(getSignedRangeMin(RHS), Limit) : APIntOps::umax(getUnsignedRangeMin(RHS), Limit); const SCEV *MaxBECount = getCouldNotCompute(); if (isa(BECount)) MaxBECount = BECount; else MaxBECount = computeBECount(getConstant(MaxStart - MinEnd), getConstant(MinStride), false); if (isa(MaxBECount)) MaxBECount = BECount; return ExitLimit(BECount, MaxBECount, false, Predicates); } const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range, ScalarEvolution &SE) const { if (Range.isFullSet()) // Infinite loop. return SE.getCouldNotCompute(); // If the start is a non-zero constant, shift the range to simplify things. if (const SCEVConstant *SC = dyn_cast(getStart())) if (!SC->getValue()->isZero()) { SmallVector Operands(op_begin(), op_end()); Operands[0] = SE.getZero(SC->getType()); const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop(), getNoWrapFlags(FlagNW)); if (const auto *ShiftedAddRec = dyn_cast(Shifted)) return ShiftedAddRec->getNumIterationsInRange( Range.subtract(SC->getAPInt()), SE); // This is strange and shouldn't happen. return SE.getCouldNotCompute(); } // The only time we can solve this is when we have all constant indices. // Otherwise, we cannot determine the overflow conditions. if (any_of(operands(), [](const SCEV *Op) { return !isa(Op); })) return SE.getCouldNotCompute(); // Okay at this point we know that all elements of the chrec are constants and // that the start element is zero. // First check to see if the range contains zero. If not, the first // iteration exits. unsigned BitWidth = SE.getTypeSizeInBits(getType()); if (!Range.contains(APInt(BitWidth, 0))) return SE.getZero(getType()); if (isAffine()) { // If this is an affine expression then we have this situation: // Solve {0,+,A} in Range === Ax in Range // We know that zero is in the range. If A is positive then we know that // the upper value of the range must be the first possible exit value. // If A is negative then the lower of the range is the last possible loop // value. Also note that we already checked for a full range. APInt A = cast(getOperand(1))->getAPInt(); APInt End = A.sge(1) ? (Range.getUpper() - 1) : Range.getLower(); // The exit value should be (End+A)/A. APInt ExitVal = (End + A).udiv(A); ConstantInt *ExitValue = ConstantInt::get(SE.getContext(), ExitVal); // Evaluate at the exit value. If we really did fall out of the valid // range, then we computed our trip count, otherwise wrap around or other // things must have happened. 
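    // A worked instance of the affine solve above (illustrative): for the
    // chrec {0,+,4} and Range = [0, 10), A = 4 and End = 10 - 1 = 9, so
    // ExitVal = (9 + 4) /u 4 = 3.  The iteration values 0, 4 and 8 lie in
    // the range, and the third iteration produces 12, the first value to
    // fall outside it -- matching the computed exit count of 3.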
ConstantInt *Val = EvaluateConstantChrecAtConstant(this, ExitValue, SE); if (Range.contains(Val->getValue())) return SE.getCouldNotCompute(); // Something strange happened // Ensure that the previous value is in the range. This is a sanity check. assert(Range.contains( EvaluateConstantChrecAtConstant(this, ConstantInt::get(SE.getContext(), ExitVal - 1), SE)->getValue()) && "Linear scev computation is off in a bad way!"); return SE.getConstant(ExitValue); } else if (isQuadratic()) { // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of the // quadratic equation to solve it. To do this, we must frame our problem in // terms of figuring out when zero is crossed, instead of when // Range.getUpper() is crossed. SmallVector NewOps(op_begin(), op_end()); NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper())); const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop(), FlagAnyWrap); // Next, solve the constructed addrec if (auto Roots = SolveQuadraticEquation(cast(NewAddRec), SE)) { const SCEVConstant *R1 = Roots->first; const SCEVConstant *R2 = Roots->second; // Pick the smallest positive root value. if (ConstantInt *CB = dyn_cast(ConstantExpr::getICmp( ICmpInst::ICMP_ULT, R1->getValue(), R2->getValue()))) { if (!CB->getZExtValue()) std::swap(R1, R2); // R1 is the minimum root now. // Make sure the root is not off by one. The returned iteration should // not be in the range, but the previous one should be. When solving // for "X*X < 5", for example, we should not return a root of 2. ConstantInt *R1Val = EvaluateConstantChrecAtConstant(this, R1->getValue(), SE); if (Range.contains(R1Val->getValue())) { // The next iteration must be out of the range... ConstantInt *NextVal = ConstantInt::get(SE.getContext(), R1->getAPInt() + 1); R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE); if (!Range.contains(R1Val->getValue())) return SE.getConstant(NextVal); return SE.getCouldNotCompute(); // Something strange happened } // If R1 was not in the range, then it is a good return value. Make // sure that R1-1 WAS in the range though, just in case. ConstantInt *NextVal = ConstantInt::get(SE.getContext(), R1->getAPInt() - 1); R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE); if (Range.contains(R1Val->getValue())) return R1; return SE.getCouldNotCompute(); // Something strange happened } } } return SE.getCouldNotCompute(); } // Return true when S contains at least an undef value. static inline bool containsUndefs(const SCEV *S) { return SCEVExprContains(S, [](const SCEV *S) { if (const auto *SU = dyn_cast(S)) return isa(SU->getValue()); else if (const auto *SC = dyn_cast(S)) return isa(SC->getValue()); return false; }); } namespace { // Collect all steps of SCEV expressions. struct SCEVCollectStrides { ScalarEvolution &SE; SmallVectorImpl &Strides; SCEVCollectStrides(ScalarEvolution &SE, SmallVectorImpl &S) : SE(SE), Strides(S) {} bool follow(const SCEV *S) { if (const SCEVAddRecExpr *AR = dyn_cast(S)) Strides.push_back(AR->getStepRecurrence(SE)); return true; } bool isDone() const { return false; } }; // Collect all SCEVUnknown and SCEVMulExpr expressions. struct SCEVCollectTerms { SmallVectorImpl &Terms; SCEVCollectTerms(SmallVectorImpl &T) : Terms(T) {} bool follow(const SCEV *S) { if (isa(S) || isa(S) || isa(S)) { if (!containsUndefs(S)) Terms.push_back(S); // Stop recursion: once we collected a term, do not walk its operands. return false; } // Keep looking. return true; } bool isDone() const { return false; } }; // Check if a SCEV contains an AddRecExpr. 
struct SCEVHasAddRec { bool &ContainsAddRec; SCEVHasAddRec(bool &ContainsAddRec) : ContainsAddRec(ContainsAddRec) { ContainsAddRec = false; } bool follow(const SCEV *S) { if (isa(S)) { ContainsAddRec = true; // Stop recursion: once we collected a term, do not walk its operands. return false; } // Keep looking. return true; } bool isDone() const { return false; } }; // Find factors that are multiplied with an expression that (possibly as a // subexpression) contains an AddRecExpr. In the expression: // // 8 * (100 + %p * %q * (%a + {0, +, 1}_loop)) // // "%p * %q" are factors multiplied by the expression "(%a + {0, +, 1}_loop)" // that contains the AddRec {0, +, 1}_loop. %p * %q are likely to be array size // parameters as they form a product with an induction variable. // // This collector expects all array size parameters to be in the same MulExpr. // It might be necessary to later add support for collecting parameters that are // spread over different nested MulExpr. struct SCEVCollectAddRecMultiplies { SmallVectorImpl &Terms; ScalarEvolution &SE; SCEVCollectAddRecMultiplies(SmallVectorImpl &T, ScalarEvolution &SE) : Terms(T), SE(SE) {} bool follow(const SCEV *S) { if (auto *Mul = dyn_cast(S)) { bool HasAddRec = false; SmallVector Operands; for (auto Op : Mul->operands()) { const SCEVUnknown *Unknown = dyn_cast(Op); if (Unknown && !isa(Unknown->getValue())) { Operands.push_back(Op); } else if (Unknown) { HasAddRec = true; } else { bool ContainsAddRec; SCEVHasAddRec ContiansAddRec(ContainsAddRec); visitAll(Op, ContiansAddRec); HasAddRec |= ContainsAddRec; } } if (Operands.size() == 0) return true; if (!HasAddRec) return false; Terms.push_back(SE.getMulExpr(Operands)); // Stop recursion: once we collected a term, do not walk its operands. return false; } // Keep looking. return true; } bool isDone() const { return false; } }; } // end anonymous namespace /// Find parametric terms in this SCEVAddRecExpr. We first for parameters in /// two places: /// 1) The strides of AddRec expressions. /// 2) Unknowns that are multiplied with AddRec expressions. void ScalarEvolution::collectParametricTerms(const SCEV *Expr, SmallVectorImpl &Terms) { SmallVector Strides; SCEVCollectStrides StrideCollector(*this, Strides); visitAll(Expr, StrideCollector); DEBUG({ dbgs() << "Strides:\n"; for (const SCEV *S : Strides) dbgs() << *S << "\n"; }); for (const SCEV *S : Strides) { SCEVCollectTerms TermCollector(Terms); visitAll(S, TermCollector); } DEBUG({ dbgs() << "Terms:\n"; for (const SCEV *T : Terms) dbgs() << *T << "\n"; }); SCEVCollectAddRecMultiplies MulCollector(Terms, *this); visitAll(Expr, MulCollector); } static bool findArrayDimensionsRec(ScalarEvolution &SE, SmallVectorImpl &Terms, SmallVectorImpl &Sizes) { int Last = Terms.size() - 1; const SCEV *Step = Terms[Last]; // End of recursion. if (Last == 0) { if (const SCEVMulExpr *M = dyn_cast(Step)) { SmallVector Qs; for (const SCEV *Op : M->operands()) if (!isa(Op)) Qs.push_back(Op); Step = SE.getMulExpr(Qs); } Sizes.push_back(Step); return true; } for (const SCEV *&Term : Terms) { // Normalize the terms before the next call to findArrayDimensionsRec. const SCEV *Q, *R; SCEVDivision::divide(SE, Term, Step, &Q, &R); // Bail out when GCD does not evenly divide one of the terms. if (!R->isZero()) return false; Term = Q; } // Remove all SCEVConstants. 
Terms.erase( remove_if(Terms, [](const SCEV *E) { return isa(E); }), Terms.end()); if (Terms.size() > 0) if (!findArrayDimensionsRec(SE, Terms, Sizes)) return false; Sizes.push_back(Step); return true; } // Returns true when one of the SCEVs of Terms contains a SCEVUnknown parameter. static inline bool containsParameters(SmallVectorImpl &Terms) { for (const SCEV *T : Terms) if (SCEVExprContains(T, isa)) return true; return false; } // Return the number of product terms in S. static inline int numberOfTerms(const SCEV *S) { if (const SCEVMulExpr *Expr = dyn_cast(S)) return Expr->getNumOperands(); return 1; } static const SCEV *removeConstantFactors(ScalarEvolution &SE, const SCEV *T) { if (isa(T)) return nullptr; if (isa(T)) return T; if (const SCEVMulExpr *M = dyn_cast(T)) { SmallVector Factors; for (const SCEV *Op : M->operands()) if (!isa(Op)) Factors.push_back(Op); return SE.getMulExpr(Factors); } return T; } /// Return the size of an element read or written by Inst. const SCEV *ScalarEvolution::getElementSize(Instruction *Inst) { Type *Ty; if (StoreInst *Store = dyn_cast(Inst)) Ty = Store->getValueOperand()->getType(); else if (LoadInst *Load = dyn_cast(Inst)) Ty = Load->getType(); else return nullptr; Type *ETy = getEffectiveSCEVType(PointerType::getUnqual(Ty)); return getSizeOfExpr(ETy, Ty); } void ScalarEvolution::findArrayDimensions(SmallVectorImpl &Terms, SmallVectorImpl &Sizes, const SCEV *ElementSize) { if (Terms.size() < 1 || !ElementSize) return; // Early return when Terms do not contain parameters: we do not delinearize // non parametric SCEVs. if (!containsParameters(Terms)) return; DEBUG({ dbgs() << "Terms:\n"; for (const SCEV *T : Terms) dbgs() << *T << "\n"; }); // Remove duplicates. array_pod_sort(Terms.begin(), Terms.end()); Terms.erase(std::unique(Terms.begin(), Terms.end()), Terms.end()); // Put larger terms first. std::sort(Terms.begin(), Terms.end(), [](const SCEV *LHS, const SCEV *RHS) { return numberOfTerms(LHS) > numberOfTerms(RHS); }); // Try to divide all terms by the element size. If term is not divisible by // element size, proceed with the original term. for (const SCEV *&Term : Terms) { const SCEV *Q, *R; SCEVDivision::divide(*this, Term, ElementSize, &Q, &R); if (!Q->isZero()) Term = Q; } SmallVector NewTerms; // Remove constant factors. for (const SCEV *T : Terms) if (const SCEV *NewT = removeConstantFactors(*this, T)) NewTerms.push_back(NewT); DEBUG({ dbgs() << "Terms after sorting:\n"; for (const SCEV *T : NewTerms) dbgs() << *T << "\n"; }); if (NewTerms.empty() || !findArrayDimensionsRec(*this, NewTerms, Sizes)) { Sizes.clear(); return; } // The last element to be pushed into Sizes is the size of an element. Sizes.push_back(ElementSize); DEBUG({ dbgs() << "Sizes:\n"; for (const SCEV *S : Sizes) dbgs() << *S << "\n"; }); } void ScalarEvolution::computeAccessFunctions( const SCEV *Expr, SmallVectorImpl &Subscripts, SmallVectorImpl &Sizes) { // Early exit in case this SCEV is not an affine multivariate function. if (Sizes.empty()) return; if (auto *AR = dyn_cast(Expr)) if (!AR->isAffine()) return; const SCEV *Res = Expr; int Last = Sizes.size() - 1; for (int i = Last; i >= 0; i--) { const SCEV *Q, *R; SCEVDivision::divide(*this, Res, Sizes[i], &Q, &R); DEBUG({ dbgs() << "Res: " << *Res << "\n"; dbgs() << "Sizes[i]: " << *Sizes[i] << "\n"; dbgs() << "Res divided by Sizes[i]:\n"; dbgs() << "Quotient: " << *Q << "\n"; dbgs() << "Remainder: " << *R << "\n"; }); Res = Q; // Do not record the last subscript corresponding to the size of elements in // the array. 
if (i == Last) { // Bail out if the remainder is too complex. if (isa(R)) { Subscripts.clear(); Sizes.clear(); return; } continue; } // Record the access function for the current subscript. Subscripts.push_back(R); } // Also push in last position the remainder of the last division: it will be // the access function of the innermost dimension. Subscripts.push_back(Res); std::reverse(Subscripts.begin(), Subscripts.end()); DEBUG({ dbgs() << "Subscripts:\n"; for (const SCEV *S : Subscripts) dbgs() << *S << "\n"; }); } /// Splits the SCEV into two vectors of SCEVs representing the subscripts and /// sizes of an array access. Returns the remainder of the delinearization that /// is the offset start of the array. The SCEV->delinearize algorithm computes /// the multiples of SCEV coefficients: that is a pattern matching of sub /// expressions in the stride and base of a SCEV corresponding to the /// computation of a GCD (greatest common divisor) of base and stride. When /// SCEV->delinearize fails, it returns the SCEV unchanged. /// /// For example: when analyzing the memory access A[i][j][k] in this loop nest /// /// void foo(long n, long m, long o, double A[n][m][o]) { /// /// for (long i = 0; i < n; i++) /// for (long j = 0; j < m; j++) /// for (long k = 0; k < o; k++) /// A[i][j][k] = 1.0; /// } /// /// the delinearization input is the following AddRec SCEV: /// /// AddRec: {{{%A,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k> /// /// From this SCEV, we are able to say that the base offset of the access is %A /// because it appears as an offset that does not divide any of the strides in /// the loops: /// /// CHECK: Base offset: %A /// /// and then SCEV->delinearize determines the size of some of the dimensions of /// the array as these are the multiples by which the strides are happening: /// /// CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes. /// /// Note that the outermost dimension remains of UnknownSize because there are /// no strides that would help identifying the size of the last dimension: when /// the array has been statically allocated, one could compute the size of that /// dimension by dividing the overall size of the array by the size of the known /// dimensions: %m * %o * 8. /// /// Finally delinearize provides the access functions for the array reference /// that does correspond to A[i][j][k] of the above C testcase: /// /// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>] /// /// The testcases are checking the output of a function pass: /// DelinearizationPass that walks through all loads and stores of a function /// asking for the SCEV of the memory access with respect to all enclosing /// loops, calling SCEV->delinearize on that and printing the results. void ScalarEvolution::delinearize(const SCEV *Expr, SmallVectorImpl &Subscripts, SmallVectorImpl &Sizes, const SCEV *ElementSize) { // First step: collect parametric terms. SmallVector Terms; collectParametricTerms(Expr, Terms); if (Terms.empty()) return; // Second step: find subscript sizes. findArrayDimensions(Terms, Sizes, ElementSize); if (Sizes.empty()) return; // Third step: compute the access functions for each subscript. 
computeAccessFunctions(Expr, Subscripts, Sizes); if (Subscripts.empty()) return; DEBUG({ dbgs() << "succeeded to delinearize " << *Expr << "\n"; dbgs() << "ArrayDecl[UnknownSize]"; for (const SCEV *S : Sizes) dbgs() << "[" << *S << "]"; dbgs() << "\nArrayRef"; for (const SCEV *S : Subscripts) dbgs() << "[" << *S << "]"; dbgs() << "\n"; }); } //===----------------------------------------------------------------------===// // SCEVCallbackVH Class Implementation //===----------------------------------------------------------------------===// void ScalarEvolution::SCEVCallbackVH::deleted() { assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!"); if (PHINode *PN = dyn_cast(getValPtr())) SE->ConstantEvolutionLoopExitValue.erase(PN); SE->eraseValueFromMap(getValPtr()); // this now dangles! } void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) { assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!"); // Forget all the expressions associated with users of the old value, // so that future queries will recompute the expressions using the new // value. Value *Old = getValPtr(); SmallVector Worklist(Old->user_begin(), Old->user_end()); SmallPtrSet Visited; while (!Worklist.empty()) { User *U = Worklist.pop_back_val(); // Deleting the Old value will cause this to dangle. Postpone // that until everything else is done. if (U == Old) continue; if (!Visited.insert(U).second) continue; if (PHINode *PN = dyn_cast(U)) SE->ConstantEvolutionLoopExitValue.erase(PN); SE->eraseValueFromMap(U); Worklist.insert(Worklist.end(), U->user_begin(), U->user_end()); } // Delete the Old value. if (PHINode *PN = dyn_cast(Old)) SE->ConstantEvolutionLoopExitValue.erase(PN); SE->eraseValueFromMap(Old); // this now dangles! } ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se) : CallbackVH(V), SE(se) {} //===----------------------------------------------------------------------===// // ScalarEvolution Class Implementation //===----------------------------------------------------------------------===// ScalarEvolution::ScalarEvolution(Function &F, TargetLibraryInfo &TLI, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI) : F(F), TLI(TLI), AC(AC), DT(DT), LI(LI), CouldNotCompute(new SCEVCouldNotCompute()), ValuesAtScopes(64), LoopDispositions(64), BlockDispositions(64) { // To use guards for proving predicates, we need to scan every instruction in // relevant basic blocks, and not just terminators. Doing this is a waste of // time if the IR does not actually contain any calls to // @llvm.experimental.guard, so do a quick check and remember this beforehand. // // This pessimizes the case where a pass that preserves ScalarEvolution wants // to _add_ guards to the module when there weren't any before, and wants // ScalarEvolution to optimize based on those guards. For now we prefer to be // efficient in lieu of being smart in that rather obscure case. 
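  // For reference, the scan below is looking for call sites of the form
  // (illustrative IR):
  //
  //   call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
  //
  // HasGuards is set only when the module declares the intrinsic and that
  // declaration has at least one use, so the per-block instruction scans are
  // skipped entirely for guard-free modules.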
auto *GuardDecl = F.getParent()->getFunction( Intrinsic::getName(Intrinsic::experimental_guard)); HasGuards = GuardDecl && !GuardDecl->use_empty(); } ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg) : F(Arg.F), HasGuards(Arg.HasGuards), TLI(Arg.TLI), AC(Arg.AC), DT(Arg.DT), LI(Arg.LI), CouldNotCompute(std::move(Arg.CouldNotCompute)), ValueExprMap(std::move(Arg.ValueExprMap)), PendingLoopPredicates(std::move(Arg.PendingLoopPredicates)), MinTrailingZerosCache(std::move(Arg.MinTrailingZerosCache)), BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)), PredicatedBackedgeTakenCounts( std::move(Arg.PredicatedBackedgeTakenCounts)), ConstantEvolutionLoopExitValue( std::move(Arg.ConstantEvolutionLoopExitValue)), ValuesAtScopes(std::move(Arg.ValuesAtScopes)), LoopDispositions(std::move(Arg.LoopDispositions)), LoopPropertiesCache(std::move(Arg.LoopPropertiesCache)), BlockDispositions(std::move(Arg.BlockDispositions)), UnsignedRanges(std::move(Arg.UnsignedRanges)), SignedRanges(std::move(Arg.SignedRanges)), UniqueSCEVs(std::move(Arg.UniqueSCEVs)), UniquePreds(std::move(Arg.UniquePreds)), SCEVAllocator(std::move(Arg.SCEVAllocator)), LoopUsers(std::move(Arg.LoopUsers)), PredicatedSCEVRewrites(std::move(Arg.PredicatedSCEVRewrites)), FirstUnknown(Arg.FirstUnknown) { Arg.FirstUnknown = nullptr; } ScalarEvolution::~ScalarEvolution() { // Iterate through all the SCEVUnknown instances and call their // destructors, so that they release their references to their values. for (SCEVUnknown *U = FirstUnknown; U;) { SCEVUnknown *Tmp = U; U = U->Next; Tmp->~SCEVUnknown(); } FirstUnknown = nullptr; ExprValueMap.clear(); ValueExprMap.clear(); HasRecMap.clear(); // Free any extra memory created for ExitNotTakenInfo in the unlikely event // that a loop had multiple computable exits. for (auto &BTCI : BackedgeTakenCounts) BTCI.second.clear(); for (auto &BTCI : PredicatedBackedgeTakenCounts) BTCI.second.clear(); assert(PendingLoopPredicates.empty() && "isImpliedCond garbage"); assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!"); assert(!ProvingSplitPredicate && "ProvingSplitPredicate garbage!"); } bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) { return !isa(getBackedgeTakenCount(L)); } static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE, const Loop *L) { // Print all inner loops first for (Loop *I : *L) PrintLoopInfo(OS, SE, I); OS << "Loop "; L->getHeader()->printAsOperand(OS, /*PrintType=*/false); OS << ": "; SmallVector ExitBlocks; L->getExitBlocks(ExitBlocks); if (ExitBlocks.size() != 1) OS << " "; if (SE->hasLoopInvariantBackedgeTakenCount(L)) { OS << "backedge-taken count is " << *SE->getBackedgeTakenCount(L); } else { OS << "Unpredictable backedge-taken count. "; } OS << "\n" "Loop "; L->getHeader()->printAsOperand(OS, /*PrintType=*/false); OS << ": "; if (!isa(SE->getMaxBackedgeTakenCount(L))) { OS << "max backedge-taken count is " << *SE->getMaxBackedgeTakenCount(L); if (SE->isBackedgeTakenCountMaxOrZero(L)) OS << ", actual taken count either this or zero."; } else { OS << "Unpredictable max backedge-taken count. "; } OS << "\n" "Loop "; L->getHeader()->printAsOperand(OS, /*PrintType=*/false); OS << ": "; SCEVUnionPredicate Pred; auto PBT = SE->getPredicatedBackedgeTakenCount(L, Pred); if (!isa(PBT)) { OS << "Predicated backedge-taken count is " << *PBT << "\n"; OS << " Predicates:\n"; Pred.print(OS, 4); } else { OS << "Unpredictable predicated backedge-taken count. 
"; } OS << "\n"; if (SE->hasLoopInvariantBackedgeTakenCount(L)) { OS << "Loop "; L->getHeader()->printAsOperand(OS, /*PrintType=*/false); OS << ": "; OS << "Trip multiple is " << SE->getSmallConstantTripMultiple(L) << "\n"; } } static StringRef loopDispositionToStr(ScalarEvolution::LoopDisposition LD) { switch (LD) { case ScalarEvolution::LoopVariant: return "Variant"; case ScalarEvolution::LoopInvariant: return "Invariant"; case ScalarEvolution::LoopComputable: return "Computable"; } llvm_unreachable("Unknown ScalarEvolution::LoopDisposition kind!"); } void ScalarEvolution::print(raw_ostream &OS) const { // ScalarEvolution's implementation of the print method is to print // out SCEV values of all instructions that are interesting. Doing // this potentially causes it to create new SCEV objects though, // which technically conflicts with the const qualifier. This isn't // observable from outside the class though, so casting away the // const isn't dangerous. ScalarEvolution &SE = *const_cast(this); OS << "Classifying expressions for: "; F.printAsOperand(OS, /*PrintType=*/false); OS << "\n"; for (Instruction &I : instructions(F)) if (isSCEVable(I.getType()) && !isa(I)) { OS << I << '\n'; OS << " --> "; const SCEV *SV = SE.getSCEV(&I); SV->print(OS); if (!isa(SV)) { OS << " U: "; SE.getUnsignedRange(SV).print(OS); OS << " S: "; SE.getSignedRange(SV).print(OS); } const Loop *L = LI.getLoopFor(I.getParent()); const SCEV *AtUse = SE.getSCEVAtScope(SV, L); if (AtUse != SV) { OS << " --> "; AtUse->print(OS); if (!isa(AtUse)) { OS << " U: "; SE.getUnsignedRange(AtUse).print(OS); OS << " S: "; SE.getSignedRange(AtUse).print(OS); } } if (L) { OS << "\t\t" "Exits: "; const SCEV *ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop()); if (!SE.isLoopInvariant(ExitValue, L)) { OS << "<>"; } else { OS << *ExitValue; } bool First = true; for (auto *Iter = L; Iter; Iter = Iter->getParentLoop()) { if (First) { OS << "\t\t" "LoopDispositions: { "; First = false; } else { OS << ", "; } Iter->getHeader()->printAsOperand(OS, /*PrintType=*/false); OS << ": " << loopDispositionToStr(SE.getLoopDisposition(SV, Iter)); } for (auto *InnerL : depth_first(L)) { if (InnerL == L) continue; if (First) { OS << "\t\t" "LoopDispositions: { "; First = false; } else { OS << ", "; } InnerL->getHeader()->printAsOperand(OS, /*PrintType=*/false); OS << ": " << loopDispositionToStr(SE.getLoopDisposition(SV, InnerL)); } OS << " }"; } OS << "\n"; } OS << "Determining loop execution counts for: "; F.printAsOperand(OS, /*PrintType=*/false); OS << "\n"; for (Loop *I : LI) PrintLoopInfo(OS, &SE, I); } ScalarEvolution::LoopDisposition ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) { auto &Values = LoopDispositions[S]; for (auto &V : Values) { if (V.getPointer() == L) return V.getInt(); } Values.emplace_back(L, LoopVariant); LoopDisposition D = computeLoopDisposition(S, L); auto &Values2 = LoopDispositions[S]; for (auto &V : make_range(Values2.rbegin(), Values2.rend())) { if (V.getPointer() == L) { V.setInt(D); break; } } return D; } ScalarEvolution::LoopDisposition ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) { switch (static_cast(S->getSCEVType())) { case scConstant: return LoopInvariant; case scTruncate: case scZeroExtend: case scSignExtend: return getLoopDisposition(cast(S)->getOperand(), L); case scAddRecExpr: { const SCEVAddRecExpr *AR = cast(S); // If L is the addrec's loop, it's computable. 
if (AR->getLoop() == L) return LoopComputable; // Add recurrences are never invariant in the function-body (null loop). if (!L) return LoopVariant; // Everything that is not defined at loop entry is variant. if (DT.dominates(L->getHeader(), AR->getLoop()->getHeader())) return LoopVariant; assert(!L->contains(AR->getLoop()) && "Containing loop's header does not" " dominate the contained loop's header?"); // This recurrence is invariant w.r.t. L if AR's loop contains L. if (AR->getLoop()->contains(L)) return LoopInvariant; // This recurrence is variant w.r.t. L if any of its operands // are variant. for (auto *Op : AR->operands()) if (!isLoopInvariant(Op, L)) return LoopVariant; // Otherwise it's loop-invariant. return LoopInvariant; } case scAddExpr: case scMulExpr: case scUMaxExpr: case scSMaxExpr: { bool HasVarying = false; for (auto *Op : cast(S)->operands()) { LoopDisposition D = getLoopDisposition(Op, L); if (D == LoopVariant) return LoopVariant; if (D == LoopComputable) HasVarying = true; } return HasVarying ? LoopComputable : LoopInvariant; } case scUDivExpr: { const SCEVUDivExpr *UDiv = cast(S); LoopDisposition LD = getLoopDisposition(UDiv->getLHS(), L); if (LD == LoopVariant) return LoopVariant; LoopDisposition RD = getLoopDisposition(UDiv->getRHS(), L); if (RD == LoopVariant) return LoopVariant; return (LD == LoopInvariant && RD == LoopInvariant) ? LoopInvariant : LoopComputable; } case scUnknown: // All non-instruction values are loop invariant. All instructions are loop // invariant if they are not contained in the specified loop. // Instructions are never considered invariant in the function body // (null loop) because they are defined within the "loop". if (auto *I = dyn_cast(cast(S)->getValue())) return (L && !L->contains(I)) ? LoopInvariant : LoopVariant; return LoopInvariant; case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); } llvm_unreachable("Unknown SCEV kind!"); } bool ScalarEvolution::isLoopInvariant(const SCEV *S, const Loop *L) { return getLoopDisposition(S, L) == LoopInvariant; } bool ScalarEvolution::hasComputableLoopEvolution(const SCEV *S, const Loop *L) { return getLoopDisposition(S, L) == LoopComputable; } ScalarEvolution::BlockDisposition ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) { auto &Values = BlockDispositions[S]; for (auto &V : Values) { if (V.getPointer() == BB) return V.getInt(); } Values.emplace_back(BB, DoesNotDominateBlock); BlockDisposition D = computeBlockDisposition(S, BB); auto &Values2 = BlockDispositions[S]; for (auto &V : make_range(Values2.rbegin(), Values2.rend())) { if (V.getPointer() == BB) { V.setInt(D); break; } } return D; } ScalarEvolution::BlockDisposition ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) { switch (static_cast(S->getSCEVType())) { case scConstant: return ProperlyDominatesBlock; case scTruncate: case scZeroExtend: case scSignExtend: return getBlockDisposition(cast(S)->getOperand(), BB); case scAddRecExpr: { // This uses a "dominates" query instead of "properly dominates" query // to test for proper dominance too, because the instruction which // produces the addrec's value is a PHI, and a PHI effectively properly // dominates its entire containing block. const SCEVAddRecExpr *AR = cast(S); if (!DT.dominates(AR->getLoop()->getHeader(), BB)) return DoesNotDominateBlock; // Fall through into SCEVNAryExpr handling. 
LLVM_FALLTHROUGH; } case scAddExpr: case scMulExpr: case scUMaxExpr: case scSMaxExpr: { const SCEVNAryExpr *NAry = cast(S); bool Proper = true; for (const SCEV *NAryOp : NAry->operands()) { BlockDisposition D = getBlockDisposition(NAryOp, BB); if (D == DoesNotDominateBlock) return DoesNotDominateBlock; if (D == DominatesBlock) Proper = false; } return Proper ? ProperlyDominatesBlock : DominatesBlock; } case scUDivExpr: { const SCEVUDivExpr *UDiv = cast(S); const SCEV *LHS = UDiv->getLHS(), *RHS = UDiv->getRHS(); BlockDisposition LD = getBlockDisposition(LHS, BB); if (LD == DoesNotDominateBlock) return DoesNotDominateBlock; BlockDisposition RD = getBlockDisposition(RHS, BB); if (RD == DoesNotDominateBlock) return DoesNotDominateBlock; return (LD == ProperlyDominatesBlock && RD == ProperlyDominatesBlock) ? ProperlyDominatesBlock : DominatesBlock; } case scUnknown: if (Instruction *I = dyn_cast(cast(S)->getValue())) { if (I->getParent() == BB) return DominatesBlock; if (DT.properlyDominates(I->getParent(), BB)) return ProperlyDominatesBlock; return DoesNotDominateBlock; } return ProperlyDominatesBlock; case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); } llvm_unreachable("Unknown SCEV kind!"); } bool ScalarEvolution::dominates(const SCEV *S, const BasicBlock *BB) { return getBlockDisposition(S, BB) >= DominatesBlock; } bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) { return getBlockDisposition(S, BB) == ProperlyDominatesBlock; } bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const { return SCEVExprContains(S, [&](const SCEV *Expr) { return Expr == Op; }); } bool ScalarEvolution::ExitLimit::hasOperand(const SCEV *S) const { auto IsS = [&](const SCEV *X) { return S == X; }; auto ContainsS = [&](const SCEV *X) { return !isa(X) && SCEVExprContains(X, IsS); }; return ContainsS(ExactNotTaken) || ContainsS(MaxNotTaken); } void ScalarEvolution::forgetMemoizedResults(const SCEV *S) { ValuesAtScopes.erase(S); LoopDispositions.erase(S); BlockDispositions.erase(S); UnsignedRanges.erase(S); SignedRanges.erase(S); ExprValueMap.erase(S); HasRecMap.erase(S); MinTrailingZerosCache.erase(S); for (auto I = PredicatedSCEVRewrites.begin(); I != PredicatedSCEVRewrites.end();) { std::pair Entry = I->first; if (Entry.first == S) PredicatedSCEVRewrites.erase(I++); else ++I; } auto RemoveSCEVFromBackedgeMap = [S, this](DenseMap &Map) { for (auto I = Map.begin(), E = Map.end(); I != E;) { BackedgeTakenInfo &BEInfo = I->second; if (BEInfo.hasOperand(S, this)) { BEInfo.clear(); Map.erase(I++); } else ++I; } }; RemoveSCEVFromBackedgeMap(BackedgeTakenCounts); RemoveSCEVFromBackedgeMap(PredicatedBackedgeTakenCounts); } void ScalarEvolution::addToLoopUseLists(const SCEV *S) { struct FindUsedLoops { SmallPtrSet LoopsUsed; bool follow(const SCEV *S) { if (auto *AR = dyn_cast(S)) LoopsUsed.insert(AR->getLoop()); return true; } bool isDone() const { return false; } }; FindUsedLoops F; SCEVTraversal(F).visitAll(S); for (auto *L : F.LoopsUsed) LoopUsers[L].push_back(S); } void ScalarEvolution::verify() const { ScalarEvolution &SE = *const_cast(this); ScalarEvolution SE2(F, TLI, AC, DT, LI); SmallVector LoopStack(LI.begin(), LI.end()); // Map's SCEV expressions from one ScalarEvolution "universe" to another. 
  struct SCEVMapper : public SCEVRewriteVisitor<SCEVMapper> {
    SCEVMapper(ScalarEvolution &SE) : SCEVRewriteVisitor<SCEVMapper>(SE) {}

    const SCEV *visitConstant(const SCEVConstant *Constant) {
      return SE.getConstant(Constant->getAPInt());
    }

    const SCEV *visitUnknown(const SCEVUnknown *Expr) {
      return SE.getUnknown(Expr->getValue());
    }

    const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
      return SE.getCouldNotCompute();
    }
  };

  SCEVMapper SCM(SE2);

  while (!LoopStack.empty()) {
    auto *L = LoopStack.pop_back_val();
    LoopStack.insert(LoopStack.end(), L->begin(), L->end());

    auto *CurBECount = SCM.visit(
        const_cast<ScalarEvolution *>(this)->getBackedgeTakenCount(L));
    auto *NewBECount = SE2.getBackedgeTakenCount(L);

    if (CurBECount == SE2.getCouldNotCompute() ||
        NewBECount == SE2.getCouldNotCompute()) {
      // NB! This situation is legal, but is very suspicious -- whatever pass
      // changed the loop to make a trip count go from could-not-compute to
      // computable or vice-versa *should have* invalidated SCEV. However, we
      // choose not to assert here (for now) since we don't want false
      // positives.
      continue;
    }

    if (containsUndefs(CurBECount) || containsUndefs(NewBECount)) {
      // SCEV treats "undef" as an unknown but consistent value (i.e. it does
      // not propagate undef aggressively). This means we can (and do) fail
      // verification in cases where a transform makes the trip count of a
      // loop go from "undef" to "undef+1" (say). The transform is fine, since
      // in both cases the loop iterates "undef" times, but SCEV thinks we
      // increased the trip count of the loop by 1 incorrectly.
      continue;
    }

    if (SE.getTypeSizeInBits(CurBECount->getType()) >
        SE.getTypeSizeInBits(NewBECount->getType()))
      NewBECount = SE2.getZeroExtendExpr(NewBECount, CurBECount->getType());
    else if (SE.getTypeSizeInBits(CurBECount->getType()) <
             SE.getTypeSizeInBits(NewBECount->getType()))
      CurBECount = SE2.getZeroExtendExpr(CurBECount, NewBECount->getType());

    auto *ConstantDelta =
        dyn_cast<SCEVConstant>(SE2.getMinusSCEV(CurBECount, NewBECount));

    if (ConstantDelta && ConstantDelta->getAPInt() != 0) {
      dbgs() << "Trip Count Changed!\n";
      dbgs() << "Old: " << *CurBECount << "\n";
      dbgs() << "New: " << *NewBECount << "\n";
      dbgs() << "Delta: " << *ConstantDelta << "\n";
      std::abort();
    }
  }
}

bool ScalarEvolution::invalidate(
    Function &F, const PreservedAnalyses &PA,
    FunctionAnalysisManager::Invalidator &Inv) {
  // Invalidate the ScalarEvolution object whenever it isn't preserved or one
  // of its dependencies is invalidated.
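  // (ScalarEvolution caches pointers into the assumption cache, the dominator
  // tree, and the loop info, so it can only stay valid while all three
  // survive; the checks below mirror that dependency list exactly.)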
  auto PAC = PA.getChecker<ScalarEvolutionAnalysis>();
  return !(PAC.preserved() ||
           PAC.preservedSet<AllAnalysesOn<Function>>()) ||
         Inv.invalidate<AssumptionAnalysis>(F, PA) ||
         Inv.invalidate<DominatorTreeAnalysis>(F, PA) ||
         Inv.invalidate<LoopAnalysis>(F, PA);
}

AnalysisKey ScalarEvolutionAnalysis::Key;

ScalarEvolution ScalarEvolutionAnalysis::run(Function &F,
                                             FunctionAnalysisManager &AM) {
  return ScalarEvolution(F, AM.getResult<TargetLibraryAnalysis>(F),
                         AM.getResult<AssumptionAnalysis>(F),
                         AM.getResult<DominatorTreeAnalysis>(F),
                         AM.getResult<LoopAnalysis>(F));
}

PreservedAnalyses
ScalarEvolutionPrinterPass::run(Function &F, FunctionAnalysisManager &AM) {
  AM.getResult<ScalarEvolutionAnalysis>(F).print(OS);
  return PreservedAnalyses::all();
}

INITIALIZE_PASS_BEGIN(ScalarEvolutionWrapperPass, "scalar-evolution",
                      "Scalar Evolution Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(ScalarEvolutionWrapperPass, "scalar-evolution",
                    "Scalar Evolution Analysis", false, true)

char ScalarEvolutionWrapperPass::ID = 0;

ScalarEvolutionWrapperPass::ScalarEvolutionWrapperPass() : FunctionPass(ID) {
  initializeScalarEvolutionWrapperPassPass(*PassRegistry::getPassRegistry());
}

bool ScalarEvolutionWrapperPass::runOnFunction(Function &F) {
  SE.reset(new ScalarEvolution(
      F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
      getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
      getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
      getAnalysis<LoopInfoWrapperPass>().getLoopInfo()));
  return false;
}

void ScalarEvolutionWrapperPass::releaseMemory() { SE.reset(); }

void ScalarEvolutionWrapperPass::print(raw_ostream &OS, const Module *) const {
  SE->print(OS);
}

void ScalarEvolutionWrapperPass::verifyAnalysis() const {
  if (!VerifySCEV)
    return;

  SE->verify();
}

void ScalarEvolutionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  AU.addRequiredTransitive<AssumptionCacheTracker>();
  AU.addRequiredTransitive<LoopInfoWrapperPass>();
  AU.addRequiredTransitive<DominatorTreeWrapperPass>();
  AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
}

const SCEVPredicate *ScalarEvolution::getEqualPredicate(const SCEV *LHS,
                                                        const SCEV *RHS) {
  FoldingSetNodeID ID;
  assert(LHS->getType() == RHS->getType() &&
         "Type mismatch between LHS and RHS");
  // Unique this node based on the arguments
  ID.AddInteger(SCEVPredicate::P_Equal);
  ID.AddPointer(LHS);
  ID.AddPointer(RHS);
  void *IP = nullptr;
  if (const auto *S = UniquePreds.FindNodeOrInsertPos(ID, IP))
    return S;
  SCEVEqualPredicate *Eq = new (SCEVAllocator)
      SCEVEqualPredicate(ID.Intern(SCEVAllocator), LHS, RHS);
  UniquePreds.InsertNode(Eq, IP);
  return Eq;
}

const SCEVPredicate *ScalarEvolution::getWrapPredicate(
    const SCEVAddRecExpr *AR,
    SCEVWrapPredicate::IncrementWrapFlags AddedFlags) {
  FoldingSetNodeID ID;
  // Unique this node based on the arguments
  ID.AddInteger(SCEVPredicate::P_Wrap);
  ID.AddPointer(AR);
  ID.AddInteger(AddedFlags);
  void *IP = nullptr;
  if (const auto *S = UniquePreds.FindNodeOrInsertPos(ID, IP))
    return S;
  auto *OF = new (SCEVAllocator)
      SCEVWrapPredicate(ID.Intern(SCEVAllocator), AR, AddedFlags);
  UniquePreds.InsertNode(OF, IP);
  return OF;
}

namespace {

class SCEVPredicateRewriter
    : public SCEVRewriteVisitor<SCEVPredicateRewriter> {
public:
  /// Rewrites \p S in the context of a loop L and the SCEV predication
  /// infrastructure.
  ///
  /// If \p Pred is non-null, the SCEV expression is rewritten to respect the
  /// equivalences present in \p Pred.
  ///
  /// If \p NewPreds is non-null, rewrite is free to add further predicates to
  /// \p NewPreds such that the result will be an AddRecExpr.
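  ///
  /// The two entry points later in this file use these modes in complementary
  /// ways (a sketch of the call shapes, not additional API):
  ///   rewrite(S, L, SE, /*NewPreds=*/nullptr, &Preds);  // apply known preds
  ///   rewrite(S, L, SE, &NewPreds, /*Pred=*/nullptr);   // collect new preds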
  static const SCEV *rewrite(const SCEV *S, const Loop *L, ScalarEvolution &SE,
                             SmallPtrSetImpl<const SCEVPredicate *> *NewPreds,
                             SCEVUnionPredicate *Pred) {
    SCEVPredicateRewriter Rewriter(L, SE, NewPreds, Pred);
    return Rewriter.visit(S);
  }

  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
    if (Pred) {
      auto ExprPreds = Pred->getPredicatesForExpr(Expr);
      for (auto *Pred : ExprPreds)
        if (const auto *IPred = dyn_cast<SCEVEqualPredicate>(Pred))
          if (IPred->getLHS() == Expr)
            return IPred->getRHS();
    }
    return convertToAddRecWithPreds(Expr);
  }

  const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
    const SCEV *Operand = visit(Expr->getOperand());
    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Operand);
    if (AR && AR->getLoop() == L && AR->isAffine()) {
      // This couldn't be folded because the operand didn't have the nuw
      // flag. Add the nusw flag as an assumption that we could make.
      const SCEV *Step = AR->getStepRecurrence(SE);
      Type *Ty = Expr->getType();
      if (addOverflowAssumption(AR, SCEVWrapPredicate::IncrementNUSW))
        return SE.getAddRecExpr(SE.getZeroExtendExpr(AR->getStart(), Ty),
                                SE.getSignExtendExpr(Step, Ty), L,
                                AR->getNoWrapFlags());
    }
    return SE.getZeroExtendExpr(Operand, Expr->getType());
  }

  const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
    const SCEV *Operand = visit(Expr->getOperand());
    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Operand);
    if (AR && AR->getLoop() == L && AR->isAffine()) {
      // This couldn't be folded because the operand didn't have the nsw
      // flag. Add the nssw flag as an assumption that we could make.
      const SCEV *Step = AR->getStepRecurrence(SE);
      Type *Ty = Expr->getType();
      if (addOverflowAssumption(AR, SCEVWrapPredicate::IncrementNSSW))
        return SE.getAddRecExpr(SE.getSignExtendExpr(AR->getStart(), Ty),
                                SE.getSignExtendExpr(Step, Ty), L,
                                AR->getNoWrapFlags());
    }
    return SE.getSignExtendExpr(Operand, Expr->getType());
  }

private:
  explicit SCEVPredicateRewriter(
      const Loop *L, ScalarEvolution &SE,
      SmallPtrSetImpl<const SCEVPredicate *> *NewPreds,
      SCEVUnionPredicate *Pred)
      : SCEVRewriteVisitor(SE), NewPreds(NewPreds), Pred(Pred), L(L) {}

  bool addOverflowAssumption(const SCEVPredicate *P) {
    if (!NewPreds) {
      // Check if we've already made this assumption.
      return Pred && Pred->implies(P);
    }
    NewPreds->insert(P);
    return true;
  }

  bool addOverflowAssumption(const SCEVAddRecExpr *AR,
                             SCEVWrapPredicate::IncrementWrapFlags AddedFlags) {
    auto *A = SE.getWrapPredicate(AR, AddedFlags);
    return addOverflowAssumption(A);
  }

  // If \p Expr represents a PHINode, we try to see if it can be represented
  // as an AddRec, possibly under a predicate (PHISCEVPred). If it is possible
  // to add this predicate as a runtime overflow check, we return the AddRec.
  // If \p Expr does not meet these conditions (is not a PHI node, or we
  // couldn't create an AddRec for it, or couldn't add the predicate), we just
  // return \p Expr.
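  // For instance, a loop PHI whose increment might overflow can still come
  // back as {start,+,step} from here, with the corresponding wrap predicate
  // recorded through addOverflowAssumption() for a later runtime check.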
  const SCEV *convertToAddRecWithPreds(const SCEVUnknown *Expr) {
+    if (!VersionUnknown)
+      return Expr;
    if (!isa<PHINode>(Expr->getValue()))
      return Expr;
    Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
        PredicatedRewrite = SE.createAddRecFromPHIWithCasts(Expr);
    if (!PredicatedRewrite)
      return Expr;
    for (auto *P : PredicatedRewrite->second) {
      if (!addOverflowAssumption(P))
        return Expr;
    }
    return PredicatedRewrite->first;
  }

  SmallPtrSetImpl<const SCEVPredicate *> *NewPreds;
  SCEVUnionPredicate *Pred;
  const Loop *L;
};

} // end anonymous namespace

const SCEV *ScalarEvolution::rewriteUsingPredicate(const SCEV *S, const Loop *L,
                                                   SCEVUnionPredicate &Preds) {
  return SCEVPredicateRewriter::rewrite(S, L, *this, nullptr, &Preds);
}

const SCEVAddRecExpr *ScalarEvolution::convertSCEVToAddRecWithPredicates(
    const SCEV *S, const Loop *L,
    SmallPtrSetImpl<const SCEVPredicate *> &Preds) {
  SmallPtrSet<const SCEVPredicate *, 4> TransformPreds;
  S = SCEVPredicateRewriter::rewrite(S, L, *this, &TransformPreds, nullptr);
  auto *AddRec = dyn_cast<SCEVAddRecExpr>(S);
  if (!AddRec)
    return nullptr;

  // Since the transformation was successful, we can now transfer the SCEV
  // predicates.
  for (auto *P : TransformPreds)
    Preds.insert(P);

  return AddRec;
}

/// SCEV predicates
SCEVPredicate::SCEVPredicate(const FoldingSetNodeIDRef ID,
                             SCEVPredicateKind Kind)
    : FastID(ID), Kind(Kind) {}

SCEVEqualPredicate::SCEVEqualPredicate(const FoldingSetNodeIDRef ID,
                                       const SCEV *LHS, const SCEV *RHS)
    : SCEVPredicate(ID, P_Equal), LHS(LHS), RHS(RHS) {
  assert(LHS->getType() == RHS->getType() && "LHS and RHS types don't match");
  assert(LHS != RHS && "LHS and RHS are the same SCEV");
}

bool SCEVEqualPredicate::implies(const SCEVPredicate *N) const {
  const auto *Op = dyn_cast<SCEVEqualPredicate>(N);

  if (!Op)
    return false;

  return Op->LHS == LHS && Op->RHS == RHS;
}

bool SCEVEqualPredicate::isAlwaysTrue() const { return false; }

const SCEV *SCEVEqualPredicate::getExpr() const { return LHS; }

void SCEVEqualPredicate::print(raw_ostream &OS, unsigned Depth) const {
  OS.indent(Depth) << "Equal predicate: " << *LHS << " == " << *RHS << "\n";
}

SCEVWrapPredicate::SCEVWrapPredicate(const FoldingSetNodeIDRef ID,
                                     const SCEVAddRecExpr *AR,
                                     IncrementWrapFlags Flags)
    : SCEVPredicate(ID, P_Wrap), AR(AR), Flags(Flags) {}

const SCEV *SCEVWrapPredicate::getExpr() const { return AR; }

bool SCEVWrapPredicate::implies(const SCEVPredicate *N) const {
  const auto *Op = dyn_cast<SCEVWrapPredicate>(N);

  return Op && Op->AR == AR && setFlags(Flags, Op->Flags) == Flags;
}

bool SCEVWrapPredicate::isAlwaysTrue() const {
  SCEV::NoWrapFlags ScevFlags = AR->getNoWrapFlags();
  IncrementWrapFlags IFlags = Flags;

  if (ScalarEvolution::setFlags(ScevFlags, SCEV::FlagNSW) == ScevFlags)
    IFlags = clearFlags(IFlags, IncrementNSSW);

  return IFlags == IncrementAnyWrap;
}

void SCEVWrapPredicate::print(raw_ostream &OS, unsigned Depth) const {
  OS.indent(Depth) << *getExpr() << " Added Flags: ";
  if (SCEVWrapPredicate::IncrementNUSW & getFlags())
    OS << "<nusw>";
  if (SCEVWrapPredicate::IncrementNSSW & getFlags())
    OS << "<nssw>";
  OS << "\n";
}

SCEVWrapPredicate::IncrementWrapFlags
SCEVWrapPredicate::getImpliedFlags(const SCEVAddRecExpr *AR,
                                   ScalarEvolution &SE) {
  IncrementWrapFlags ImpliedFlags = IncrementAnyWrap;
  SCEV::NoWrapFlags StaticFlags = AR->getNoWrapFlags();

  // We can safely transfer the NSW flag as NSSW.
  if (ScalarEvolution::setFlags(StaticFlags, SCEV::FlagNSW) == StaticFlags)
    ImpliedFlags = IncrementNSSW;

  if (ScalarEvolution::setFlags(StaticFlags, SCEV::FlagNUW) == StaticFlags) {
    // If the increment is positive, the SCEV NUW flag will also imply the
    // WrapPredicate NUSW flag.
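    // (Roughly: with a non-negative step the signed and unsigned views of the
    // increment coincide, so "never wraps unsigned" carries over to NUSW.)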
if (const auto *Step = dyn_cast(AR->getStepRecurrence(SE))) if (Step->getValue()->getValue().isNonNegative()) ImpliedFlags = setFlags(ImpliedFlags, IncrementNUSW); } return ImpliedFlags; } /// Union predicates don't get cached so create a dummy set ID for it. SCEVUnionPredicate::SCEVUnionPredicate() : SCEVPredicate(FoldingSetNodeIDRef(nullptr, 0), P_Union) {} bool SCEVUnionPredicate::isAlwaysTrue() const { return all_of(Preds, [](const SCEVPredicate *I) { return I->isAlwaysTrue(); }); } ArrayRef SCEVUnionPredicate::getPredicatesForExpr(const SCEV *Expr) { auto I = SCEVToPreds.find(Expr); if (I == SCEVToPreds.end()) return ArrayRef(); return I->second; } bool SCEVUnionPredicate::implies(const SCEVPredicate *N) const { if (const auto *Set = dyn_cast(N)) return all_of(Set->Preds, [this](const SCEVPredicate *I) { return this->implies(I); }); auto ScevPredsIt = SCEVToPreds.find(N->getExpr()); if (ScevPredsIt == SCEVToPreds.end()) return false; auto &SCEVPreds = ScevPredsIt->second; return any_of(SCEVPreds, [N](const SCEVPredicate *I) { return I->implies(N); }); } const SCEV *SCEVUnionPredicate::getExpr() const { return nullptr; } void SCEVUnionPredicate::print(raw_ostream &OS, unsigned Depth) const { for (auto Pred : Preds) Pred->print(OS, Depth); } void SCEVUnionPredicate::add(const SCEVPredicate *N) { if (const auto *Set = dyn_cast(N)) { for (auto Pred : Set->Preds) add(Pred); return; } if (implies(N)) return; const SCEV *Key = N->getExpr(); assert(Key && "Only SCEVUnionPredicate doesn't have an " " associated expression!"); SCEVToPreds[Key].push_back(N); Preds.push_back(N); } PredicatedScalarEvolution::PredicatedScalarEvolution(ScalarEvolution &SE, Loop &L) : SE(SE), L(L) {} const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) { const SCEV *Expr = SE.getSCEV(V); RewriteEntry &Entry = RewriteMap[Expr]; // If we already have an entry and the version matches, return it. if (Entry.second && Generation == Entry.first) return Entry.second; // We found an entry but it's stale. Rewrite the stale entry // according to the current predicate. if (Entry.second) Expr = Entry.second; const SCEV *NewSCEV = SE.rewriteUsingPredicate(Expr, &L, Preds); Entry = {Generation, NewSCEV}; return NewSCEV; } const SCEV *PredicatedScalarEvolution::getBackedgeTakenCount() { if (!BackedgeCount) { SCEVUnionPredicate BackedgePred; BackedgeCount = SE.getPredicatedBackedgeTakenCount(&L, BackedgePred); addPredicate(BackedgePred); } return BackedgeCount; } void PredicatedScalarEvolution::addPredicate(const SCEVPredicate &Pred) { if (Preds.implies(&Pred)) return; Preds.add(&Pred); updateGeneration(); } const SCEVUnionPredicate &PredicatedScalarEvolution::getUnionPredicate() const { return Preds; } void PredicatedScalarEvolution::updateGeneration() { // If the generation number wrapped recompute everything. if (++Generation == 0) { for (auto &II : RewriteMap) { const SCEV *Rewritten = II.second.second; II.second = {Generation, SE.rewriteUsingPredicate(Rewritten, &L, Preds)}; } } } void PredicatedScalarEvolution::setNoOverflow( Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags) { const SCEV *Expr = getSCEV(V); const auto *AR = cast(Expr); auto ImpliedFlags = SCEVWrapPredicate::getImpliedFlags(AR, SE); // Clear the statically implied flags. 
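  // Only flags that are not already guaranteed by the addrec's static no-wrap
  // flags need to be enforced with a runtime predicate.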
Flags = SCEVWrapPredicate::clearFlags(Flags, ImpliedFlags); addPredicate(*SE.getWrapPredicate(AR, Flags)); auto II = FlagsMap.insert({V, Flags}); if (!II.second) II.first->second = SCEVWrapPredicate::setFlags(Flags, II.first->second); } bool PredicatedScalarEvolution::hasNoOverflow( Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags) { const SCEV *Expr = getSCEV(V); const auto *AR = cast(Expr); Flags = SCEVWrapPredicate::clearFlags( Flags, SCEVWrapPredicate::getImpliedFlags(AR, SE)); auto II = FlagsMap.find(V); if (II != FlagsMap.end()) Flags = SCEVWrapPredicate::clearFlags(Flags, II->second); return Flags == SCEVWrapPredicate::IncrementAnyWrap; } const SCEVAddRecExpr *PredicatedScalarEvolution::getAsAddRec(Value *V) { const SCEV *Expr = this->getSCEV(V); SmallPtrSet NewPreds; auto *New = SE.convertSCEVToAddRecWithPredicates(Expr, &L, NewPreds); if (!New) return nullptr; for (auto *P : NewPreds) Preds.add(P); updateGeneration(); RewriteMap[SE.getSCEV(V)] = {Generation, New}; return New; } PredicatedScalarEvolution::PredicatedScalarEvolution( const PredicatedScalarEvolution &Init) : RewriteMap(Init.RewriteMap), SE(Init.SE), L(Init.L), Preds(Init.Preds), Generation(Init.Generation), BackedgeCount(Init.BackedgeCount) { for (const auto &I : Init.FlagsMap) FlagsMap.insert(I); } void PredicatedScalarEvolution::print(raw_ostream &OS, unsigned Depth) const { // For each block. for (auto *BB : L.getBlocks()) for (auto &I : *BB) { if (!SE.isSCEVable(I.getType())) continue; auto *Expr = SE.getSCEV(&I); auto II = RewriteMap.find(Expr); if (II == RewriteMap.end()) continue; // Don't print things that are not interesting. if (II->second.second == Expr) continue; OS.indent(Depth) << "[PSE]" << I << ":\n"; OS.indent(Depth + 2) << *Expr << "\n"; OS.indent(Depth + 2) << "--> " << *II->second.second << "\n"; } } Index: head/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp =================================================================== --- head/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp (revision 329982) +++ head/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp (revision 329983) @@ -1,5833 +1,5835 @@ //===- BitcodeReader.cpp - Internal BitcodeReader implementation ----------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. 
// //===----------------------------------------------------------------------===// #include "llvm/Bitcode/BitcodeReader.h" #include "MetadataLoader.h" #include "ValueList.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/Bitcode/BitstreamReader.h" #include "llvm/Bitcode/LLVMBitCodes.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/AutoUpgrade.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Comdat.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GVMaterializer.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalIFunc.h" #include "llvm/IR/GlobalIndirectSymbol.h" #include "llvm/IR/GlobalObject.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/IR/Verifier.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" #include #include #include #include #include #include #include #include #include #include #include #include #include using namespace llvm; static cl::opt PrintSummaryGUIDs( "print-summary-global-ids", cl::init(false), cl::Hidden, cl::desc( "Print the global id for each value when reading the module summary")); namespace { enum { SWITCH_INST_MAGIC = 0x4B5 // May 2012 => 1205 => Hex }; } // end anonymous namespace static Error error(const Twine &Message) { return make_error( Message, make_error_code(BitcodeError::CorruptedBitcode)); } /// Helper to read the header common to all bitcode files. static bool hasValidBitcodeHeader(BitstreamCursor &Stream) { // Sniff for the signature. if (!Stream.canSkipToPos(4) || Stream.Read(8) != 'B' || Stream.Read(8) != 'C' || Stream.Read(4) != 0x0 || Stream.Read(4) != 0xC || Stream.Read(4) != 0xE || Stream.Read(4) != 0xD) return false; return true; } static Expected initStream(MemoryBufferRef Buffer) { const unsigned char *BufPtr = (const unsigned char *)Buffer.getBufferStart(); const unsigned char *BufEnd = BufPtr + Buffer.getBufferSize(); if (Buffer.getBufferSize() & 3) return error("Invalid bitcode signature"); // If we have a wrapper header, parse it and ignore the non-bc file contents. // The magic number is 0x0B17C0DE stored in little endian. 
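// (The wrapper header is five little-endian 32-bit fields -- magic, version,
// bitcode offset, bitcode size, CPU type -- and SkipBitcodeWrapperHeader
// repositions the buffer to the embedded bitcode they describe.)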
if (isBitcodeWrapper(BufPtr, BufEnd)) if (SkipBitcodeWrapperHeader(BufPtr, BufEnd, true)) return error("Invalid bitcode wrapper header"); BitstreamCursor Stream(ArrayRef(BufPtr, BufEnd)); if (!hasValidBitcodeHeader(Stream)) return error("Invalid bitcode signature"); return std::move(Stream); } /// Convert a string from a record into an std::string, return true on failure. template static bool convertToString(ArrayRef Record, unsigned Idx, StrTy &Result) { if (Idx > Record.size()) return true; for (unsigned i = Idx, e = Record.size(); i != e; ++i) Result += (char)Record[i]; return false; } // Strip all the TBAA attachment for the module. static void stripTBAA(Module *M) { for (auto &F : *M) { if (F.isMaterializable()) continue; for (auto &I : instructions(F)) I.setMetadata(LLVMContext::MD_tbaa, nullptr); } } /// Read the "IDENTIFICATION_BLOCK_ID" block, do some basic enforcement on the /// "epoch" encoded in the bitcode, and return the producer name if any. static Expected readIdentificationBlock(BitstreamCursor &Stream) { if (Stream.EnterSubBlock(bitc::IDENTIFICATION_BLOCK_ID)) return error("Invalid record"); // Read all the records. SmallVector Record; std::string ProducerIdentification; while (true) { BitstreamEntry Entry = Stream.advance(); switch (Entry.Kind) { default: case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: return ProducerIdentification; case BitstreamEntry::Record: // The interesting case. break; } // Read a record. Record.clear(); unsigned BitCode = Stream.readRecord(Entry.ID, Record); switch (BitCode) { default: // Default behavior: reject return error("Invalid value"); case bitc::IDENTIFICATION_CODE_STRING: // IDENTIFICATION: [strchr x N] convertToString(Record, 0, ProducerIdentification); break; case bitc::IDENTIFICATION_CODE_EPOCH: { // EPOCH: [epoch#] unsigned epoch = (unsigned)Record[0]; if (epoch != bitc::BITCODE_CURRENT_EPOCH) { return error( Twine("Incompatible epoch: Bitcode '") + Twine(epoch) + "' vs current: '" + Twine(bitc::BITCODE_CURRENT_EPOCH) + "'"); } } } } } static Expected readIdentificationCode(BitstreamCursor &Stream) { // We expect a number of well-defined blocks, though we don't necessarily // need to understand them all. while (true) { if (Stream.AtEndOfStream()) return ""; BitstreamEntry Entry = Stream.advance(); switch (Entry.Kind) { case BitstreamEntry::EndBlock: case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::SubBlock: if (Entry.ID == bitc::IDENTIFICATION_BLOCK_ID) return readIdentificationBlock(Stream); // Ignore other sub-blocks. if (Stream.SkipBlock()) return error("Malformed block"); continue; case BitstreamEntry::Record: Stream.skipRecord(Entry.ID); continue; } } } static Expected hasObjCCategoryInModule(BitstreamCursor &Stream) { if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) return error("Invalid record"); SmallVector Record; // Read all the records for this module. while (true) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: return false; case BitstreamEntry::Record: // The interesting case. break; } // Read a record. switch (Stream.readRecord(Entry.ID, Record)) { default: break; // Default behavior, ignore unknown content. 
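    // Only SECTIONNAME records matter for this query: the presence of an
    // Objective-C category is inferred purely from the data sections the
    // module writes to.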
case bitc::MODULE_CODE_SECTIONNAME: { // SECTIONNAME: [strchr x N] std::string S; if (convertToString(Record, 0, S)) return error("Invalid record"); // Check for the i386 and other (x86_64, ARM) conventions if (S.find("__DATA,__objc_catlist") != std::string::npos || S.find("__OBJC,__category") != std::string::npos) return true; break; } } Record.clear(); } llvm_unreachable("Exit infinite loop"); } static Expected hasObjCCategory(BitstreamCursor &Stream) { // We expect a number of well-defined blocks, though we don't necessarily // need to understand them all. while (true) { BitstreamEntry Entry = Stream.advance(); switch (Entry.Kind) { case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: return false; case BitstreamEntry::SubBlock: if (Entry.ID == bitc::MODULE_BLOCK_ID) return hasObjCCategoryInModule(Stream); // Ignore other sub-blocks. if (Stream.SkipBlock()) return error("Malformed block"); continue; case BitstreamEntry::Record: Stream.skipRecord(Entry.ID); continue; } } } static Expected readModuleTriple(BitstreamCursor &Stream) { if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) return error("Invalid record"); SmallVector Record; std::string Triple; // Read all the records for this module. while (true) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: return Triple; case BitstreamEntry::Record: // The interesting case. break; } // Read a record. switch (Stream.readRecord(Entry.ID, Record)) { default: break; // Default behavior, ignore unknown content. case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N] std::string S; if (convertToString(Record, 0, S)) return error("Invalid record"); Triple = S; break; } } Record.clear(); } llvm_unreachable("Exit infinite loop"); } static Expected readTriple(BitstreamCursor &Stream) { // We expect a number of well-defined blocks, though we don't necessarily // need to understand them all. while (true) { BitstreamEntry Entry = Stream.advance(); switch (Entry.Kind) { case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: return ""; case BitstreamEntry::SubBlock: if (Entry.ID == bitc::MODULE_BLOCK_ID) return readModuleTriple(Stream); // Ignore other sub-blocks. if (Stream.SkipBlock()) return error("Malformed block"); continue; case BitstreamEntry::Record: Stream.skipRecord(Entry.ID); continue; } } } namespace { class BitcodeReaderBase { protected: BitcodeReaderBase(BitstreamCursor Stream, StringRef Strtab) : Stream(std::move(Stream)), Strtab(Strtab) { this->Stream.setBlockInfo(&BlockInfo); } BitstreamBlockInfo BlockInfo; BitstreamCursor Stream; StringRef Strtab; /// In version 2 of the bitcode we store names of global values and comdats in /// a string table rather than in the VST. bool UseStrtab = false; Expected parseVersionRecord(ArrayRef Record); /// If this module uses a string table, pop the reference to the string table /// and return the referenced string and the rest of the record. Otherwise /// just return the record itself. 
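  /// In the v2 layout a record begins with [strtab offset, strtab size];
  /// those two fields select the name, and the remaining fields are handed
  /// back unchanged (see the Record[0]/Record[1]/slice(2) handling below).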
std::pair> readNameFromStrtab(ArrayRef Record); bool readBlockInfo(); // Contains an arbitrary and optional string identifying the bitcode producer std::string ProducerIdentification; Error error(const Twine &Message); }; } // end anonymous namespace Error BitcodeReaderBase::error(const Twine &Message) { std::string FullMsg = Message.str(); if (!ProducerIdentification.empty()) FullMsg += " (Producer: '" + ProducerIdentification + "' Reader: 'LLVM " + LLVM_VERSION_STRING "')"; return ::error(FullMsg); } Expected BitcodeReaderBase::parseVersionRecord(ArrayRef Record) { if (Record.empty()) return error("Invalid record"); unsigned ModuleVersion = Record[0]; if (ModuleVersion > 2) return error("Invalid value"); UseStrtab = ModuleVersion >= 2; return ModuleVersion; } std::pair> BitcodeReaderBase::readNameFromStrtab(ArrayRef Record) { if (!UseStrtab) return {"", Record}; // Invalid reference. Let the caller complain about the record being empty. if (Record[0] + Record[1] > Strtab.size()) return {"", {}}; return {StringRef(Strtab.data() + Record[0], Record[1]), Record.slice(2)}; } namespace { class BitcodeReader : public BitcodeReaderBase, public GVMaterializer { LLVMContext &Context; Module *TheModule = nullptr; // Next offset to start scanning for lazy parsing of function bodies. uint64_t NextUnreadBit = 0; // Last function offset found in the VST. uint64_t LastFunctionBlockBit = 0; bool SeenValueSymbolTable = false; uint64_t VSTOffset = 0; std::vector SectionTable; std::vector GCTable; std::vector TypeList; BitcodeReaderValueList ValueList; Optional MDLoader; std::vector ComdatList; SmallVector InstructionList; std::vector> GlobalInits; std::vector> IndirectSymbolInits; std::vector> FunctionPrefixes; std::vector> FunctionPrologues; std::vector> FunctionPersonalityFns; /// The set of attributes by index. Index zero in the file is for null, and /// is thus not represented here. As such all indices are off by one. std::vector MAttributes; /// The set of attribute groups. std::map MAttributeGroups; /// While parsing a function body, this is a list of the basic blocks for the /// function. std::vector FunctionBBs; // When reading the module header, this list is populated with functions that // have bodies later in the file. std::vector FunctionsWithBodies; // When intrinsic functions are encountered which require upgrading they are // stored here with their replacement function. using UpdatedIntrinsicMap = DenseMap; UpdatedIntrinsicMap UpgradedIntrinsics; // Intrinsics which were remangled because of types rename UpdatedIntrinsicMap RemangledIntrinsics; // Several operations happen after the module header has been read, but // before function bodies are processed. This keeps track of whether // we've done this yet. bool SeenFirstFunctionBody = false; /// When function bodies are initially scanned, this map contains info about /// where to find deferred function body in the stream. DenseMap DeferredFunctionInfo; /// When Metadata block is initially scanned when parsing the module, we may /// choose to defer parsing of the metadata. This vector contains info about /// which Metadata blocks are deferred. std::vector DeferredMetadataInfo; /// These are basic blocks forward-referenced by block addresses. They are /// inserted lazily into functions when they're loaded. The basic block ID is /// its index into the vector. 
DenseMap> BasicBlockFwdRefs; std::deque BasicBlockFwdRefQueue; /// Indicates that we are using a new encoding for instruction operands where /// most operands in the current FUNCTION_BLOCK are encoded relative to the /// instruction number, for a more compact encoding. Some instruction /// operands are not relative to the instruction ID: basic block numbers, and /// types. Once the old style function blocks have been phased out, we would /// not need this flag. bool UseRelativeIDs = false; /// True if all functions will be materialized, negating the need to process /// (e.g.) blockaddress forward references. bool WillMaterializeAllForwardRefs = false; bool StripDebugInfo = false; TBAAVerifier TBAAVerifyHelper; std::vector BundleTags; SmallVector SSIDs; public: BitcodeReader(BitstreamCursor Stream, StringRef Strtab, StringRef ProducerIdentification, LLVMContext &Context); Error materializeForwardReferencedFunctions(); Error materialize(GlobalValue *GV) override; Error materializeModule() override; std::vector getIdentifiedStructTypes() const override; /// \brief Main interface to parsing a bitcode buffer. /// \returns true if an error occurred. Error parseBitcodeInto(Module *M, bool ShouldLazyLoadMetadata = false, bool IsImporting = false); static uint64_t decodeSignRotatedValue(uint64_t V); /// Materialize any deferred Metadata block. Error materializeMetadata() override; void setStripDebugInfo() override; private: std::vector IdentifiedStructTypes; StructType *createIdentifiedStructType(LLVMContext &Context, StringRef Name); StructType *createIdentifiedStructType(LLVMContext &Context); Type *getTypeByID(unsigned ID); Value *getFnValueByID(unsigned ID, Type *Ty) { if (Ty && Ty->isMetadataTy()) return MetadataAsValue::get(Ty->getContext(), getFnMetadataByID(ID)); return ValueList.getValueFwdRef(ID, Ty); } Metadata *getFnMetadataByID(unsigned ID) { return MDLoader->getMetadataFwdRefOrLoad(ID); } BasicBlock *getBasicBlock(unsigned ID) const { if (ID >= FunctionBBs.size()) return nullptr; // Invalid ID return FunctionBBs[ID]; } AttributeList getAttributes(unsigned i) const { if (i-1 < MAttributes.size()) return MAttributes[i-1]; return AttributeList(); } /// Read a value/type pair out of the specified record from slot 'Slot'. /// Increment Slot past the number of slots used in the record. Return true on /// failure. bool getValueTypePair(SmallVectorImpl &Record, unsigned &Slot, unsigned InstNum, Value *&ResVal) { if (Slot == Record.size()) return true; unsigned ValNo = (unsigned)Record[Slot++]; // Adjust the ValNo, if it was encoded relative to the InstNum. if (UseRelativeIDs) ValNo = InstNum - ValNo; if (ValNo < InstNum) { // If this is not a forward reference, just return the value we already // have. ResVal = getFnValueByID(ValNo, nullptr); return ResVal == nullptr; } if (Slot == Record.size()) return true; unsigned TypeNo = (unsigned)Record[Slot++]; ResVal = getFnValueByID(ValNo, getTypeByID(TypeNo)); return ResVal == nullptr; } /// Read a value out of the specified record from slot 'Slot'. Increment Slot /// past the number of slots used by the value in the record. Return true if /// there is an error. bool popValue(SmallVectorImpl &Record, unsigned &Slot, unsigned InstNum, Type *Ty, Value *&ResVal) { if (getValue(Record, Slot, InstNum, Ty, ResVal)) return true; // All values currently take a single record slot. ++Slot; return false; } /// Like popValue, but does not increment the Slot number. 
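  /// (With UseRelativeIDs an operand is stored as a delta: instruction #10
  /// referring to value #7 encodes 3, and the readers recover it as
  /// InstNum - ValNo == 10 - 3 == 7.)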
bool getValue(SmallVectorImpl &Record, unsigned Slot, unsigned InstNum, Type *Ty, Value *&ResVal) { ResVal = getValue(Record, Slot, InstNum, Ty); return ResVal == nullptr; } /// Version of getValue that returns ResVal directly, or 0 if there is an /// error. Value *getValue(SmallVectorImpl &Record, unsigned Slot, unsigned InstNum, Type *Ty) { if (Slot == Record.size()) return nullptr; unsigned ValNo = (unsigned)Record[Slot]; // Adjust the ValNo, if it was encoded relative to the InstNum. if (UseRelativeIDs) ValNo = InstNum - ValNo; return getFnValueByID(ValNo, Ty); } /// Like getValue, but decodes signed VBRs. Value *getValueSigned(SmallVectorImpl &Record, unsigned Slot, unsigned InstNum, Type *Ty) { if (Slot == Record.size()) return nullptr; unsigned ValNo = (unsigned)decodeSignRotatedValue(Record[Slot]); // Adjust the ValNo, if it was encoded relative to the InstNum. if (UseRelativeIDs) ValNo = InstNum - ValNo; return getFnValueByID(ValNo, Ty); } /// Converts alignment exponent (i.e. power of two (or zero)) to the /// corresponding alignment to use. If alignment is too large, returns /// a corresponding error code. Error parseAlignmentValue(uint64_t Exponent, unsigned &Alignment); Error parseAttrKind(uint64_t Code, Attribute::AttrKind *Kind); Error parseModule(uint64_t ResumeBit, bool ShouldLazyLoadMetadata = false); Error parseComdatRecord(ArrayRef Record); Error parseGlobalVarRecord(ArrayRef Record); Error parseFunctionRecord(ArrayRef Record); Error parseGlobalIndirectSymbolRecord(unsigned BitCode, ArrayRef Record); Error parseAttributeBlock(); Error parseAttributeGroupBlock(); Error parseTypeTable(); Error parseTypeTableBody(); Error parseOperandBundleTags(); Error parseSyncScopeNames(); Expected recordValue(SmallVectorImpl &Record, unsigned NameIndex, Triple &TT); void setDeferredFunctionInfo(unsigned FuncBitcodeOffsetDelta, Function *F, ArrayRef Record); Error parseValueSymbolTable(uint64_t Offset = 0); Error parseGlobalValueSymbolTable(); Error parseConstants(); Error rememberAndSkipFunctionBodies(); Error rememberAndSkipFunctionBody(); /// Save the positions of the Metadata blocks and skip parsing the blocks. Error rememberAndSkipMetadata(); Error typeCheckLoadStoreInst(Type *ValType, Type *PtrType); Error parseFunctionBody(Function *F); Error globalCleanup(); Error resolveGlobalAndIndirectSymbolInits(); Error parseUseLists(); Error findFunctionInStream( Function *F, DenseMap::iterator DeferredFunctionInfoIterator); SyncScope::ID getDecodedSyncScopeID(unsigned Val); }; /// Class to manage reading and parsing function summary index bitcode /// files/sections. class ModuleSummaryIndexBitcodeReader : public BitcodeReaderBase { /// The module index built during parsing. ModuleSummaryIndex &TheIndex; /// Indicates whether we have encountered a global value summary section /// yet during parsing. bool SeenGlobalValSummary = false; /// Indicates whether we have already parsed the VST, used for error checking. bool SeenValueSymbolTable = false; /// Set to the offset of the VST recorded in the MODULE_CODE_VSTOFFSET record. /// Used to enable on-demand parsing of the VST. uint64_t VSTOffset = 0; // Map to save ValueId to ValueInfo association that was recorded in the // ValueSymbolTable. It is used after the VST is parsed to convert // call graph edges read from the function summary from referencing // callees by their ValueId to using the ValueInfo instead, which is how // they are recorded in the summary index being built. 
// We save a GUID which refers to the same global as the ValueInfo, but // ignoring the linkage, i.e. for values other than local linkage they are // identical. DenseMap> ValueIdToValueInfoMap; /// Map populated during module path string table parsing, from the /// module ID to a string reference owned by the index's module /// path string table, used to correlate with combined index /// summary records. DenseMap ModuleIdMap; /// Original source file name recorded in a bitcode record. std::string SourceFileName; /// The string identifier given to this module by the client, normally the /// path to the bitcode file. StringRef ModulePath; /// For per-module summary indexes, the unique numerical identifier given to /// this module by the client. unsigned ModuleId; public: ModuleSummaryIndexBitcodeReader(BitstreamCursor Stream, StringRef Strtab, ModuleSummaryIndex &TheIndex, StringRef ModulePath, unsigned ModuleId); Error parseModule(); private: void setValueGUID(uint64_t ValueID, StringRef ValueName, GlobalValue::LinkageTypes Linkage, StringRef SourceFileName); Error parseValueSymbolTable( uint64_t Offset, DenseMap &ValueIdToLinkageMap); std::vector makeRefList(ArrayRef Record); std::vector makeCallList(ArrayRef Record, bool IsOldProfileFormat, bool HasProfile); Error parseEntireSummary(unsigned ID); Error parseModuleStringTable(); std::pair getValueInfoFromValueId(unsigned ValueId); ModuleSummaryIndex::ModuleInfo *addThisModule(); }; } // end anonymous namespace std::error_code llvm::errorToErrorCodeAndEmitErrors(LLVMContext &Ctx, Error Err) { if (Err) { std::error_code EC; handleAllErrors(std::move(Err), [&](ErrorInfoBase &EIB) { EC = EIB.convertToErrorCode(); Ctx.emitError(EIB.message()); }); return EC; } return std::error_code(); } BitcodeReader::BitcodeReader(BitstreamCursor Stream, StringRef Strtab, StringRef ProducerIdentification, LLVMContext &Context) : BitcodeReaderBase(std::move(Stream), Strtab), Context(Context), ValueList(Context) { this->ProducerIdentification = ProducerIdentification; } Error BitcodeReader::materializeForwardReferencedFunctions() { if (WillMaterializeAllForwardRefs) return Error::success(); // Prevent recursion. WillMaterializeAllForwardRefs = true; while (!BasicBlockFwdRefQueue.empty()) { Function *F = BasicBlockFwdRefQueue.front(); BasicBlockFwdRefQueue.pop_front(); assert(F && "Expected valid function"); if (!BasicBlockFwdRefs.count(F)) // Already materialized. continue; // Check for a function that isn't materializable to prevent an infinite // loop. When parsing a blockaddress stored in a global variable, there // isn't a trivial way to check if a function will have a body without a // linear search through FunctionsWithBodies, so just check it here. if (!F->isMaterializable()) return error("Never resolved function from blockaddress"); // Try to materialize F. if (Error Err = materialize(F)) return Err; } assert(BasicBlockFwdRefs.empty() && "Function missing from queue"); // Reset state. WillMaterializeAllForwardRefs = false; return Error::success(); } //===----------------------------------------------------------------------===// // Helper functions to implement forward reference resolution, etc. 
//===----------------------------------------------------------------------===// static bool hasImplicitComdat(size_t Val) { switch (Val) { default: return false; case 1: // Old WeakAnyLinkage case 4: // Old LinkOnceAnyLinkage case 10: // Old WeakODRLinkage case 11: // Old LinkOnceODRLinkage return true; } } static GlobalValue::LinkageTypes getDecodedLinkage(unsigned Val) { switch (Val) { default: // Map unknown/new linkages to external case 0: return GlobalValue::ExternalLinkage; case 2: return GlobalValue::AppendingLinkage; case 3: return GlobalValue::InternalLinkage; case 5: return GlobalValue::ExternalLinkage; // Obsolete DLLImportLinkage case 6: return GlobalValue::ExternalLinkage; // Obsolete DLLExportLinkage case 7: return GlobalValue::ExternalWeakLinkage; case 8: return GlobalValue::CommonLinkage; case 9: return GlobalValue::PrivateLinkage; case 12: return GlobalValue::AvailableExternallyLinkage; case 13: return GlobalValue::PrivateLinkage; // Obsolete LinkerPrivateLinkage case 14: return GlobalValue::PrivateLinkage; // Obsolete LinkerPrivateWeakLinkage case 15: return GlobalValue::ExternalLinkage; // Obsolete LinkOnceODRAutoHideLinkage case 1: // Old value with implicit comdat. case 16: return GlobalValue::WeakAnyLinkage; case 10: // Old value with implicit comdat. case 17: return GlobalValue::WeakODRLinkage; case 4: // Old value with implicit comdat. case 18: return GlobalValue::LinkOnceAnyLinkage; case 11: // Old value with implicit comdat. case 19: return GlobalValue::LinkOnceODRLinkage; } } static FunctionSummary::FFlags getDecodedFFlags(uint64_t RawFlags) { FunctionSummary::FFlags Flags; Flags.ReadNone = RawFlags & 0x1; Flags.ReadOnly = (RawFlags >> 1) & 0x1; Flags.NoRecurse = (RawFlags >> 2) & 0x1; Flags.ReturnDoesNotAlias = (RawFlags >> 3) & 0x1; return Flags; } /// Decode the flags for GlobalValue in the summary. static GlobalValueSummary::GVFlags getDecodedGVSummaryFlags(uint64_t RawFlags, uint64_t Version) { // Summary were not emitted before LLVM 3.9, we don't need to upgrade Linkage // like getDecodedLinkage() above. Any future change to the linkage enum and // to getDecodedLinkage() will need to be taken into account here as above. auto Linkage = GlobalValue::LinkageTypes(RawFlags & 0xF); // 4 bits RawFlags = RawFlags >> 4; bool NotEligibleToImport = (RawFlags & 0x1) || Version < 3; // The Live flag wasn't introduced until version 3. For dead stripping // to work correctly on earlier versions, we must conservatively treat all // values as live. bool Live = (RawFlags & 0x2) || Version < 3; bool Local = (RawFlags & 0x4); return GlobalValueSummary::GVFlags(Linkage, NotEligibleToImport, Live, Local); } static GlobalValue::VisibilityTypes getDecodedVisibility(unsigned Val) { switch (Val) { default: // Map unknown visibilities to default. case 0: return GlobalValue::DefaultVisibility; case 1: return GlobalValue::HiddenVisibility; case 2: return GlobalValue::ProtectedVisibility; } } static GlobalValue::DLLStorageClassTypes getDecodedDLLStorageClass(unsigned Val) { switch (Val) { default: // Map unknown values to default. case 0: return GlobalValue::DefaultStorageClass; case 1: return GlobalValue::DLLImportStorageClass; case 2: return GlobalValue::DLLExportStorageClass; } } static bool getDecodedDSOLocal(unsigned Val) { switch(Val) { default: // Map unknown values to preemptable. 
case 0: return false; case 1: return true; } } static GlobalVariable::ThreadLocalMode getDecodedThreadLocalMode(unsigned Val) { switch (Val) { case 0: return GlobalVariable::NotThreadLocal; default: // Map unknown non-zero value to general dynamic. case 1: return GlobalVariable::GeneralDynamicTLSModel; case 2: return GlobalVariable::LocalDynamicTLSModel; case 3: return GlobalVariable::InitialExecTLSModel; case 4: return GlobalVariable::LocalExecTLSModel; } } static GlobalVariable::UnnamedAddr getDecodedUnnamedAddrType(unsigned Val) { switch (Val) { default: // Map unknown to UnnamedAddr::None. case 0: return GlobalVariable::UnnamedAddr::None; case 1: return GlobalVariable::UnnamedAddr::Global; case 2: return GlobalVariable::UnnamedAddr::Local; } } static int getDecodedCastOpcode(unsigned Val) { switch (Val) { default: return -1; case bitc::CAST_TRUNC : return Instruction::Trunc; case bitc::CAST_ZEXT : return Instruction::ZExt; case bitc::CAST_SEXT : return Instruction::SExt; case bitc::CAST_FPTOUI : return Instruction::FPToUI; case bitc::CAST_FPTOSI : return Instruction::FPToSI; case bitc::CAST_UITOFP : return Instruction::UIToFP; case bitc::CAST_SITOFP : return Instruction::SIToFP; case bitc::CAST_FPTRUNC : return Instruction::FPTrunc; case bitc::CAST_FPEXT : return Instruction::FPExt; case bitc::CAST_PTRTOINT: return Instruction::PtrToInt; case bitc::CAST_INTTOPTR: return Instruction::IntToPtr; case bitc::CAST_BITCAST : return Instruction::BitCast; case bitc::CAST_ADDRSPACECAST: return Instruction::AddrSpaceCast; } } static int getDecodedBinaryOpcode(unsigned Val, Type *Ty) { bool IsFP = Ty->isFPOrFPVectorTy(); // BinOps are only valid for int/fp or vector of int/fp types if (!IsFP && !Ty->isIntOrIntVectorTy()) return -1; switch (Val) { default: return -1; case bitc::BINOP_ADD: return IsFP ? Instruction::FAdd : Instruction::Add; case bitc::BINOP_SUB: return IsFP ? Instruction::FSub : Instruction::Sub; case bitc::BINOP_MUL: return IsFP ? Instruction::FMul : Instruction::Mul; case bitc::BINOP_UDIV: return IsFP ? -1 : Instruction::UDiv; case bitc::BINOP_SDIV: return IsFP ? Instruction::FDiv : Instruction::SDiv; case bitc::BINOP_UREM: return IsFP ? -1 : Instruction::URem; case bitc::BINOP_SREM: return IsFP ? Instruction::FRem : Instruction::SRem; case bitc::BINOP_SHL: return IsFP ? -1 : Instruction::Shl; case bitc::BINOP_LSHR: return IsFP ? -1 : Instruction::LShr; case bitc::BINOP_ASHR: return IsFP ? -1 : Instruction::AShr; case bitc::BINOP_AND: return IsFP ? -1 : Instruction::And; case bitc::BINOP_OR: return IsFP ? -1 : Instruction::Or; case bitc::BINOP_XOR: return IsFP ? 
                              -1 : Instruction::Xor;
  }
}

static AtomicRMWInst::BinOp getDecodedRMWOperation(unsigned Val) {
  switch (Val) {
  default: return AtomicRMWInst::BAD_BINOP;
  case bitc::RMW_XCHG: return AtomicRMWInst::Xchg;
  case bitc::RMW_ADD: return AtomicRMWInst::Add;
  case bitc::RMW_SUB: return AtomicRMWInst::Sub;
  case bitc::RMW_AND: return AtomicRMWInst::And;
  case bitc::RMW_NAND: return AtomicRMWInst::Nand;
  case bitc::RMW_OR: return AtomicRMWInst::Or;
  case bitc::RMW_XOR: return AtomicRMWInst::Xor;
  case bitc::RMW_MAX: return AtomicRMWInst::Max;
  case bitc::RMW_MIN: return AtomicRMWInst::Min;
  case bitc::RMW_UMAX: return AtomicRMWInst::UMax;
  case bitc::RMW_UMIN: return AtomicRMWInst::UMin;
  }
}

static AtomicOrdering getDecodedOrdering(unsigned Val) {
  switch (Val) {
  case bitc::ORDERING_NOTATOMIC: return AtomicOrdering::NotAtomic;
  case bitc::ORDERING_UNORDERED: return AtomicOrdering::Unordered;
  case bitc::ORDERING_MONOTONIC: return AtomicOrdering::Monotonic;
  case bitc::ORDERING_ACQUIRE: return AtomicOrdering::Acquire;
  case bitc::ORDERING_RELEASE: return AtomicOrdering::Release;
  case bitc::ORDERING_ACQREL: return AtomicOrdering::AcquireRelease;
  default: // Map unknown orderings to sequentially-consistent.
  case bitc::ORDERING_SEQCST: return AtomicOrdering::SequentiallyConsistent;
  }
}

static Comdat::SelectionKind getDecodedComdatSelectionKind(unsigned Val) {
  switch (Val) {
  default: // Map unknown selection kinds to any.
  case bitc::COMDAT_SELECTION_KIND_ANY: return Comdat::Any;
  case bitc::COMDAT_SELECTION_KIND_EXACT_MATCH: return Comdat::ExactMatch;
  case bitc::COMDAT_SELECTION_KIND_LARGEST: return Comdat::Largest;
  case bitc::COMDAT_SELECTION_KIND_NO_DUPLICATES: return Comdat::NoDuplicates;
  case bitc::COMDAT_SELECTION_KIND_SAME_SIZE: return Comdat::SameSize;
  }
}

static FastMathFlags getDecodedFastMathFlags(unsigned Val) {
  FastMathFlags FMF;
-  if (0 != (Val & FastMathFlags::AllowReassoc))
+  if (0 != (Val & bitc::UnsafeAlgebra))
+    FMF.setFast();
+  if (0 != (Val & bitc::AllowReassoc))
     FMF.setAllowReassoc();
-  if (0 != (Val & FastMathFlags::NoNaNs))
+  if (0 != (Val & bitc::NoNaNs))
     FMF.setNoNaNs();
-  if (0 != (Val & FastMathFlags::NoInfs))
+  if (0 != (Val & bitc::NoInfs))
     FMF.setNoInfs();
-  if (0 != (Val & FastMathFlags::NoSignedZeros))
+  if (0 != (Val & bitc::NoSignedZeros))
     FMF.setNoSignedZeros();
-  if (0 != (Val & FastMathFlags::AllowReciprocal))
+  if (0 != (Val & bitc::AllowReciprocal))
     FMF.setAllowReciprocal();
-  if (0 != (Val & FastMathFlags::AllowContract))
+  if (0 != (Val & bitc::AllowContract))
     FMF.setAllowContract(true);
-  if (0 != (Val & FastMathFlags::ApproxFunc))
+  if (0 != (Val & bitc::ApproxFunc))
     FMF.setApproxFunc();
  return FMF;
}

static void upgradeDLLImportExportLinkage(GlobalValue *GV, unsigned Val) {
  switch (Val) {
  case 5: GV->setDLLStorageClass(GlobalValue::DLLImportStorageClass); break;
  case 6: GV->setDLLStorageClass(GlobalValue::DLLExportStorageClass); break;
  }
}

Type *BitcodeReader::getTypeByID(unsigned ID) {
  // The type table size is always specified correctly.
  if (ID >= TypeList.size())
    return nullptr;

  if (Type *Ty = TypeList[ID])
    return Ty;

  // If we have a forward reference, the only possible case is when it is to a
  // named struct. Just create a placeholder for now.
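  // The placeholder is filled in (body and name) when the corresponding
  // TYPE_CODE_STRUCT_NAMED or OPAQUE record is eventually parsed in
  // parseTypeTableBody().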
return TypeList[ID] = createIdentifiedStructType(Context); } StructType *BitcodeReader::createIdentifiedStructType(LLVMContext &Context, StringRef Name) { auto *Ret = StructType::create(Context, Name); IdentifiedStructTypes.push_back(Ret); return Ret; } StructType *BitcodeReader::createIdentifiedStructType(LLVMContext &Context) { auto *Ret = StructType::create(Context); IdentifiedStructTypes.push_back(Ret); return Ret; } //===----------------------------------------------------------------------===// // Functions for parsing blocks from the bitcode file //===----------------------------------------------------------------------===// static uint64_t getRawAttributeMask(Attribute::AttrKind Val) { switch (Val) { case Attribute::EndAttrKinds: llvm_unreachable("Synthetic enumerators which should never get here"); case Attribute::None: return 0; case Attribute::ZExt: return 1 << 0; case Attribute::SExt: return 1 << 1; case Attribute::NoReturn: return 1 << 2; case Attribute::InReg: return 1 << 3; case Attribute::StructRet: return 1 << 4; case Attribute::NoUnwind: return 1 << 5; case Attribute::NoAlias: return 1 << 6; case Attribute::ByVal: return 1 << 7; case Attribute::Nest: return 1 << 8; case Attribute::ReadNone: return 1 << 9; case Attribute::ReadOnly: return 1 << 10; case Attribute::NoInline: return 1 << 11; case Attribute::AlwaysInline: return 1 << 12; case Attribute::OptimizeForSize: return 1 << 13; case Attribute::StackProtect: return 1 << 14; case Attribute::StackProtectReq: return 1 << 15; case Attribute::Alignment: return 31 << 16; case Attribute::NoCapture: return 1 << 21; case Attribute::NoRedZone: return 1 << 22; case Attribute::NoImplicitFloat: return 1 << 23; case Attribute::Naked: return 1 << 24; case Attribute::InlineHint: return 1 << 25; case Attribute::StackAlignment: return 7 << 26; case Attribute::ReturnsTwice: return 1 << 29; case Attribute::UWTable: return 1 << 30; case Attribute::NonLazyBind: return 1U << 31; case Attribute::SanitizeAddress: return 1ULL << 32; case Attribute::MinSize: return 1ULL << 33; case Attribute::NoDuplicate: return 1ULL << 34; case Attribute::StackProtectStrong: return 1ULL << 35; case Attribute::SanitizeThread: return 1ULL << 36; case Attribute::SanitizeMemory: return 1ULL << 37; case Attribute::NoBuiltin: return 1ULL << 38; case Attribute::Returned: return 1ULL << 39; case Attribute::Cold: return 1ULL << 40; case Attribute::Builtin: return 1ULL << 41; case Attribute::OptimizeNone: return 1ULL << 42; case Attribute::InAlloca: return 1ULL << 43; case Attribute::NonNull: return 1ULL << 44; case Attribute::JumpTable: return 1ULL << 45; case Attribute::Convergent: return 1ULL << 46; case Attribute::SafeStack: return 1ULL << 47; case Attribute::NoRecurse: return 1ULL << 48; case Attribute::InaccessibleMemOnly: return 1ULL << 49; case Attribute::InaccessibleMemOrArgMemOnly: return 1ULL << 50; case Attribute::SwiftSelf: return 1ULL << 51; case Attribute::SwiftError: return 1ULL << 52; case Attribute::WriteOnly: return 1ULL << 53; case Attribute::Speculatable: return 1ULL << 54; case Attribute::StrictFP: return 1ULL << 55; case Attribute::SanitizeHWAddress: return 1ULL << 56; case Attribute::Dereferenceable: llvm_unreachable("dereferenceable attribute not supported in raw format"); break; case Attribute::DereferenceableOrNull: llvm_unreachable("dereferenceable_or_null attribute not supported in raw " "format"); break; case Attribute::ArgMemOnly: llvm_unreachable("argmemonly attribute not supported in raw format"); break; case Attribute::AllocSize: 
llvm_unreachable("allocsize not supported in raw format"); break; } llvm_unreachable("Unsupported attribute type"); } static void addRawAttributeValue(AttrBuilder &B, uint64_t Val) { if (!Val) return; for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds; I = Attribute::AttrKind(I + 1)) { if (I == Attribute::Dereferenceable || I == Attribute::DereferenceableOrNull || I == Attribute::ArgMemOnly || I == Attribute::AllocSize) continue; if (uint64_t A = (Val & getRawAttributeMask(I))) { if (I == Attribute::Alignment) B.addAlignmentAttr(1ULL << ((A >> 16) - 1)); else if (I == Attribute::StackAlignment) B.addStackAlignmentAttr(1ULL << ((A >> 26)-1)); else B.addAttribute(I); } } } /// \brief This fills an AttrBuilder object with the LLVM attributes that have /// been decoded from the given integer. This function must stay in sync with /// 'encodeLLVMAttributesForBitcode'. static void decodeLLVMAttributesForBitcode(AttrBuilder &B, uint64_t EncodedAttrs) { // FIXME: Remove in 4.0. // The alignment is stored as a 16-bit raw value from bits 31--16. We shift // the bits above 31 down by 11 bits. unsigned Alignment = (EncodedAttrs & (0xffffULL << 16)) >> 16; assert((!Alignment || isPowerOf2_32(Alignment)) && "Alignment must be a power of two."); if (Alignment) B.addAlignmentAttr(Alignment); addRawAttributeValue(B, ((EncodedAttrs & (0xfffffULL << 32)) >> 11) | (EncodedAttrs & 0xffff)); } Error BitcodeReader::parseAttributeBlock() { if (Stream.EnterSubBlock(bitc::PARAMATTR_BLOCK_ID)) return error("Invalid record"); if (!MAttributes.empty()) return error("Invalid multiple blocks"); SmallVector Record; SmallVector Attrs; // Read all the records. while (true) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: return Error::success(); case BitstreamEntry::Record: // The interesting case. break; } // Read a record. Record.clear(); switch (Stream.readRecord(Entry.ID, Record)) { default: // Default behavior: ignore. break; case bitc::PARAMATTR_CODE_ENTRY_OLD: // ENTRY: [paramidx0, attr0, ...] // FIXME: Remove in 4.0. if (Record.size() & 1) return error("Invalid record"); for (unsigned i = 0, e = Record.size(); i != e; i += 2) { AttrBuilder B; decodeLLVMAttributesForBitcode(B, Record[i+1]); Attrs.push_back(AttributeList::get(Context, Record[i], B)); } MAttributes.push_back(AttributeList::get(Context, Attrs)); Attrs.clear(); break; case bitc::PARAMATTR_CODE_ENTRY: // ENTRY: [attrgrp0, attrgrp1, ...] for (unsigned i = 0, e = Record.size(); i != e; ++i) Attrs.push_back(MAttributeGroups[Record[i]]); MAttributes.push_back(AttributeList::get(Context, Attrs)); Attrs.clear(); break; } } } // Returns Attribute::None on unrecognized codes. 
static Attribute::AttrKind getAttrFromCode(uint64_t Code) { switch (Code) { default: return Attribute::None; case bitc::ATTR_KIND_ALIGNMENT: return Attribute::Alignment; case bitc::ATTR_KIND_ALWAYS_INLINE: return Attribute::AlwaysInline; case bitc::ATTR_KIND_ARGMEMONLY: return Attribute::ArgMemOnly; case bitc::ATTR_KIND_BUILTIN: return Attribute::Builtin; case bitc::ATTR_KIND_BY_VAL: return Attribute::ByVal; case bitc::ATTR_KIND_IN_ALLOCA: return Attribute::InAlloca; case bitc::ATTR_KIND_COLD: return Attribute::Cold; case bitc::ATTR_KIND_CONVERGENT: return Attribute::Convergent; case bitc::ATTR_KIND_INACCESSIBLEMEM_ONLY: return Attribute::InaccessibleMemOnly; case bitc::ATTR_KIND_INACCESSIBLEMEM_OR_ARGMEMONLY: return Attribute::InaccessibleMemOrArgMemOnly; case bitc::ATTR_KIND_INLINE_HINT: return Attribute::InlineHint; case bitc::ATTR_KIND_IN_REG: return Attribute::InReg; case bitc::ATTR_KIND_JUMP_TABLE: return Attribute::JumpTable; case bitc::ATTR_KIND_MIN_SIZE: return Attribute::MinSize; case bitc::ATTR_KIND_NAKED: return Attribute::Naked; case bitc::ATTR_KIND_NEST: return Attribute::Nest; case bitc::ATTR_KIND_NO_ALIAS: return Attribute::NoAlias; case bitc::ATTR_KIND_NO_BUILTIN: return Attribute::NoBuiltin; case bitc::ATTR_KIND_NO_CAPTURE: return Attribute::NoCapture; case bitc::ATTR_KIND_NO_DUPLICATE: return Attribute::NoDuplicate; case bitc::ATTR_KIND_NO_IMPLICIT_FLOAT: return Attribute::NoImplicitFloat; case bitc::ATTR_KIND_NO_INLINE: return Attribute::NoInline; case bitc::ATTR_KIND_NO_RECURSE: return Attribute::NoRecurse; case bitc::ATTR_KIND_NON_LAZY_BIND: return Attribute::NonLazyBind; case bitc::ATTR_KIND_NON_NULL: return Attribute::NonNull; case bitc::ATTR_KIND_DEREFERENCEABLE: return Attribute::Dereferenceable; case bitc::ATTR_KIND_DEREFERENCEABLE_OR_NULL: return Attribute::DereferenceableOrNull; case bitc::ATTR_KIND_ALLOC_SIZE: return Attribute::AllocSize; case bitc::ATTR_KIND_NO_RED_ZONE: return Attribute::NoRedZone; case bitc::ATTR_KIND_NO_RETURN: return Attribute::NoReturn; case bitc::ATTR_KIND_NO_UNWIND: return Attribute::NoUnwind; case bitc::ATTR_KIND_OPTIMIZE_FOR_SIZE: return Attribute::OptimizeForSize; case bitc::ATTR_KIND_OPTIMIZE_NONE: return Attribute::OptimizeNone; case bitc::ATTR_KIND_READ_NONE: return Attribute::ReadNone; case bitc::ATTR_KIND_READ_ONLY: return Attribute::ReadOnly; case bitc::ATTR_KIND_RETURNED: return Attribute::Returned; case bitc::ATTR_KIND_RETURNS_TWICE: return Attribute::ReturnsTwice; case bitc::ATTR_KIND_S_EXT: return Attribute::SExt; case bitc::ATTR_KIND_SPECULATABLE: return Attribute::Speculatable; case bitc::ATTR_KIND_STACK_ALIGNMENT: return Attribute::StackAlignment; case bitc::ATTR_KIND_STACK_PROTECT: return Attribute::StackProtect; case bitc::ATTR_KIND_STACK_PROTECT_REQ: return Attribute::StackProtectReq; case bitc::ATTR_KIND_STACK_PROTECT_STRONG: return Attribute::StackProtectStrong; case bitc::ATTR_KIND_SAFESTACK: return Attribute::SafeStack; case bitc::ATTR_KIND_STRICT_FP: return Attribute::StrictFP; case bitc::ATTR_KIND_STRUCT_RET: return Attribute::StructRet; case bitc::ATTR_KIND_SANITIZE_ADDRESS: return Attribute::SanitizeAddress; case bitc::ATTR_KIND_SANITIZE_HWADDRESS: return Attribute::SanitizeHWAddress; case bitc::ATTR_KIND_SANITIZE_THREAD: return Attribute::SanitizeThread; case bitc::ATTR_KIND_SANITIZE_MEMORY: return Attribute::SanitizeMemory; case bitc::ATTR_KIND_SWIFT_ERROR: return Attribute::SwiftError; case bitc::ATTR_KIND_SWIFT_SELF: return Attribute::SwiftSelf; case bitc::ATTR_KIND_UW_TABLE: return Attribute::UWTable; 
case bitc::ATTR_KIND_WRITEONLY: return Attribute::WriteOnly; case bitc::ATTR_KIND_Z_EXT: return Attribute::ZExt; } } Error BitcodeReader::parseAlignmentValue(uint64_t Exponent, unsigned &Alignment) { // Note: Alignment in bitcode files is incremented by 1, so that zero // can be used for default alignment. if (Exponent > Value::MaxAlignmentExponent + 1) return error("Invalid alignment value"); Alignment = (1 << static_cast(Exponent)) >> 1; return Error::success(); } Error BitcodeReader::parseAttrKind(uint64_t Code, Attribute::AttrKind *Kind) { *Kind = getAttrFromCode(Code); if (*Kind == Attribute::None) return error("Unknown attribute kind (" + Twine(Code) + ")"); return Error::success(); } Error BitcodeReader::parseAttributeGroupBlock() { if (Stream.EnterSubBlock(bitc::PARAMATTR_GROUP_BLOCK_ID)) return error("Invalid record"); if (!MAttributeGroups.empty()) return error("Invalid multiple blocks"); SmallVector Record; // Read all the records. while (true) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: return Error::success(); case BitstreamEntry::Record: // The interesting case. break; } // Read a record. Record.clear(); switch (Stream.readRecord(Entry.ID, Record)) { default: // Default behavior: ignore. break; case bitc::PARAMATTR_GRP_CODE_ENTRY: { // ENTRY: [grpid, idx, a0, a1, ...] if (Record.size() < 3) return error("Invalid record"); uint64_t GrpID = Record[0]; uint64_t Idx = Record[1]; // Index of the object this attribute refers to. AttrBuilder B; for (unsigned i = 2, e = Record.size(); i != e; ++i) { if (Record[i] == 0) { // Enum attribute Attribute::AttrKind Kind; if (Error Err = parseAttrKind(Record[++i], &Kind)) return Err; B.addAttribute(Kind); } else if (Record[i] == 1) { // Integer attribute Attribute::AttrKind Kind; if (Error Err = parseAttrKind(Record[++i], &Kind)) return Err; if (Kind == Attribute::Alignment) B.addAlignmentAttr(Record[++i]); else if (Kind == Attribute::StackAlignment) B.addStackAlignmentAttr(Record[++i]); else if (Kind == Attribute::Dereferenceable) B.addDereferenceableAttr(Record[++i]); else if (Kind == Attribute::DereferenceableOrNull) B.addDereferenceableOrNullAttr(Record[++i]); else if (Kind == Attribute::AllocSize) B.addAllocSizeAttrFromRawRepr(Record[++i]); } else { // String attribute assert((Record[i] == 3 || Record[i] == 4) && "Invalid attribute group entry"); bool HasValue = (Record[i++] == 4); SmallString<64> KindStr; SmallString<64> ValStr; while (Record[i] != 0 && i != e) KindStr += Record[i++]; assert(Record[i] == 0 && "Kind string not null terminated"); if (HasValue) { // Has a value associated with it. ++i; // Skip the '0' that terminates the "kind" string. while (Record[i] != 0 && i != e) ValStr += Record[i++]; assert(Record[i] == 0 && "Value string not null terminated"); } B.addAttribute(KindStr.str(), ValStr.str()); } } MAttributeGroups[GrpID] = AttributeList::get(Context, Idx, B); break; } } } } Error BitcodeReader::parseTypeTable() { if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID_NEW)) return error("Invalid record"); return parseTypeTableBody(); } Error BitcodeReader::parseTypeTableBody() { if (!TypeList.empty()) return error("Invalid multiple blocks"); SmallVector Record; unsigned NumRecords = 0; SmallString<64> TypeName; // Read all the records for this type table. 
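// Illustrative standalone sketch of the alignment encoding undone by
// parseAlignmentValue() above: alignments are stored as the exponent of the
// power of two, incremented by one so that zero can mean "default alignment".
// encodeAlignment/decodeAlignment are assumed names for the sketch.
#include <cstdint>

static uint64_t encodeAlignment(unsigned Alignment) { // 0 or a power of two
  if (Alignment == 0)
    return 0;                           // 0 encodes "use the default alignment"
  uint64_t Exponent = 0;
  while ((1ULL << Exponent) < Alignment)
    ++Exponent;
  return Exponent + 1;                  // stored value is exponent + 1
}

static unsigned decodeAlignment(uint64_t Encoded) {
  // Mirrors parseAlignmentValue(): (1 << E) >> 1 maps 0->0, 1->1, 2->2, 3->4, ...
  return (1u << unsigned(Encoded)) >> 1;
}
// End of sketch.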
while (true) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: if (NumRecords != TypeList.size()) return error("Malformed block"); return Error::success(); case BitstreamEntry::Record: // The interesting case. break; } // Read a record. Record.clear(); Type *ResultTy = nullptr; switch (Stream.readRecord(Entry.ID, Record)) { default: return error("Invalid value"); case bitc::TYPE_CODE_NUMENTRY: // TYPE_CODE_NUMENTRY: [numentries] // TYPE_CODE_NUMENTRY contains a count of the number of types in the // type list. This allows us to reserve space. if (Record.size() < 1) return error("Invalid record"); TypeList.resize(Record[0]); continue; case bitc::TYPE_CODE_VOID: // VOID ResultTy = Type::getVoidTy(Context); break; case bitc::TYPE_CODE_HALF: // HALF ResultTy = Type::getHalfTy(Context); break; case bitc::TYPE_CODE_FLOAT: // FLOAT ResultTy = Type::getFloatTy(Context); break; case bitc::TYPE_CODE_DOUBLE: // DOUBLE ResultTy = Type::getDoubleTy(Context); break; case bitc::TYPE_CODE_X86_FP80: // X86_FP80 ResultTy = Type::getX86_FP80Ty(Context); break; case bitc::TYPE_CODE_FP128: // FP128 ResultTy = Type::getFP128Ty(Context); break; case bitc::TYPE_CODE_PPC_FP128: // PPC_FP128 ResultTy = Type::getPPC_FP128Ty(Context); break; case bitc::TYPE_CODE_LABEL: // LABEL ResultTy = Type::getLabelTy(Context); break; case bitc::TYPE_CODE_METADATA: // METADATA ResultTy = Type::getMetadataTy(Context); break; case bitc::TYPE_CODE_X86_MMX: // X86_MMX ResultTy = Type::getX86_MMXTy(Context); break; case bitc::TYPE_CODE_TOKEN: // TOKEN ResultTy = Type::getTokenTy(Context); break; case bitc::TYPE_CODE_INTEGER: { // INTEGER: [width] if (Record.size() < 1) return error("Invalid record"); uint64_t NumBits = Record[0]; if (NumBits < IntegerType::MIN_INT_BITS || NumBits > IntegerType::MAX_INT_BITS) return error("Bitwidth for integer type out of range"); ResultTy = IntegerType::get(Context, NumBits); break; } case bitc::TYPE_CODE_POINTER: { // POINTER: [pointee type] or // [pointee type, address space] if (Record.size() < 1) return error("Invalid record"); unsigned AddressSpace = 0; if (Record.size() == 2) AddressSpace = Record[1]; ResultTy = getTypeByID(Record[0]); if (!ResultTy || !PointerType::isValidElementType(ResultTy)) return error("Invalid type"); ResultTy = PointerType::get(ResultTy, AddressSpace); break; } case bitc::TYPE_CODE_FUNCTION_OLD: { // FIXME: attrid is dead, remove it in LLVM 4.0 // FUNCTION: [vararg, attrid, retty, paramty x N] if (Record.size() < 3) return error("Invalid record"); SmallVector ArgTys; for (unsigned i = 3, e = Record.size(); i != e; ++i) { if (Type *T = getTypeByID(Record[i])) ArgTys.push_back(T); else break; } ResultTy = getTypeByID(Record[2]); if (!ResultTy || ArgTys.size() < Record.size()-3) return error("Invalid type"); ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]); break; } case bitc::TYPE_CODE_FUNCTION: { // FUNCTION: [vararg, retty, paramty x N] if (Record.size() < 2) return error("Invalid record"); SmallVector ArgTys; for (unsigned i = 2, e = Record.size(); i != e; ++i) { if (Type *T = getTypeByID(Record[i])) { if (!FunctionType::isValidArgumentType(T)) return error("Invalid function argument type"); ArgTys.push_back(T); } else break; } ResultTy = getTypeByID(Record[1]); if (!ResultTy || ArgTys.size() < Record.size()-2) return error("Invalid type"); ResultTy = FunctionType::get(ResultTy, 
ArgTys, Record[0]); break; } case bitc::TYPE_CODE_STRUCT_ANON: { // STRUCT: [ispacked, eltty x N] if (Record.size() < 1) return error("Invalid record"); SmallVector EltTys; for (unsigned i = 1, e = Record.size(); i != e; ++i) { if (Type *T = getTypeByID(Record[i])) EltTys.push_back(T); else break; } if (EltTys.size() != Record.size()-1) return error("Invalid type"); ResultTy = StructType::get(Context, EltTys, Record[0]); break; } case bitc::TYPE_CODE_STRUCT_NAME: // STRUCT_NAME: [strchr x N] if (convertToString(Record, 0, TypeName)) return error("Invalid record"); continue; case bitc::TYPE_CODE_STRUCT_NAMED: { // STRUCT: [ispacked, eltty x N] if (Record.size() < 1) return error("Invalid record"); if (NumRecords >= TypeList.size()) return error("Invalid TYPE table"); // Check to see if this was forward referenced, if so fill in the temp. StructType *Res = cast_or_null(TypeList[NumRecords]); if (Res) { Res->setName(TypeName); TypeList[NumRecords] = nullptr; } else // Otherwise, create a new struct. Res = createIdentifiedStructType(Context, TypeName); TypeName.clear(); SmallVector EltTys; for (unsigned i = 1, e = Record.size(); i != e; ++i) { if (Type *T = getTypeByID(Record[i])) EltTys.push_back(T); else break; } if (EltTys.size() != Record.size()-1) return error("Invalid record"); Res->setBody(EltTys, Record[0]); ResultTy = Res; break; } case bitc::TYPE_CODE_OPAQUE: { // OPAQUE: [] if (Record.size() != 1) return error("Invalid record"); if (NumRecords >= TypeList.size()) return error("Invalid TYPE table"); // Check to see if this was forward referenced, if so fill in the temp. StructType *Res = cast_or_null(TypeList[NumRecords]); if (Res) { Res->setName(TypeName); TypeList[NumRecords] = nullptr; } else // Otherwise, create a new struct with no body. Res = createIdentifiedStructType(Context, TypeName); TypeName.clear(); ResultTy = Res; break; } case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty] if (Record.size() < 2) return error("Invalid record"); ResultTy = getTypeByID(Record[1]); if (!ResultTy || !ArrayType::isValidElementType(ResultTy)) return error("Invalid type"); ResultTy = ArrayType::get(ResultTy, Record[0]); break; case bitc::TYPE_CODE_VECTOR: // VECTOR: [numelts, eltty] if (Record.size() < 2) return error("Invalid record"); if (Record[0] == 0) return error("Invalid vector length"); ResultTy = getTypeByID(Record[1]); if (!ResultTy || !StructType::isValidElementType(ResultTy)) return error("Invalid type"); ResultTy = VectorType::get(ResultTy, Record[0]); break; } if (NumRecords >= TypeList.size()) return error("Invalid TYPE table"); if (TypeList[NumRecords]) return error( "Invalid TYPE table: Only named structs can be forward referenced"); assert(ResultTy && "Didn't read a type?"); TypeList[NumRecords++] = ResultTy; } } Error BitcodeReader::parseOperandBundleTags() { if (Stream.EnterSubBlock(bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID)) return error("Invalid record"); if (!BundleTags.empty()) return error("Invalid multiple blocks"); SmallVector Record; while (true) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: return Error::success(); case BitstreamEntry::Record: // The interesting case. break; } // Tags are implicitly mapped to integers by their order. 
if (Stream.readRecord(Entry.ID, Record) != bitc::OPERAND_BUNDLE_TAG) return error("Invalid record"); // OPERAND_BUNDLE_TAG: [strchr x N] BundleTags.emplace_back(); if (convertToString(Record, 0, BundleTags.back())) return error("Invalid record"); Record.clear(); } } Error BitcodeReader::parseSyncScopeNames() { if (Stream.EnterSubBlock(bitc::SYNC_SCOPE_NAMES_BLOCK_ID)) return error("Invalid record"); if (!SSIDs.empty()) return error("Invalid multiple synchronization scope names blocks"); SmallVector Record; while (true) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: if (SSIDs.empty()) return error("Invalid empty synchronization scope names block"); return Error::success(); case BitstreamEntry::Record: // The interesting case. break; } // Synchronization scope names are implicitly mapped to synchronization // scope IDs by their order. if (Stream.readRecord(Entry.ID, Record) != bitc::SYNC_SCOPE_NAME) return error("Invalid record"); SmallString<16> SSN; if (convertToString(Record, 0, SSN)) return error("Invalid record"); SSIDs.push_back(Context.getOrInsertSyncScopeID(SSN)); Record.clear(); } } /// Associate a value with its name from the given index in the provided record. Expected BitcodeReader::recordValue(SmallVectorImpl &Record, unsigned NameIndex, Triple &TT) { SmallString<128> ValueName; if (convertToString(Record, NameIndex, ValueName)) return error("Invalid record"); unsigned ValueID = Record[0]; if (ValueID >= ValueList.size() || !ValueList[ValueID]) return error("Invalid record"); Value *V = ValueList[ValueID]; StringRef NameStr(ValueName.data(), ValueName.size()); if (NameStr.find_first_of(0) != StringRef::npos) return error("Invalid value name"); V->setName(NameStr); auto *GO = dyn_cast(V); if (GO) { if (GO->getComdat() == reinterpret_cast(1)) { if (TT.supportsCOMDAT()) GO->setComdat(TheModule->getOrInsertComdat(V->getName())); else GO->setComdat(nullptr); } } return V; } /// Helper to note and return the current location, and jump to the given /// offset. static uint64_t jumpToValueSymbolTable(uint64_t Offset, BitstreamCursor &Stream) { // Save the current parsing location so we can jump back at the end // of the VST read. uint64_t CurrentBit = Stream.GetCurrentBitNo(); Stream.JumpToBit(Offset * 32); #ifndef NDEBUG // Do some checking if we are in debug mode. BitstreamEntry Entry = Stream.advance(); assert(Entry.Kind == BitstreamEntry::SubBlock); assert(Entry.ID == bitc::VALUE_SYMTAB_BLOCK_ID); #else // In NDEBUG mode ignore the output so we don't get an unused variable // warning. Stream.advance(); #endif return CurrentBit; } void BitcodeReader::setDeferredFunctionInfo(unsigned FuncBitcodeOffsetDelta, Function *F, ArrayRef Record) { // Note that we subtract 1 here because the offset is relative to one word // before the start of the identification or module block, which was // historically always the start of the regular bitcode header. uint64_t FuncWordOffset = Record[1] - 1; uint64_t FuncBitOffset = FuncWordOffset * 32; DeferredFunctionInfo[F] = FuncBitOffset + FuncBitcodeOffsetDelta; // Set the LastFunctionBlockBit to point to the last function block. // Later when parsing is resumed after function materialization, // we can simply skip that last function block. if (FuncBitOffset > LastFunctionBlockBit) LastFunctionBlockBit = FuncBitOffset; } /// Read a new-style GlobalValue symbol table. 
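// Illustrative standalone sketch of the offset arithmetic used by
// jumpToValueSymbolTable() and setDeferredFunctionInfo() above: VST records
// store 32-bit-word offsets relative to one word before the start of the
// identification or module block, so the absolute bit position is
// (wordOffset - 1) * 32, and the lazy reader adds the abbrev-ID and block-ID
// widths to land just past the ENTER_SUBBLOCK header.  The function name is
// an assumption for the sketch.
#include <cstdint>

static uint64_t vstWordOffsetToBitOffset(uint64_t RecordWordOffset,
                                         unsigned AbbrevIDWidth,
                                         unsigned BlockIDWidth) {
  uint64_t FuncWordOffset = RecordWordOffset - 1; // drop the leading word
  uint64_t FuncBitOffset = FuncWordOffset * 32;   // words -> bits
  return FuncBitOffset + AbbrevIDWidth + BlockIDWidth;
}
// End of sketch.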
Error BitcodeReader::parseGlobalValueSymbolTable() { unsigned FuncBitcodeOffsetDelta = Stream.getAbbrevIDWidth() + bitc::BlockIDWidth; if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID)) return error("Invalid record"); SmallVector Record; while (true) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); switch (Entry.Kind) { case BitstreamEntry::SubBlock: case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: return Error::success(); case BitstreamEntry::Record: break; } Record.clear(); switch (Stream.readRecord(Entry.ID, Record)) { case bitc::VST_CODE_FNENTRY: // [valueid, offset] setDeferredFunctionInfo(FuncBitcodeOffsetDelta, cast(ValueList[Record[0]]), Record); break; } } } /// Parse the value symbol table at either the current parsing location or /// at the given bit offset if provided. Error BitcodeReader::parseValueSymbolTable(uint64_t Offset) { uint64_t CurrentBit; // Pass in the Offset to distinguish between calling for the module-level // VST (where we want to jump to the VST offset) and the function-level // VST (where we don't). if (Offset > 0) { CurrentBit = jumpToValueSymbolTable(Offset, Stream); // If this module uses a string table, read this as a module-level VST. if (UseStrtab) { if (Error Err = parseGlobalValueSymbolTable()) return Err; Stream.JumpToBit(CurrentBit); return Error::success(); } // Otherwise, the VST will be in a similar format to a function-level VST, // and will contain symbol names. } // Compute the delta between the bitcode indices in the VST (the word offset // to the word-aligned ENTER_SUBBLOCK for the function block, and that // expected by the lazy reader. The reader's EnterSubBlock expects to have // already read the ENTER_SUBBLOCK code (size getAbbrevIDWidth) and BlockID // (size BlockIDWidth). Note that we access the stream's AbbrevID width here // just before entering the VST subblock because: 1) the EnterSubBlock // changes the AbbrevID width; 2) the VST block is nested within the same // outer MODULE_BLOCK as the FUNCTION_BLOCKs and therefore have the same // AbbrevID width before calling EnterSubBlock; and 3) when we want to // jump to the FUNCTION_BLOCK using this offset later, we don't want // to rely on the stream's AbbrevID width being that of the MODULE_BLOCK. unsigned FuncBitcodeOffsetDelta = Stream.getAbbrevIDWidth() + bitc::BlockIDWidth; if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID)) return error("Invalid record"); SmallVector Record; Triple TT(TheModule->getTargetTriple()); // Read all the records for this value table. SmallString<128> ValueName; while (true) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: if (Offset > 0) Stream.JumpToBit(CurrentBit); return Error::success(); case BitstreamEntry::Record: // The interesting case. break; } // Read a record. Record.clear(); switch (Stream.readRecord(Entry.ID, Record)) { default: // Default behavior: unknown type. 
break; case bitc::VST_CODE_ENTRY: { // VST_CODE_ENTRY: [valueid, namechar x N] Expected ValOrErr = recordValue(Record, 1, TT); if (Error Err = ValOrErr.takeError()) return Err; ValOrErr.get(); break; } case bitc::VST_CODE_FNENTRY: { // VST_CODE_FNENTRY: [valueid, offset, namechar x N] Expected ValOrErr = recordValue(Record, 2, TT); if (Error Err = ValOrErr.takeError()) return Err; Value *V = ValOrErr.get(); // Ignore function offsets emitted for aliases of functions in older // versions of LLVM. if (auto *F = dyn_cast(V)) setDeferredFunctionInfo(FuncBitcodeOffsetDelta, F, Record); break; } case bitc::VST_CODE_BBENTRY: { if (convertToString(Record, 1, ValueName)) return error("Invalid record"); BasicBlock *BB = getBasicBlock(Record[0]); if (!BB) return error("Invalid record"); BB->setName(StringRef(ValueName.data(), ValueName.size())); ValueName.clear(); break; } } } } /// Decode a signed value stored with the sign bit in the LSB for dense VBR /// encoding. uint64_t BitcodeReader::decodeSignRotatedValue(uint64_t V) { if ((V & 1) == 0) return V >> 1; if (V != 1) return -(V >> 1); // There is no such thing as -0 with integers. "-0" really means MININT. return 1ULL << 63; } /// Resolve all of the initializers for global values and aliases that we can. Error BitcodeReader::resolveGlobalAndIndirectSymbolInits() { std::vector> GlobalInitWorklist; std::vector> IndirectSymbolInitWorklist; std::vector> FunctionPrefixWorklist; std::vector> FunctionPrologueWorklist; std::vector> FunctionPersonalityFnWorklist; GlobalInitWorklist.swap(GlobalInits); IndirectSymbolInitWorklist.swap(IndirectSymbolInits); FunctionPrefixWorklist.swap(FunctionPrefixes); FunctionPrologueWorklist.swap(FunctionPrologues); FunctionPersonalityFnWorklist.swap(FunctionPersonalityFns); while (!GlobalInitWorklist.empty()) { unsigned ValID = GlobalInitWorklist.back().second; if (ValID >= ValueList.size()) { // Not ready to resolve this yet, it requires something later in the file. 
GlobalInits.push_back(GlobalInitWorklist.back()); } else { if (Constant *C = dyn_cast_or_null(ValueList[ValID])) GlobalInitWorklist.back().first->setInitializer(C); else return error("Expected a constant"); } GlobalInitWorklist.pop_back(); } while (!IndirectSymbolInitWorklist.empty()) { unsigned ValID = IndirectSymbolInitWorklist.back().second; if (ValID >= ValueList.size()) { IndirectSymbolInits.push_back(IndirectSymbolInitWorklist.back()); } else { Constant *C = dyn_cast_or_null(ValueList[ValID]); if (!C) return error("Expected a constant"); GlobalIndirectSymbol *GIS = IndirectSymbolInitWorklist.back().first; if (isa(GIS) && C->getType() != GIS->getType()) return error("Alias and aliasee types don't match"); GIS->setIndirectSymbol(C); } IndirectSymbolInitWorklist.pop_back(); } while (!FunctionPrefixWorklist.empty()) { unsigned ValID = FunctionPrefixWorklist.back().second; if (ValID >= ValueList.size()) { FunctionPrefixes.push_back(FunctionPrefixWorklist.back()); } else { if (Constant *C = dyn_cast_or_null(ValueList[ValID])) FunctionPrefixWorklist.back().first->setPrefixData(C); else return error("Expected a constant"); } FunctionPrefixWorklist.pop_back(); } while (!FunctionPrologueWorklist.empty()) { unsigned ValID = FunctionPrologueWorklist.back().second; if (ValID >= ValueList.size()) { FunctionPrologues.push_back(FunctionPrologueWorklist.back()); } else { if (Constant *C = dyn_cast_or_null(ValueList[ValID])) FunctionPrologueWorklist.back().first->setPrologueData(C); else return error("Expected a constant"); } FunctionPrologueWorklist.pop_back(); } while (!FunctionPersonalityFnWorklist.empty()) { unsigned ValID = FunctionPersonalityFnWorklist.back().second; if (ValID >= ValueList.size()) { FunctionPersonalityFns.push_back(FunctionPersonalityFnWorklist.back()); } else { if (Constant *C = dyn_cast_or_null(ValueList[ValID])) FunctionPersonalityFnWorklist.back().first->setPersonalityFn(C); else return error("Expected a constant"); } FunctionPersonalityFnWorklist.pop_back(); } return Error::success(); } static APInt readWideAPInt(ArrayRef Vals, unsigned TypeBits) { SmallVector Words(Vals.size()); transform(Vals, Words.begin(), BitcodeReader::decodeSignRotatedValue); return APInt(TypeBits, Words); } Error BitcodeReader::parseConstants() { if (Stream.EnterSubBlock(bitc::CONSTANTS_BLOCK_ID)) return error("Invalid record"); SmallVector Record; // Read all the records for this value table. Type *CurTy = Type::getInt32Ty(Context); unsigned NextCstNo = ValueList.size(); while (true) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: if (NextCstNo != ValueList.size()) return error("Invalid constant reference"); // Once all the constants have been read, go through and resolve forward // references. ValueList.resolveConstantForwardRefs(); return Error::success(); case BitstreamEntry::Record: // The interesting case. break; } // Read a record. 
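// Illustrative standalone sketch of the sign-rotated integer encoding that
// decodeSignRotatedValue() and readWideAPInt() above undo: the sign bit is
// kept in the LSB so that small negative numbers stay small for VBR.  The
// encoder here is an assumption written to round-trip with that decoder; the
// function names are illustrative only.
#include <cstdint>

static uint64_t encodeSignRotated(int64_t V) {
  if (V >= 0)
    return uint64_t(V) << 1;          // LSB 0: magnitude in the upper bits
  return ((-uint64_t(V)) << 1) | 1;   // LSB 1: negative; INT64_MIN encodes as 1
}

static int64_t decodeSignRotated(uint64_t V) {
  if ((V & 1) == 0)
    return int64_t(V >> 1);
  if (V != 1)
    return -int64_t(V >> 1);
  return INT64_MIN;                   // there is no -0; "1" really means MININT
}
// End of sketch.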
Record.clear(); Type *VoidType = Type::getVoidTy(Context); Value *V = nullptr; unsigned BitCode = Stream.readRecord(Entry.ID, Record); switch (BitCode) { default: // Default behavior: unknown constant case bitc::CST_CODE_UNDEF: // UNDEF V = UndefValue::get(CurTy); break; case bitc::CST_CODE_SETTYPE: // SETTYPE: [typeid] if (Record.empty()) return error("Invalid record"); if (Record[0] >= TypeList.size() || !TypeList[Record[0]]) return error("Invalid record"); if (TypeList[Record[0]] == VoidType) return error("Invalid constant type"); CurTy = TypeList[Record[0]]; continue; // Skip the ValueList manipulation. case bitc::CST_CODE_NULL: // NULL V = Constant::getNullValue(CurTy); break; case bitc::CST_CODE_INTEGER: // INTEGER: [intval] if (!CurTy->isIntegerTy() || Record.empty()) return error("Invalid record"); V = ConstantInt::get(CurTy, decodeSignRotatedValue(Record[0])); break; case bitc::CST_CODE_WIDE_INTEGER: {// WIDE_INTEGER: [n x intval] if (!CurTy->isIntegerTy() || Record.empty()) return error("Invalid record"); APInt VInt = readWideAPInt(Record, cast(CurTy)->getBitWidth()); V = ConstantInt::get(Context, VInt); break; } case bitc::CST_CODE_FLOAT: { // FLOAT: [fpval] if (Record.empty()) return error("Invalid record"); if (CurTy->isHalfTy()) V = ConstantFP::get(Context, APFloat(APFloat::IEEEhalf(), APInt(16, (uint16_t)Record[0]))); else if (CurTy->isFloatTy()) V = ConstantFP::get(Context, APFloat(APFloat::IEEEsingle(), APInt(32, (uint32_t)Record[0]))); else if (CurTy->isDoubleTy()) V = ConstantFP::get(Context, APFloat(APFloat::IEEEdouble(), APInt(64, Record[0]))); else if (CurTy->isX86_FP80Ty()) { // Bits are not stored the same way as a normal i80 APInt, compensate. uint64_t Rearrange[2]; Rearrange[0] = (Record[1] & 0xffffLL) | (Record[0] << 16); Rearrange[1] = Record[0] >> 48; V = ConstantFP::get(Context, APFloat(APFloat::x87DoubleExtended(), APInt(80, Rearrange))); } else if (CurTy->isFP128Ty()) V = ConstantFP::get(Context, APFloat(APFloat::IEEEquad(), APInt(128, Record))); else if (CurTy->isPPC_FP128Ty()) V = ConstantFP::get(Context, APFloat(APFloat::PPCDoubleDouble(), APInt(128, Record))); else V = UndefValue::get(CurTy); break; } case bitc::CST_CODE_AGGREGATE: {// AGGREGATE: [n x value number] if (Record.empty()) return error("Invalid record"); unsigned Size = Record.size(); SmallVector Elts; if (StructType *STy = dyn_cast(CurTy)) { for (unsigned i = 0; i != Size; ++i) Elts.push_back(ValueList.getConstantFwdRef(Record[i], STy->getElementType(i))); V = ConstantStruct::get(STy, Elts); } else if (ArrayType *ATy = dyn_cast(CurTy)) { Type *EltTy = ATy->getElementType(); for (unsigned i = 0; i != Size; ++i) Elts.push_back(ValueList.getConstantFwdRef(Record[i], EltTy)); V = ConstantArray::get(ATy, Elts); } else if (VectorType *VTy = dyn_cast(CurTy)) { Type *EltTy = VTy->getElementType(); for (unsigned i = 0; i != Size; ++i) Elts.push_back(ValueList.getConstantFwdRef(Record[i], EltTy)); V = ConstantVector::get(Elts); } else { V = UndefValue::get(CurTy); } break; } case bitc::CST_CODE_STRING: // STRING: [values] case bitc::CST_CODE_CSTRING: { // CSTRING: [values] if (Record.empty()) return error("Invalid record"); SmallString<16> Elts(Record.begin(), Record.end()); V = ConstantDataArray::getString(Context, Elts, BitCode == bitc::CST_CODE_CSTRING); break; } case bitc::CST_CODE_DATA: {// DATA: [n x value] if (Record.empty()) return error("Invalid record"); Type *EltTy = cast(CurTy)->getElementType(); if (EltTy->isIntegerTy(8)) { SmallVector Elts(Record.begin(), Record.end()); if (isa(CurTy)) V 
= ConstantDataVector::get(Context, Elts); else V = ConstantDataArray::get(Context, Elts); } else if (EltTy->isIntegerTy(16)) { SmallVector Elts(Record.begin(), Record.end()); if (isa(CurTy)) V = ConstantDataVector::get(Context, Elts); else V = ConstantDataArray::get(Context, Elts); } else if (EltTy->isIntegerTy(32)) { SmallVector Elts(Record.begin(), Record.end()); if (isa(CurTy)) V = ConstantDataVector::get(Context, Elts); else V = ConstantDataArray::get(Context, Elts); } else if (EltTy->isIntegerTy(64)) { SmallVector Elts(Record.begin(), Record.end()); if (isa(CurTy)) V = ConstantDataVector::get(Context, Elts); else V = ConstantDataArray::get(Context, Elts); } else if (EltTy->isHalfTy()) { SmallVector Elts(Record.begin(), Record.end()); if (isa(CurTy)) V = ConstantDataVector::getFP(Context, Elts); else V = ConstantDataArray::getFP(Context, Elts); } else if (EltTy->isFloatTy()) { SmallVector Elts(Record.begin(), Record.end()); if (isa(CurTy)) V = ConstantDataVector::getFP(Context, Elts); else V = ConstantDataArray::getFP(Context, Elts); } else if (EltTy->isDoubleTy()) { SmallVector Elts(Record.begin(), Record.end()); if (isa(CurTy)) V = ConstantDataVector::getFP(Context, Elts); else V = ConstantDataArray::getFP(Context, Elts); } else { return error("Invalid type for value"); } break; } case bitc::CST_CODE_CE_BINOP: { // CE_BINOP: [opcode, opval, opval] if (Record.size() < 3) return error("Invalid record"); int Opc = getDecodedBinaryOpcode(Record[0], CurTy); if (Opc < 0) { V = UndefValue::get(CurTy); // Unknown binop. } else { Constant *LHS = ValueList.getConstantFwdRef(Record[1], CurTy); Constant *RHS = ValueList.getConstantFwdRef(Record[2], CurTy); unsigned Flags = 0; if (Record.size() >= 4) { if (Opc == Instruction::Add || Opc == Instruction::Sub || Opc == Instruction::Mul || Opc == Instruction::Shl) { if (Record[3] & (1 << bitc::OBO_NO_SIGNED_WRAP)) Flags |= OverflowingBinaryOperator::NoSignedWrap; if (Record[3] & (1 << bitc::OBO_NO_UNSIGNED_WRAP)) Flags |= OverflowingBinaryOperator::NoUnsignedWrap; } else if (Opc == Instruction::SDiv || Opc == Instruction::UDiv || Opc == Instruction::LShr || Opc == Instruction::AShr) { if (Record[3] & (1 << bitc::PEO_EXACT)) Flags |= SDivOperator::IsExact; } } V = ConstantExpr::get(Opc, LHS, RHS, Flags); } break; } case bitc::CST_CODE_CE_CAST: { // CE_CAST: [opcode, opty, opval] if (Record.size() < 3) return error("Invalid record"); int Opc = getDecodedCastOpcode(Record[0]); if (Opc < 0) { V = UndefValue::get(CurTy); // Unknown cast. 
} else { Type *OpTy = getTypeByID(Record[1]); if (!OpTy) return error("Invalid record"); Constant *Op = ValueList.getConstantFwdRef(Record[2], OpTy); V = UpgradeBitCastExpr(Opc, Op, CurTy); if (!V) V = ConstantExpr::getCast(Opc, Op, CurTy); } break; } case bitc::CST_CODE_CE_INBOUNDS_GEP: // [ty, n x operands] case bitc::CST_CODE_CE_GEP: // [ty, n x operands] case bitc::CST_CODE_CE_GEP_WITH_INRANGE_INDEX: { // [ty, flags, n x // operands] unsigned OpNum = 0; Type *PointeeType = nullptr; if (BitCode == bitc::CST_CODE_CE_GEP_WITH_INRANGE_INDEX || Record.size() % 2) PointeeType = getTypeByID(Record[OpNum++]); bool InBounds = false; Optional InRangeIndex; if (BitCode == bitc::CST_CODE_CE_GEP_WITH_INRANGE_INDEX) { uint64_t Op = Record[OpNum++]; InBounds = Op & 1; InRangeIndex = Op >> 1; } else if (BitCode == bitc::CST_CODE_CE_INBOUNDS_GEP) InBounds = true; SmallVector Elts; while (OpNum != Record.size()) { Type *ElTy = getTypeByID(Record[OpNum++]); if (!ElTy) return error("Invalid record"); Elts.push_back(ValueList.getConstantFwdRef(Record[OpNum++], ElTy)); } if (PointeeType && PointeeType != cast(Elts[0]->getType()->getScalarType()) ->getElementType()) return error("Explicit gep operator type does not match pointee type " "of pointer operand"); if (Elts.size() < 1) return error("Invalid gep with no operands"); ArrayRef Indices(Elts.begin() + 1, Elts.end()); V = ConstantExpr::getGetElementPtr(PointeeType, Elts[0], Indices, InBounds, InRangeIndex); break; } case bitc::CST_CODE_CE_SELECT: { // CE_SELECT: [opval#, opval#, opval#] if (Record.size() < 3) return error("Invalid record"); Type *SelectorTy = Type::getInt1Ty(Context); // The selector might be an i1 or an // Get the type from the ValueList before getting a forward ref. if (VectorType *VTy = dyn_cast(CurTy)) if (Value *V = ValueList[Record[0]]) if (SelectorTy != V->getType()) SelectorTy = VectorType::get(SelectorTy, VTy->getNumElements()); V = ConstantExpr::getSelect(ValueList.getConstantFwdRef(Record[0], SelectorTy), ValueList.getConstantFwdRef(Record[1],CurTy), ValueList.getConstantFwdRef(Record[2],CurTy)); break; } case bitc::CST_CODE_CE_EXTRACTELT : { // CE_EXTRACTELT: [opty, opval, opty, opval] if (Record.size() < 3) return error("Invalid record"); VectorType *OpTy = dyn_cast_or_null(getTypeByID(Record[0])); if (!OpTy) return error("Invalid record"); Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy); Constant *Op1 = nullptr; if (Record.size() == 4) { Type *IdxTy = getTypeByID(Record[2]); if (!IdxTy) return error("Invalid record"); Op1 = ValueList.getConstantFwdRef(Record[3], IdxTy); } else // TODO: Remove with llvm 4.0 Op1 = ValueList.getConstantFwdRef(Record[2], Type::getInt32Ty(Context)); if (!Op1) return error("Invalid record"); V = ConstantExpr::getExtractElement(Op0, Op1); break; } case bitc::CST_CODE_CE_INSERTELT : { // CE_INSERTELT: [opval, opval, opty, opval] VectorType *OpTy = dyn_cast(CurTy); if (Record.size() < 3 || !OpTy) return error("Invalid record"); Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy); Constant *Op1 = ValueList.getConstantFwdRef(Record[1], OpTy->getElementType()); Constant *Op2 = nullptr; if (Record.size() == 4) { Type *IdxTy = getTypeByID(Record[2]); if (!IdxTy) return error("Invalid record"); Op2 = ValueList.getConstantFwdRef(Record[3], IdxTy); } else // TODO: Remove with llvm 4.0 Op2 = ValueList.getConstantFwdRef(Record[2], Type::getInt32Ty(Context)); if (!Op2) return error("Invalid record"); V = ConstantExpr::getInsertElement(Op0, Op1, Op2); break; } case 
bitc::CST_CODE_CE_SHUFFLEVEC: { // CE_SHUFFLEVEC: [opval, opval, opval] VectorType *OpTy = dyn_cast(CurTy); if (Record.size() < 3 || !OpTy) return error("Invalid record"); Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy); Constant *Op1 = ValueList.getConstantFwdRef(Record[1], OpTy); Type *ShufTy = VectorType::get(Type::getInt32Ty(Context), OpTy->getNumElements()); Constant *Op2 = ValueList.getConstantFwdRef(Record[2], ShufTy); V = ConstantExpr::getShuffleVector(Op0, Op1, Op2); break; } case bitc::CST_CODE_CE_SHUFVEC_EX: { // [opty, opval, opval, opval] VectorType *RTy = dyn_cast(CurTy); VectorType *OpTy = dyn_cast_or_null(getTypeByID(Record[0])); if (Record.size() < 4 || !RTy || !OpTy) return error("Invalid record"); Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy); Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy); Type *ShufTy = VectorType::get(Type::getInt32Ty(Context), RTy->getNumElements()); Constant *Op2 = ValueList.getConstantFwdRef(Record[3], ShufTy); V = ConstantExpr::getShuffleVector(Op0, Op1, Op2); break; } case bitc::CST_CODE_CE_CMP: { // CE_CMP: [opty, opval, opval, pred] if (Record.size() < 4) return error("Invalid record"); Type *OpTy = getTypeByID(Record[0]); if (!OpTy) return error("Invalid record"); Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy); Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy); if (OpTy->isFPOrFPVectorTy()) V = ConstantExpr::getFCmp(Record[3], Op0, Op1); else V = ConstantExpr::getICmp(Record[3], Op0, Op1); break; } // This maintains backward compatibility, pre-asm dialect keywords. // FIXME: Remove with the 4.0 release. case bitc::CST_CODE_INLINEASM_OLD: { if (Record.size() < 2) return error("Invalid record"); std::string AsmStr, ConstrStr; bool HasSideEffects = Record[0] & 1; bool IsAlignStack = Record[0] >> 1; unsigned AsmStrSize = Record[1]; if (2+AsmStrSize >= Record.size()) return error("Invalid record"); unsigned ConstStrSize = Record[2+AsmStrSize]; if (3+AsmStrSize+ConstStrSize > Record.size()) return error("Invalid record"); for (unsigned i = 0; i != AsmStrSize; ++i) AsmStr += (char)Record[2+i]; for (unsigned i = 0; i != ConstStrSize; ++i) ConstrStr += (char)Record[3+AsmStrSize+i]; PointerType *PTy = cast(CurTy); V = InlineAsm::get(cast(PTy->getElementType()), AsmStr, ConstrStr, HasSideEffects, IsAlignStack); break; } // This version adds support for the asm dialect keywords (e.g., // inteldialect). 
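// Illustrative standalone sketch of the record layout shared by the
// CST_CODE_INLINEASM_OLD case above and the CST_CODE_INLINEASM case that
// follows: [flags, asmstrlen, asmstr chars..., constrstrlen, constr chars...].
// In the newer form the flags word packs HasSideEffects in bit 0,
// IsAlignStack in bit 1 and the asm dialect in the remaining bits.  The
// struct and function names are assumptions for the sketch.
#include <cstdint>
#include <string>
#include <vector>

struct InlineAsmRecord {
  bool HasSideEffects = false;
  bool IsAlignStack = false;
  unsigned AsmDialect = 0;
  std::string AsmStr, ConstrStr;
};

static bool decodeInlineAsmRecord(const std::vector<uint64_t> &Record,
                                  InlineAsmRecord &Out) {
  if (Record.size() < 2)
    return false;
  Out.HasSideEffects = Record[0] & 1;
  Out.IsAlignStack = (Record[0] >> 1) & 1;
  Out.AsmDialect = unsigned(Record[0] >> 2);
  size_t AsmStrSize = size_t(Record[1]);
  if (2 + AsmStrSize >= Record.size())
    return false;
  size_t ConstStrSize = size_t(Record[2 + AsmStrSize]);
  if (3 + AsmStrSize + ConstStrSize > Record.size())
    return false;
  for (size_t i = 0; i != AsmStrSize; ++i)
    Out.AsmStr += char(Record[2 + i]);
  for (size_t i = 0; i != ConstStrSize; ++i)
    Out.ConstrStr += char(Record[3 + AsmStrSize + i]);
  return true;
}
// End of sketch.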
case bitc::CST_CODE_INLINEASM: { if (Record.size() < 2) return error("Invalid record"); std::string AsmStr, ConstrStr; bool HasSideEffects = Record[0] & 1; bool IsAlignStack = (Record[0] >> 1) & 1; unsigned AsmDialect = Record[0] >> 2; unsigned AsmStrSize = Record[1]; if (2+AsmStrSize >= Record.size()) return error("Invalid record"); unsigned ConstStrSize = Record[2+AsmStrSize]; if (3+AsmStrSize+ConstStrSize > Record.size()) return error("Invalid record"); for (unsigned i = 0; i != AsmStrSize; ++i) AsmStr += (char)Record[2+i]; for (unsigned i = 0; i != ConstStrSize; ++i) ConstrStr += (char)Record[3+AsmStrSize+i]; PointerType *PTy = cast(CurTy); V = InlineAsm::get(cast(PTy->getElementType()), AsmStr, ConstrStr, HasSideEffects, IsAlignStack, InlineAsm::AsmDialect(AsmDialect)); break; } case bitc::CST_CODE_BLOCKADDRESS:{ if (Record.size() < 3) return error("Invalid record"); Type *FnTy = getTypeByID(Record[0]); if (!FnTy) return error("Invalid record"); Function *Fn = dyn_cast_or_null(ValueList.getConstantFwdRef(Record[1],FnTy)); if (!Fn) return error("Invalid record"); // If the function is already parsed we can insert the block address right // away. BasicBlock *BB; unsigned BBID = Record[2]; if (!BBID) // Invalid reference to entry block. return error("Invalid ID"); if (!Fn->empty()) { Function::iterator BBI = Fn->begin(), BBE = Fn->end(); for (size_t I = 0, E = BBID; I != E; ++I) { if (BBI == BBE) return error("Invalid ID"); ++BBI; } BB = &*BBI; } else { // Otherwise insert a placeholder and remember it so it can be inserted // when the function is parsed. auto &FwdBBs = BasicBlockFwdRefs[Fn]; if (FwdBBs.empty()) BasicBlockFwdRefQueue.push_back(Fn); if (FwdBBs.size() < BBID + 1) FwdBBs.resize(BBID + 1); if (!FwdBBs[BBID]) FwdBBs[BBID] = BasicBlock::Create(Context); BB = FwdBBs[BBID]; } V = BlockAddress::get(Fn, BB); break; } } ValueList.assignValue(V, NextCstNo); ++NextCstNo; } } Error BitcodeReader::parseUseLists() { if (Stream.EnterSubBlock(bitc::USELIST_BLOCK_ID)) return error("Invalid record"); // Read all the records. SmallVector Record; while (true) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: return Error::success(); case BitstreamEntry::Record: // The interesting case. break; } // Read a use list record. Record.clear(); bool IsBB = false; switch (Stream.readRecord(Entry.ID, Record)) { default: // Default behavior: unknown type. break; case bitc::USELIST_CODE_BB: IsBB = true; LLVM_FALLTHROUGH; case bitc::USELIST_CODE_DEFAULT: { unsigned RecordLength = Record.size(); if (RecordLength < 3) // Records should have at least an ID and two indexes. return error("Invalid record"); unsigned ID = Record.back(); Record.pop_back(); Value *V; if (IsBB) { assert(ID < FunctionBBs.size() && "Basic block not found"); V = FunctionBBs[ID]; } else V = ValueList[ID]; unsigned NumUses = 0; SmallDenseMap Order; for (const Use &U : V->materialized_uses()) { if (++NumUses > Record.size()) break; Order[&U] = Record[NumUses - 1]; } if (Order.size() != Record.size() || NumUses > Record.size()) // Mismatches can happen if the functions are being materialized lazily // (out-of-order), or a value has been upgraded. break; V->sortUseList([&](const Use &L, const Use &R) { return Order.lookup(&L) < Order.lookup(&R); }); break; } } } } /// When we see the block for metadata, remember where it is and then skip it. 
/// This lets us lazily deserialize the metadata. Error BitcodeReader::rememberAndSkipMetadata() { // Save the current stream state. uint64_t CurBit = Stream.GetCurrentBitNo(); DeferredMetadataInfo.push_back(CurBit); // Skip over the block for now. if (Stream.SkipBlock()) return error("Invalid record"); return Error::success(); } Error BitcodeReader::materializeMetadata() { for (uint64_t BitPos : DeferredMetadataInfo) { // Move the bit stream to the saved position. Stream.JumpToBit(BitPos); if (Error Err = MDLoader->parseModuleMetadata()) return Err; } // Upgrade "Linker Options" module flag to "llvm.linker.options" module-level // metadata. if (Metadata *Val = TheModule->getModuleFlag("Linker Options")) { NamedMDNode *LinkerOpts = TheModule->getOrInsertNamedMetadata("llvm.linker.options"); for (const MDOperand &MDOptions : cast(Val)->operands()) LinkerOpts->addOperand(cast(MDOptions)); } DeferredMetadataInfo.clear(); return Error::success(); } void BitcodeReader::setStripDebugInfo() { StripDebugInfo = true; } /// When we see the block for a function body, remember where it is and then /// skip it. This lets us lazily deserialize the functions. Error BitcodeReader::rememberAndSkipFunctionBody() { // Get the function we are talking about. if (FunctionsWithBodies.empty()) return error("Insufficient function protos"); Function *Fn = FunctionsWithBodies.back(); FunctionsWithBodies.pop_back(); // Save the current stream state. uint64_t CurBit = Stream.GetCurrentBitNo(); assert( (DeferredFunctionInfo[Fn] == 0 || DeferredFunctionInfo[Fn] == CurBit) && "Mismatch between VST and scanned function offsets"); DeferredFunctionInfo[Fn] = CurBit; // Skip over the function block for now. if (Stream.SkipBlock()) return error("Invalid record"); return Error::success(); } Error BitcodeReader::globalCleanup() { // Patch the initializers for globals and aliases up. if (Error Err = resolveGlobalAndIndirectSymbolInits()) return Err; if (!GlobalInits.empty() || !IndirectSymbolInits.empty()) return error("Malformed global initializer set"); // Look for intrinsic functions which need to be upgraded at some point for (Function &F : *TheModule) { MDLoader->upgradeDebugIntrinsics(F); Function *NewFn; if (UpgradeIntrinsicFunction(&F, NewFn)) UpgradedIntrinsics[&F] = NewFn; else if (auto Remangled = Intrinsic::remangleIntrinsicFunction(&F)) // Some types could be renamed during loading if several modules are // loaded in the same LLVMContext (LTO scenario). In this case we should // remangle intrinsics names as well. RemangledIntrinsics[&F] = Remangled.getValue(); } // Look for global variables which need to be renamed. for (GlobalVariable &GV : TheModule->globals()) UpgradeGlobalVariable(&GV); // Force deallocation of memory for these vectors to favor the client that // want lazy deserialization. std::vector>().swap(GlobalInits); std::vector>().swap( IndirectSymbolInits); return Error::success(); } /// Support for lazy parsing of function bodies. This is required if we /// either have an old bitcode file without a VST forward declaration record, /// or if we have an anonymous function being materialized, since anonymous /// functions do not have a name and are therefore not in the VST. 
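// Illustrative standalone sketch of the lazy-parsing bookkeeping used by
// rememberAndSkipFunctionBody()/rememberAndSkipMetadata() above: record the
// bit position of a block, skip it during the initial scan, and jump back to
// that position when the contents are materialized.  LazyBlockIndex and its
// members are assumed names for the sketch, not the reader's real data
// structures.
#include <cstdint>
#include <map>
#include <string>

struct LazyBlockIndex {
  std::map<std::string, uint64_t> DeferredBitPos; // block key -> saved bit offset

  // During the initial scan: remember where the block starts; the caller then
  // skips over it.
  void rememberBlock(const std::string &Key, uint64_t CurrentBitPos) {
    DeferredBitPos[Key] = CurrentBitPos;
  }

  // On materialization: the bit offset to jump back to, or ~0ULL if unknown.
  uint64_t lookupBlock(const std::string &Key) const {
    auto It = DeferredBitPos.find(Key);
    return It == DeferredBitPos.end() ? ~0ULL : It->second;
  }
};
// End of sketch.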
Error BitcodeReader::rememberAndSkipFunctionBodies() { Stream.JumpToBit(NextUnreadBit); if (Stream.AtEndOfStream()) return error("Could not find function in stream"); if (!SeenFirstFunctionBody) return error("Trying to materialize functions before seeing function blocks"); // An old bitcode file with the symbol table at the end would have // finished the parse greedily. assert(SeenValueSymbolTable); SmallVector Record; while (true) { BitstreamEntry Entry = Stream.advance(); switch (Entry.Kind) { default: return error("Expect SubBlock"); case BitstreamEntry::SubBlock: switch (Entry.ID) { default: return error("Expect function block"); case bitc::FUNCTION_BLOCK_ID: if (Error Err = rememberAndSkipFunctionBody()) return Err; NextUnreadBit = Stream.GetCurrentBitNo(); return Error::success(); } } } } bool BitcodeReaderBase::readBlockInfo() { Optional NewBlockInfo = Stream.ReadBlockInfoBlock(); if (!NewBlockInfo) return true; BlockInfo = std::move(*NewBlockInfo); return false; } Error BitcodeReader::parseComdatRecord(ArrayRef Record) { // v1: [selection_kind, name] // v2: [strtab_offset, strtab_size, selection_kind] StringRef Name; std::tie(Name, Record) = readNameFromStrtab(Record); if (Record.empty()) return error("Invalid record"); Comdat::SelectionKind SK = getDecodedComdatSelectionKind(Record[0]); std::string OldFormatName; if (!UseStrtab) { if (Record.size() < 2) return error("Invalid record"); unsigned ComdatNameSize = Record[1]; OldFormatName.reserve(ComdatNameSize); for (unsigned i = 0; i != ComdatNameSize; ++i) OldFormatName += (char)Record[2 + i]; Name = OldFormatName; } Comdat *C = TheModule->getOrInsertComdat(Name); C->setSelectionKind(SK); ComdatList.push_back(C); return Error::success(); } Error BitcodeReader::parseGlobalVarRecord(ArrayRef Record) { // v1: [pointer type, isconst, initid, linkage, alignment, section, // visibility, threadlocal, unnamed_addr, externally_initialized, // dllstorageclass, comdat, attributes, preemption specifier] (name in VST) // v2: [strtab_offset, strtab_size, v1] StringRef Name; std::tie(Name, Record) = readNameFromStrtab(Record); if (Record.size() < 6) return error("Invalid record"); Type *Ty = getTypeByID(Record[0]); if (!Ty) return error("Invalid record"); bool isConstant = Record[1] & 1; bool explicitType = Record[1] & 2; unsigned AddressSpace; if (explicitType) { AddressSpace = Record[1] >> 2; } else { if (!Ty->isPointerTy()) return error("Invalid type for value"); AddressSpace = cast(Ty)->getAddressSpace(); Ty = cast(Ty)->getElementType(); } uint64_t RawLinkage = Record[3]; GlobalValue::LinkageTypes Linkage = getDecodedLinkage(RawLinkage); unsigned Alignment; if (Error Err = parseAlignmentValue(Record[4], Alignment)) return Err; std::string Section; if (Record[5]) { if (Record[5] - 1 >= SectionTable.size()) return error("Invalid ID"); Section = SectionTable[Record[5] - 1]; } GlobalValue::VisibilityTypes Visibility = GlobalValue::DefaultVisibility; // Local linkage must have default visibility. if (Record.size() > 6 && !GlobalValue::isLocalLinkage(Linkage)) // FIXME: Change to an error if non-default in 4.0. 
Visibility = getDecodedVisibility(Record[6]); GlobalVariable::ThreadLocalMode TLM = GlobalVariable::NotThreadLocal; if (Record.size() > 7) TLM = getDecodedThreadLocalMode(Record[7]); GlobalValue::UnnamedAddr UnnamedAddr = GlobalValue::UnnamedAddr::None; if (Record.size() > 8) UnnamedAddr = getDecodedUnnamedAddrType(Record[8]); bool ExternallyInitialized = false; if (Record.size() > 9) ExternallyInitialized = Record[9]; GlobalVariable *NewGV = new GlobalVariable(*TheModule, Ty, isConstant, Linkage, nullptr, Name, nullptr, TLM, AddressSpace, ExternallyInitialized); NewGV->setAlignment(Alignment); if (!Section.empty()) NewGV->setSection(Section); NewGV->setVisibility(Visibility); NewGV->setUnnamedAddr(UnnamedAddr); if (Record.size() > 10) NewGV->setDLLStorageClass(getDecodedDLLStorageClass(Record[10])); else upgradeDLLImportExportLinkage(NewGV, RawLinkage); ValueList.push_back(NewGV); // Remember which value to use for the global initializer. if (unsigned InitID = Record[2]) GlobalInits.push_back(std::make_pair(NewGV, InitID - 1)); if (Record.size() > 11) { if (unsigned ComdatID = Record[11]) { if (ComdatID > ComdatList.size()) return error("Invalid global variable comdat ID"); NewGV->setComdat(ComdatList[ComdatID - 1]); } } else if (hasImplicitComdat(RawLinkage)) { NewGV->setComdat(reinterpret_cast(1)); } if (Record.size() > 12) { auto AS = getAttributes(Record[12]).getFnAttributes(); NewGV->setAttributes(AS); } if (Record.size() > 13) { NewGV->setDSOLocal(getDecodedDSOLocal(Record[13])); } return Error::success(); } Error BitcodeReader::parseFunctionRecord(ArrayRef Record) { // v1: [type, callingconv, isproto, linkage, paramattr, alignment, section, // visibility, gc, unnamed_addr, prologuedata, dllstorageclass, comdat, // prefixdata, personalityfn, preemption specifier] (name in VST) // v2: [strtab_offset, strtab_size, v1] StringRef Name; std::tie(Name, Record) = readNameFromStrtab(Record); if (Record.size() < 8) return error("Invalid record"); Type *Ty = getTypeByID(Record[0]); if (!Ty) return error("Invalid record"); if (auto *PTy = dyn_cast(Ty)) Ty = PTy->getElementType(); auto *FTy = dyn_cast(Ty); if (!FTy) return error("Invalid type for value"); auto CC = static_cast(Record[1]); if (CC & ~CallingConv::MaxID) return error("Invalid calling convention ID"); Function *Func = Function::Create(FTy, GlobalValue::ExternalLinkage, Name, TheModule); Func->setCallingConv(CC); bool isProto = Record[2]; uint64_t RawLinkage = Record[3]; Func->setLinkage(getDecodedLinkage(RawLinkage)); Func->setAttributes(getAttributes(Record[4])); unsigned Alignment; if (Error Err = parseAlignmentValue(Record[5], Alignment)) return Err; Func->setAlignment(Alignment); if (Record[6]) { if (Record[6] - 1 >= SectionTable.size()) return error("Invalid ID"); Func->setSection(SectionTable[Record[6] - 1]); } // Local linkage must have default visibility. if (!Func->hasLocalLinkage()) // FIXME: Change to an error if non-default in 4.0. 
Func->setVisibility(getDecodedVisibility(Record[7])); if (Record.size() > 8 && Record[8]) { if (Record[8] - 1 >= GCTable.size()) return error("Invalid ID"); Func->setGC(GCTable[Record[8] - 1]); } GlobalValue::UnnamedAddr UnnamedAddr = GlobalValue::UnnamedAddr::None; if (Record.size() > 9) UnnamedAddr = getDecodedUnnamedAddrType(Record[9]); Func->setUnnamedAddr(UnnamedAddr); if (Record.size() > 10 && Record[10] != 0) FunctionPrologues.push_back(std::make_pair(Func, Record[10] - 1)); if (Record.size() > 11) Func->setDLLStorageClass(getDecodedDLLStorageClass(Record[11])); else upgradeDLLImportExportLinkage(Func, RawLinkage); if (Record.size() > 12) { if (unsigned ComdatID = Record[12]) { if (ComdatID > ComdatList.size()) return error("Invalid function comdat ID"); Func->setComdat(ComdatList[ComdatID - 1]); } } else if (hasImplicitComdat(RawLinkage)) { Func->setComdat(reinterpret_cast(1)); } if (Record.size() > 13 && Record[13] != 0) FunctionPrefixes.push_back(std::make_pair(Func, Record[13] - 1)); if (Record.size() > 14 && Record[14] != 0) FunctionPersonalityFns.push_back(std::make_pair(Func, Record[14] - 1)); if (Record.size() > 15) { Func->setDSOLocal(getDecodedDSOLocal(Record[15])); } ValueList.push_back(Func); // If this is a function with a body, remember the prototype we are // creating now, so that we can match up the body with them later. if (!isProto) { Func->setIsMaterializable(true); FunctionsWithBodies.push_back(Func); DeferredFunctionInfo[Func] = 0; } return Error::success(); } Error BitcodeReader::parseGlobalIndirectSymbolRecord( unsigned BitCode, ArrayRef Record) { // v1 ALIAS_OLD: [alias type, aliasee val#, linkage] (name in VST) // v1 ALIAS: [alias type, addrspace, aliasee val#, linkage, visibility, // dllstorageclass, threadlocal, unnamed_addr, // preemption specifier] (name in VST) // v1 IFUNC: [alias type, addrspace, aliasee val#, linkage, // visibility, dllstorageclass, threadlocal, unnamed_addr, // preemption specifier] (name in VST) // v2: [strtab_offset, strtab_size, v1] StringRef Name; std::tie(Name, Record) = readNameFromStrtab(Record); bool NewRecord = BitCode != bitc::MODULE_CODE_ALIAS_OLD; if (Record.size() < (3 + (unsigned)NewRecord)) return error("Invalid record"); unsigned OpNum = 0; Type *Ty = getTypeByID(Record[OpNum++]); if (!Ty) return error("Invalid record"); unsigned AddrSpace; if (!NewRecord) { auto *PTy = dyn_cast(Ty); if (!PTy) return error("Invalid type for value"); Ty = PTy->getElementType(); AddrSpace = PTy->getAddressSpace(); } else { AddrSpace = Record[OpNum++]; } auto Val = Record[OpNum++]; auto Linkage = Record[OpNum++]; GlobalIndirectSymbol *NewGA; if (BitCode == bitc::MODULE_CODE_ALIAS || BitCode == bitc::MODULE_CODE_ALIAS_OLD) NewGA = GlobalAlias::create(Ty, AddrSpace, getDecodedLinkage(Linkage), Name, TheModule); else NewGA = GlobalIFunc::create(Ty, AddrSpace, getDecodedLinkage(Linkage), Name, nullptr, TheModule); // Old bitcode files didn't have visibility field. // Local linkage must have default visibility. if (OpNum != Record.size()) { auto VisInd = OpNum++; if (!NewGA->hasLocalLinkage()) // FIXME: Change to an error if non-default in 4.0. 
NewGA->setVisibility(getDecodedVisibility(Record[VisInd])); } if (OpNum != Record.size()) NewGA->setDLLStorageClass(getDecodedDLLStorageClass(Record[OpNum++])); else upgradeDLLImportExportLinkage(NewGA, Linkage); if (OpNum != Record.size()) NewGA->setThreadLocalMode(getDecodedThreadLocalMode(Record[OpNum++])); if (OpNum != Record.size()) NewGA->setUnnamedAddr(getDecodedUnnamedAddrType(Record[OpNum++])); if (OpNum != Record.size()) NewGA->setDSOLocal(getDecodedDSOLocal(Record[OpNum++])); ValueList.push_back(NewGA); IndirectSymbolInits.push_back(std::make_pair(NewGA, Val)); return Error::success(); } Error BitcodeReader::parseModule(uint64_t ResumeBit, bool ShouldLazyLoadMetadata) { if (ResumeBit) Stream.JumpToBit(ResumeBit); else if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) return error("Invalid record"); SmallVector Record; // Read all the records for this module. while (true) { BitstreamEntry Entry = Stream.advance(); switch (Entry.Kind) { case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: return globalCleanup(); case BitstreamEntry::SubBlock: switch (Entry.ID) { default: // Skip unknown content. if (Stream.SkipBlock()) return error("Invalid record"); break; case bitc::BLOCKINFO_BLOCK_ID: if (readBlockInfo()) return error("Malformed block"); break; case bitc::PARAMATTR_BLOCK_ID: if (Error Err = parseAttributeBlock()) return Err; break; case bitc::PARAMATTR_GROUP_BLOCK_ID: if (Error Err = parseAttributeGroupBlock()) return Err; break; case bitc::TYPE_BLOCK_ID_NEW: if (Error Err = parseTypeTable()) return Err; break; case bitc::VALUE_SYMTAB_BLOCK_ID: if (!SeenValueSymbolTable) { // Either this is an old form VST without function index and an // associated VST forward declaration record (which would have caused // the VST to be jumped to and parsed before it was encountered // normally in the stream), or there were no function blocks to // trigger an earlier parsing of the VST. assert(VSTOffset == 0 || FunctionsWithBodies.empty()); if (Error Err = parseValueSymbolTable()) return Err; SeenValueSymbolTable = true; } else { // We must have had a VST forward declaration record, which caused // the parser to jump to and parse the VST earlier. assert(VSTOffset > 0); if (Stream.SkipBlock()) return error("Invalid record"); } break; case bitc::CONSTANTS_BLOCK_ID: if (Error Err = parseConstants()) return Err; if (Error Err = resolveGlobalAndIndirectSymbolInits()) return Err; break; case bitc::METADATA_BLOCK_ID: if (ShouldLazyLoadMetadata) { if (Error Err = rememberAndSkipMetadata()) return Err; break; } assert(DeferredMetadataInfo.empty() && "Unexpected deferred metadata"); if (Error Err = MDLoader->parseModuleMetadata()) return Err; break; case bitc::METADATA_KIND_BLOCK_ID: if (Error Err = MDLoader->parseMetadataKinds()) return Err; break; case bitc::FUNCTION_BLOCK_ID: // If this is the first function body we've seen, reverse the // FunctionsWithBodies list. if (!SeenFirstFunctionBody) { std::reverse(FunctionsWithBodies.begin(), FunctionsWithBodies.end()); if (Error Err = globalCleanup()) return Err; SeenFirstFunctionBody = true; } if (VSTOffset > 0) { // If we have a VST forward declaration record, make sure we // parse the VST now if we haven't already. It is needed to // set up the DeferredFunctionInfo vector for lazy reading. if (!SeenValueSymbolTable) { if (Error Err = BitcodeReader::parseValueSymbolTable(VSTOffset)) return Err; SeenValueSymbolTable = true; // Fall through so that we record the NextUnreadBit below. 
// This is necessary in case we have an anonymous function that // is later materialized. Since it will not have a VST entry we // need to fall back to the lazy parse to find its offset. } else { // If we have a VST forward declaration record, but have already // parsed the VST (just above, when the first function body was // encountered here), then we are resuming the parse after // materializing functions. The ResumeBit points to the // start of the last function block recorded in the // DeferredFunctionInfo map. Skip it. if (Stream.SkipBlock()) return error("Invalid record"); continue; } } // Support older bitcode files that did not have the function // index in the VST, nor a VST forward declaration record, as // well as anonymous functions that do not have VST entries. // Build the DeferredFunctionInfo vector on the fly. if (Error Err = rememberAndSkipFunctionBody()) return Err; // Suspend parsing when we reach the function bodies. Subsequent // materialization calls will resume it when necessary. If the bitcode // file is old, the symbol table will be at the end instead and will not // have been seen yet. In this case, just finish the parse now. if (SeenValueSymbolTable) { NextUnreadBit = Stream.GetCurrentBitNo(); // After the VST has been parsed, we need to make sure intrinsic name // are auto-upgraded. return globalCleanup(); } break; case bitc::USELIST_BLOCK_ID: if (Error Err = parseUseLists()) return Err; break; case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID: if (Error Err = parseOperandBundleTags()) return Err; break; case bitc::SYNC_SCOPE_NAMES_BLOCK_ID: if (Error Err = parseSyncScopeNames()) return Err; break; } continue; case BitstreamEntry::Record: // The interesting case. break; } // Read a record. auto BitCode = Stream.readRecord(Entry.ID, Record); switch (BitCode) { default: break; // Default behavior, ignore unknown content. case bitc::MODULE_CODE_VERSION: { Expected VersionOrErr = parseVersionRecord(Record); if (!VersionOrErr) return VersionOrErr.takeError(); UseRelativeIDs = *VersionOrErr >= 1; break; } case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N] std::string S; if (convertToString(Record, 0, S)) return error("Invalid record"); TheModule->setTargetTriple(S); break; } case bitc::MODULE_CODE_DATALAYOUT: { // DATALAYOUT: [strchr x N] std::string S; if (convertToString(Record, 0, S)) return error("Invalid record"); TheModule->setDataLayout(S); break; } case bitc::MODULE_CODE_ASM: { // ASM: [strchr x N] std::string S; if (convertToString(Record, 0, S)) return error("Invalid record"); TheModule->setModuleInlineAsm(S); break; } case bitc::MODULE_CODE_DEPLIB: { // DEPLIB: [strchr x N] // FIXME: Remove in 4.0. std::string S; if (convertToString(Record, 0, S)) return error("Invalid record"); // Ignore value. 
break; } case bitc::MODULE_CODE_SECTIONNAME: { // SECTIONNAME: [strchr x N] std::string S; if (convertToString(Record, 0, S)) return error("Invalid record"); SectionTable.push_back(S); break; } case bitc::MODULE_CODE_GCNAME: { // SECTIONNAME: [strchr x N] std::string S; if (convertToString(Record, 0, S)) return error("Invalid record"); GCTable.push_back(S); break; } case bitc::MODULE_CODE_COMDAT: if (Error Err = parseComdatRecord(Record)) return Err; break; case bitc::MODULE_CODE_GLOBALVAR: if (Error Err = parseGlobalVarRecord(Record)) return Err; break; case bitc::MODULE_CODE_FUNCTION: if (Error Err = parseFunctionRecord(Record)) return Err; break; case bitc::MODULE_CODE_IFUNC: case bitc::MODULE_CODE_ALIAS: case bitc::MODULE_CODE_ALIAS_OLD: if (Error Err = parseGlobalIndirectSymbolRecord(BitCode, Record)) return Err; break; /// MODULE_CODE_VSTOFFSET: [offset] case bitc::MODULE_CODE_VSTOFFSET: if (Record.size() < 1) return error("Invalid record"); // Note that we subtract 1 here because the offset is relative to one word // before the start of the identification or module block, which was // historically always the start of the regular bitcode header. VSTOffset = Record[0] - 1; break; /// MODULE_CODE_SOURCE_FILENAME: [namechar x N] case bitc::MODULE_CODE_SOURCE_FILENAME: SmallString<128> ValueName; if (convertToString(Record, 0, ValueName)) return error("Invalid record"); TheModule->setSourceFileName(ValueName); break; } Record.clear(); } } Error BitcodeReader::parseBitcodeInto(Module *M, bool ShouldLazyLoadMetadata, bool IsImporting) { TheModule = M; MDLoader = MetadataLoader(Stream, *M, ValueList, IsImporting, [&](unsigned ID) { return getTypeByID(ID); }); return parseModule(0, ShouldLazyLoadMetadata); } Error BitcodeReader::typeCheckLoadStoreInst(Type *ValType, Type *PtrType) { if (!isa(PtrType)) return error("Load/Store operand is not a pointer type"); Type *ElemType = cast(PtrType)->getElementType(); if (ValType && ValType != ElemType) return error("Explicit load/store type does not match pointee " "type of pointer operand"); if (!PointerType::isLoadableOrStorableType(ElemType)) return error("Cannot load/store from pointer"); return Error::success(); } /// Lazily parse the specified function body block. Error BitcodeReader::parseFunctionBody(Function *F) { if (Stream.EnterSubBlock(bitc::FUNCTION_BLOCK_ID)) return error("Invalid record"); // Unexpected unresolved metadata when parsing function. if (MDLoader->hasFwdRefs()) return error("Invalid function metadata: incoming forward references"); InstructionList.clear(); unsigned ModuleValueListSize = ValueList.size(); unsigned ModuleMDLoaderSize = MDLoader->size(); // Add all the function arguments to the value table. for (Argument &I : F->args()) ValueList.push_back(&I); unsigned NextValueNo = ValueList.size(); BasicBlock *CurBB = nullptr; unsigned CurBBNo = 0; DebugLoc LastLoc; auto getLastInstruction = [&]() -> Instruction * { if (CurBB && !CurBB->empty()) return &CurBB->back(); else if (CurBBNo && FunctionBBs[CurBBNo - 1] && !FunctionBBs[CurBBNo - 1]->empty()) return &FunctionBBs[CurBBNo - 1]->back(); return nullptr; }; std::vector OperandBundles; // Read all the records. SmallVector Record; while (true) { BitstreamEntry Entry = Stream.advance(); switch (Entry.Kind) { case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: goto OutOfRecordLoop; case BitstreamEntry::SubBlock: switch (Entry.ID) { default: // Skip unknown content. 
if (Stream.SkipBlock()) return error("Invalid record"); break; case bitc::CONSTANTS_BLOCK_ID: if (Error Err = parseConstants()) return Err; NextValueNo = ValueList.size(); break; case bitc::VALUE_SYMTAB_BLOCK_ID: if (Error Err = parseValueSymbolTable()) return Err; break; case bitc::METADATA_ATTACHMENT_ID: if (Error Err = MDLoader->parseMetadataAttachment(*F, InstructionList)) return Err; break; case bitc::METADATA_BLOCK_ID: assert(DeferredMetadataInfo.empty() && "Must read all module-level metadata before function-level"); if (Error Err = MDLoader->parseFunctionMetadata()) return Err; break; case bitc::USELIST_BLOCK_ID: if (Error Err = parseUseLists()) return Err; break; } continue; case BitstreamEntry::Record: // The interesting case. break; } // Read a record. Record.clear(); Instruction *I = nullptr; unsigned BitCode = Stream.readRecord(Entry.ID, Record); switch (BitCode) { default: // Default behavior: reject return error("Invalid value"); case bitc::FUNC_CODE_DECLAREBLOCKS: { // DECLAREBLOCKS: [nblocks] if (Record.size() < 1 || Record[0] == 0) return error("Invalid record"); // Create all the basic blocks for the function. FunctionBBs.resize(Record[0]); // See if anything took the address of blocks in this function. auto BBFRI = BasicBlockFwdRefs.find(F); if (BBFRI == BasicBlockFwdRefs.end()) { for (unsigned i = 0, e = FunctionBBs.size(); i != e; ++i) FunctionBBs[i] = BasicBlock::Create(Context, "", F); } else { auto &BBRefs = BBFRI->second; // Check for invalid basic block references. if (BBRefs.size() > FunctionBBs.size()) return error("Invalid ID"); assert(!BBRefs.empty() && "Unexpected empty array"); assert(!BBRefs.front() && "Invalid reference to entry block"); for (unsigned I = 0, E = FunctionBBs.size(), RE = BBRefs.size(); I != E; ++I) if (I < RE && BBRefs[I]) { BBRefs[I]->insertInto(F); FunctionBBs[I] = BBRefs[I]; } else { FunctionBBs[I] = BasicBlock::Create(Context, "", F); } // Erase from the table. BasicBlockFwdRefs.erase(BBFRI); } CurBB = FunctionBBs[0]; continue; } case bitc::FUNC_CODE_DEBUG_LOC_AGAIN: // DEBUG_LOC_AGAIN // This record indicates that the last instruction is at the same // location as the previous instruction with a location. 
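      // Encoding this as a separate zero-operand record keeps runs of
      // instructions sharing one source location cheap: only the first needs
      // a full DEBUG_LOC record with [line, col, scope, ia].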
I = getLastInstruction(); if (!I) return error("Invalid record"); I->setDebugLoc(LastLoc); I = nullptr; continue; case bitc::FUNC_CODE_DEBUG_LOC: { // DEBUG_LOC: [line, col, scope, ia] I = getLastInstruction(); if (!I || Record.size() < 4) return error("Invalid record"); unsigned Line = Record[0], Col = Record[1]; unsigned ScopeID = Record[2], IAID = Record[3]; MDNode *Scope = nullptr, *IA = nullptr; if (ScopeID) { Scope = MDLoader->getMDNodeFwdRefOrNull(ScopeID - 1); if (!Scope) return error("Invalid record"); } if (IAID) { IA = MDLoader->getMDNodeFwdRefOrNull(IAID - 1); if (!IA) return error("Invalid record"); } LastLoc = DebugLoc::get(Line, Col, Scope, IA); I->setDebugLoc(LastLoc); I = nullptr; continue; } case bitc::FUNC_CODE_INST_BINOP: { // BINOP: [opval, ty, opval, opcode] unsigned OpNum = 0; Value *LHS, *RHS; if (getValueTypePair(Record, OpNum, NextValueNo, LHS) || popValue(Record, OpNum, NextValueNo, LHS->getType(), RHS) || OpNum+1 > Record.size()) return error("Invalid record"); int Opc = getDecodedBinaryOpcode(Record[OpNum++], LHS->getType()); if (Opc == -1) return error("Invalid record"); I = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS); InstructionList.push_back(I); if (OpNum < Record.size()) { if (Opc == Instruction::Add || Opc == Instruction::Sub || Opc == Instruction::Mul || Opc == Instruction::Shl) { if (Record[OpNum] & (1 << bitc::OBO_NO_SIGNED_WRAP)) cast(I)->setHasNoSignedWrap(true); if (Record[OpNum] & (1 << bitc::OBO_NO_UNSIGNED_WRAP)) cast(I)->setHasNoUnsignedWrap(true); } else if (Opc == Instruction::SDiv || Opc == Instruction::UDiv || Opc == Instruction::LShr || Opc == Instruction::AShr) { if (Record[OpNum] & (1 << bitc::PEO_EXACT)) cast(I)->setIsExact(true); } else if (isa(I)) { FastMathFlags FMF = getDecodedFastMathFlags(Record[OpNum]); if (FMF.any()) I->setFastMathFlags(FMF); } } break; } case bitc::FUNC_CODE_INST_CAST: { // CAST: [opval, opty, destty, castopc] unsigned OpNum = 0; Value *Op; if (getValueTypePair(Record, OpNum, NextValueNo, Op) || OpNum+2 != Record.size()) return error("Invalid record"); Type *ResTy = getTypeByID(Record[OpNum]); int Opc = getDecodedCastOpcode(Record[OpNum + 1]); if (Opc == -1 || !ResTy) return error("Invalid record"); Instruction *Temp = nullptr; if ((I = UpgradeBitCastInst(Opc, Op, ResTy, Temp))) { if (Temp) { InstructionList.push_back(Temp); CurBB->getInstList().push_back(Temp); } } else { auto CastOp = (Instruction::CastOps)Opc; if (!CastInst::castIsValid(CastOp, Op, ResTy)) return error("Invalid cast"); I = CastInst::Create(CastOp, Op, ResTy); } InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_INBOUNDS_GEP_OLD: case bitc::FUNC_CODE_INST_GEP_OLD: case bitc::FUNC_CODE_INST_GEP: { // GEP: type, [n x operands] unsigned OpNum = 0; Type *Ty; bool InBounds; if (BitCode == bitc::FUNC_CODE_INST_GEP) { InBounds = Record[OpNum++]; Ty = getTypeByID(Record[OpNum++]); } else { InBounds = BitCode == bitc::FUNC_CODE_INST_INBOUNDS_GEP_OLD; Ty = nullptr; } Value *BasePtr; if (getValueTypePair(Record, OpNum, NextValueNo, BasePtr)) return error("Invalid record"); if (!Ty) Ty = cast(BasePtr->getType()->getScalarType()) ->getElementType(); else if (Ty != cast(BasePtr->getType()->getScalarType()) ->getElementType()) return error( "Explicit gep type does not match pointee type of pointer operand"); SmallVector GEPIdx; while (OpNum != Record.size()) { Value *Op; if (getValueTypePair(Record, OpNum, NextValueNo, Op)) return error("Invalid record"); GEPIdx.push_back(Op); } I = GetElementPtrInst::Create(Ty, BasePtr, 
GEPIdx); InstructionList.push_back(I); if (InBounds) cast(I)->setIsInBounds(true); break; } case bitc::FUNC_CODE_INST_EXTRACTVAL: { // EXTRACTVAL: [opty, opval, n x indices] unsigned OpNum = 0; Value *Agg; if (getValueTypePair(Record, OpNum, NextValueNo, Agg)) return error("Invalid record"); unsigned RecSize = Record.size(); if (OpNum == RecSize) return error("EXTRACTVAL: Invalid instruction with 0 indices"); SmallVector EXTRACTVALIdx; Type *CurTy = Agg->getType(); for (; OpNum != RecSize; ++OpNum) { bool IsArray = CurTy->isArrayTy(); bool IsStruct = CurTy->isStructTy(); uint64_t Index = Record[OpNum]; if (!IsStruct && !IsArray) return error("EXTRACTVAL: Invalid type"); if ((unsigned)Index != Index) return error("Invalid value"); if (IsStruct && Index >= CurTy->subtypes().size()) return error("EXTRACTVAL: Invalid struct index"); if (IsArray && Index >= CurTy->getArrayNumElements()) return error("EXTRACTVAL: Invalid array index"); EXTRACTVALIdx.push_back((unsigned)Index); if (IsStruct) CurTy = CurTy->subtypes()[Index]; else CurTy = CurTy->subtypes()[0]; } I = ExtractValueInst::Create(Agg, EXTRACTVALIdx); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_INSERTVAL: { // INSERTVAL: [opty, opval, opty, opval, n x indices] unsigned OpNum = 0; Value *Agg; if (getValueTypePair(Record, OpNum, NextValueNo, Agg)) return error("Invalid record"); Value *Val; if (getValueTypePair(Record, OpNum, NextValueNo, Val)) return error("Invalid record"); unsigned RecSize = Record.size(); if (OpNum == RecSize) return error("INSERTVAL: Invalid instruction with 0 indices"); SmallVector INSERTVALIdx; Type *CurTy = Agg->getType(); for (; OpNum != RecSize; ++OpNum) { bool IsArray = CurTy->isArrayTy(); bool IsStruct = CurTy->isStructTy(); uint64_t Index = Record[OpNum]; if (!IsStruct && !IsArray) return error("INSERTVAL: Invalid type"); if ((unsigned)Index != Index) return error("Invalid value"); if (IsStruct && Index >= CurTy->subtypes().size()) return error("INSERTVAL: Invalid struct index"); if (IsArray && Index >= CurTy->getArrayNumElements()) return error("INSERTVAL: Invalid array index"); INSERTVALIdx.push_back((unsigned)Index); if (IsStruct) CurTy = CurTy->subtypes()[Index]; else CurTy = CurTy->subtypes()[0]; } if (CurTy != Val->getType()) return error("Inserted value type doesn't match aggregate type"); I = InsertValueInst::Create(Agg, Val, INSERTVALIdx); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_SELECT: { // SELECT: [opval, ty, opval, opval] // obsolete form of select // handles select i1 ... 
in old bitcode unsigned OpNum = 0; Value *TrueVal, *FalseVal, *Cond; if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) || popValue(Record, OpNum, NextValueNo, TrueVal->getType(), FalseVal) || popValue(Record, OpNum, NextValueNo, Type::getInt1Ty(Context), Cond)) return error("Invalid record"); I = SelectInst::Create(Cond, TrueVal, FalseVal); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_VSELECT: {// VSELECT: [ty,opval,opval,predty,pred] // new form of select // handles select i1 or select [N x i1] unsigned OpNum = 0; Value *TrueVal, *FalseVal, *Cond; if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) || popValue(Record, OpNum, NextValueNo, TrueVal->getType(), FalseVal) || getValueTypePair(Record, OpNum, NextValueNo, Cond)) return error("Invalid record"); // select condition can be either i1 or [N x i1] if (VectorType* vector_type = dyn_cast(Cond->getType())) { // expect if (vector_type->getElementType() != Type::getInt1Ty(Context)) return error("Invalid type for value"); } else { // expect i1 if (Cond->getType() != Type::getInt1Ty(Context)) return error("Invalid type for value"); } I = SelectInst::Create(Cond, TrueVal, FalseVal); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_EXTRACTELT: { // EXTRACTELT: [opty, opval, opval] unsigned OpNum = 0; Value *Vec, *Idx; if (getValueTypePair(Record, OpNum, NextValueNo, Vec) || getValueTypePair(Record, OpNum, NextValueNo, Idx)) return error("Invalid record"); if (!Vec->getType()->isVectorTy()) return error("Invalid type for value"); I = ExtractElementInst::Create(Vec, Idx); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_INSERTELT: { // INSERTELT: [ty, opval,opval,opval] unsigned OpNum = 0; Value *Vec, *Elt, *Idx; if (getValueTypePair(Record, OpNum, NextValueNo, Vec)) return error("Invalid record"); if (!Vec->getType()->isVectorTy()) return error("Invalid type for value"); if (popValue(Record, OpNum, NextValueNo, cast(Vec->getType())->getElementType(), Elt) || getValueTypePair(Record, OpNum, NextValueNo, Idx)) return error("Invalid record"); I = InsertElementInst::Create(Vec, Elt, Idx); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_SHUFFLEVEC: {// SHUFFLEVEC: [opval,ty,opval,opval] unsigned OpNum = 0; Value *Vec1, *Vec2, *Mask; if (getValueTypePair(Record, OpNum, NextValueNo, Vec1) || popValue(Record, OpNum, NextValueNo, Vec1->getType(), Vec2)) return error("Invalid record"); if (getValueTypePair(Record, OpNum, NextValueNo, Mask)) return error("Invalid record"); if (!Vec1->getType()->isVectorTy() || !Vec2->getType()->isVectorTy()) return error("Invalid type for value"); I = new ShuffleVectorInst(Vec1, Vec2, Mask); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_CMP: // CMP: [opty, opval, opval, pred] // Old form of ICmp/FCmp returning bool // Existed to differentiate between icmp/fcmp and vicmp/vfcmp which were // both legal on vectors but had different behaviour. 
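    // Both CMP and CMP2 are decoded by the shared logic below; whether an
    // ICmp or an FCmp is created depends on the operand type, not on which
    // record code was used.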
case bitc::FUNC_CODE_INST_CMP2: { // CMP2: [opty, opval, opval, pred] // FCmp/ICmp returning bool or vector of bool unsigned OpNum = 0; Value *LHS, *RHS; if (getValueTypePair(Record, OpNum, NextValueNo, LHS) || popValue(Record, OpNum, NextValueNo, LHS->getType(), RHS)) return error("Invalid record"); unsigned PredVal = Record[OpNum]; bool IsFP = LHS->getType()->isFPOrFPVectorTy(); FastMathFlags FMF; if (IsFP && Record.size() > OpNum+1) FMF = getDecodedFastMathFlags(Record[++OpNum]); if (OpNum+1 != Record.size()) return error("Invalid record"); if (LHS->getType()->isFPOrFPVectorTy()) I = new FCmpInst((FCmpInst::Predicate)PredVal, LHS, RHS); else I = new ICmpInst((ICmpInst::Predicate)PredVal, LHS, RHS); if (FMF.any()) I->setFastMathFlags(FMF); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_RET: // RET: [opty,opval] { unsigned Size = Record.size(); if (Size == 0) { I = ReturnInst::Create(Context); InstructionList.push_back(I); break; } unsigned OpNum = 0; Value *Op = nullptr; if (getValueTypePair(Record, OpNum, NextValueNo, Op)) return error("Invalid record"); if (OpNum != Record.size()) return error("Invalid record"); I = ReturnInst::Create(Context, Op); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_BR: { // BR: [bb#, bb#, opval] or [bb#] if (Record.size() != 1 && Record.size() != 3) return error("Invalid record"); BasicBlock *TrueDest = getBasicBlock(Record[0]); if (!TrueDest) return error("Invalid record"); if (Record.size() == 1) { I = BranchInst::Create(TrueDest); InstructionList.push_back(I); } else { BasicBlock *FalseDest = getBasicBlock(Record[1]); Value *Cond = getValue(Record, 2, NextValueNo, Type::getInt1Ty(Context)); if (!FalseDest || !Cond) return error("Invalid record"); I = BranchInst::Create(TrueDest, FalseDest, Cond); InstructionList.push_back(I); } break; } case bitc::FUNC_CODE_INST_CLEANUPRET: { // CLEANUPRET: [val] or [val,bb#] if (Record.size() != 1 && Record.size() != 2) return error("Invalid record"); unsigned Idx = 0; Value *CleanupPad = getValue(Record, Idx++, NextValueNo, Type::getTokenTy(Context)); if (!CleanupPad) return error("Invalid record"); BasicBlock *UnwindDest = nullptr; if (Record.size() == 2) { UnwindDest = getBasicBlock(Record[Idx++]); if (!UnwindDest) return error("Invalid record"); } I = CleanupReturnInst::Create(CleanupPad, UnwindDest); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_CATCHRET: { // CATCHRET: [val,bb#] if (Record.size() != 2) return error("Invalid record"); unsigned Idx = 0; Value *CatchPad = getValue(Record, Idx++, NextValueNo, Type::getTokenTy(Context)); if (!CatchPad) return error("Invalid record"); BasicBlock *BB = getBasicBlock(Record[Idx++]); if (!BB) return error("Invalid record"); I = CatchReturnInst::Create(CatchPad, BB); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_CATCHSWITCH: { // CATCHSWITCH: [tok,num,(bb)*,bb?] // We must have, at minimum, the outer scope and the number of arguments. 
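      // Illustrative layout: a catchswitch with two handlers and an unwind
      // destination would be encoded as [parent-pad token, 2, handler-bb#,
      // handler-bb#, unwind-bb#]; the optional trailing block is recognized
      // below by checking Idx + 1 == Record.size().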
if (Record.size() < 2) return error("Invalid record"); unsigned Idx = 0; Value *ParentPad = getValue(Record, Idx++, NextValueNo, Type::getTokenTy(Context)); unsigned NumHandlers = Record[Idx++]; SmallVector Handlers; for (unsigned Op = 0; Op != NumHandlers; ++Op) { BasicBlock *BB = getBasicBlock(Record[Idx++]); if (!BB) return error("Invalid record"); Handlers.push_back(BB); } BasicBlock *UnwindDest = nullptr; if (Idx + 1 == Record.size()) { UnwindDest = getBasicBlock(Record[Idx++]); if (!UnwindDest) return error("Invalid record"); } if (Record.size() != Idx) return error("Invalid record"); auto *CatchSwitch = CatchSwitchInst::Create(ParentPad, UnwindDest, NumHandlers); for (BasicBlock *Handler : Handlers) CatchSwitch->addHandler(Handler); I = CatchSwitch; InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_CATCHPAD: case bitc::FUNC_CODE_INST_CLEANUPPAD: { // [tok,num,(ty,val)*] // We must have, at minimum, the outer scope and the number of arguments. if (Record.size() < 2) return error("Invalid record"); unsigned Idx = 0; Value *ParentPad = getValue(Record, Idx++, NextValueNo, Type::getTokenTy(Context)); unsigned NumArgOperands = Record[Idx++]; SmallVector Args; for (unsigned Op = 0; Op != NumArgOperands; ++Op) { Value *Val; if (getValueTypePair(Record, Idx, NextValueNo, Val)) return error("Invalid record"); Args.push_back(Val); } if (Record.size() != Idx) return error("Invalid record"); if (BitCode == bitc::FUNC_CODE_INST_CLEANUPPAD) I = CleanupPadInst::Create(ParentPad, Args); else I = CatchPadInst::Create(ParentPad, Args); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_SWITCH: { // SWITCH: [opty, op0, op1, ...] // Check magic if ((Record[0] >> 16) == SWITCH_INST_MAGIC) { // "New" SwitchInst format with case ranges. The changes to write this // format were reverted but we still recognize bitcode that uses it. // Hopefully someday we will have support for case ranges and can use // this format again. Type *OpTy = getTypeByID(Record[1]); unsigned ValueBitWidth = cast(OpTy)->getBitWidth(); Value *Cond = getValue(Record, 2, NextValueNo, OpTy); BasicBlock *Default = getBasicBlock(Record[3]); if (!OpTy || !Cond || !Default) return error("Invalid record"); unsigned NumCases = Record[4]; SwitchInst *SI = SwitchInst::Create(Cond, Default, NumCases); InstructionList.push_back(SI); unsigned CurIdx = 5; for (unsigned i = 0; i != NumCases; ++i) { SmallVector CaseVals; unsigned NumItems = Record[CurIdx++]; for (unsigned ci = 0; ci != NumItems; ++ci) { bool isSingleNumber = Record[CurIdx++]; APInt Low; unsigned ActiveWords = 1; if (ValueBitWidth > 64) ActiveWords = Record[CurIdx++]; Low = readWideAPInt(makeArrayRef(&Record[CurIdx], ActiveWords), ValueBitWidth); CurIdx += ActiveWords; if (!isSingleNumber) { ActiveWords = 1; if (ValueBitWidth > 64) ActiveWords = Record[CurIdx++]; APInt High = readWideAPInt( makeArrayRef(&Record[CurIdx], ActiveWords), ValueBitWidth); CurIdx += ActiveWords; // FIXME: It is not clear whether values in the range should be // compared as signed or unsigned values. The partially // implemented changes that used this format in the past used // unsigned comparisons. for ( ; Low.ule(High); ++Low) CaseVals.push_back(ConstantInt::get(Context, Low)); } else CaseVals.push_back(ConstantInt::get(Context, Low)); } BasicBlock *DestBB = getBasicBlock(Record[CurIdx++]); for (SmallVector::iterator cvi = CaseVals.begin(), cve = CaseVals.end(); cvi != cve; ++cvi) SI->addCase(*cvi, DestBB); } I = SI; break; } // Old SwitchInst format without case ranges. 
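      // Layout: [opty, cond, default-bb#, (caseval, case-bb#)*], hence the
      // (Record.size() - 3) / 2 case count computed below.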
if (Record.size() < 3 || (Record.size() & 1) == 0) return error("Invalid record"); Type *OpTy = getTypeByID(Record[0]); Value *Cond = getValue(Record, 1, NextValueNo, OpTy); BasicBlock *Default = getBasicBlock(Record[2]); if (!OpTy || !Cond || !Default) return error("Invalid record"); unsigned NumCases = (Record.size()-3)/2; SwitchInst *SI = SwitchInst::Create(Cond, Default, NumCases); InstructionList.push_back(SI); for (unsigned i = 0, e = NumCases; i != e; ++i) { ConstantInt *CaseVal = dyn_cast_or_null(getFnValueByID(Record[3+i*2], OpTy)); BasicBlock *DestBB = getBasicBlock(Record[1+3+i*2]); if (!CaseVal || !DestBB) { delete SI; return error("Invalid record"); } SI->addCase(CaseVal, DestBB); } I = SI; break; } case bitc::FUNC_CODE_INST_INDIRECTBR: { // INDIRECTBR: [opty, op0, op1, ...] if (Record.size() < 2) return error("Invalid record"); Type *OpTy = getTypeByID(Record[0]); Value *Address = getValue(Record, 1, NextValueNo, OpTy); if (!OpTy || !Address) return error("Invalid record"); unsigned NumDests = Record.size()-2; IndirectBrInst *IBI = IndirectBrInst::Create(Address, NumDests); InstructionList.push_back(IBI); for (unsigned i = 0, e = NumDests; i != e; ++i) { if (BasicBlock *DestBB = getBasicBlock(Record[2+i])) { IBI->addDestination(DestBB); } else { delete IBI; return error("Invalid record"); } } I = IBI; break; } case bitc::FUNC_CODE_INST_INVOKE: { // INVOKE: [attrs, cc, normBB, unwindBB, fnty, op0,op1,op2, ...] if (Record.size() < 4) return error("Invalid record"); unsigned OpNum = 0; AttributeList PAL = getAttributes(Record[OpNum++]); unsigned CCInfo = Record[OpNum++]; BasicBlock *NormalBB = getBasicBlock(Record[OpNum++]); BasicBlock *UnwindBB = getBasicBlock(Record[OpNum++]); FunctionType *FTy = nullptr; if (CCInfo >> 13 & 1 && !(FTy = dyn_cast(getTypeByID(Record[OpNum++])))) return error("Explicit invoke type is not a function type"); Value *Callee; if (getValueTypePair(Record, OpNum, NextValueNo, Callee)) return error("Invalid record"); PointerType *CalleeTy = dyn_cast(Callee->getType()); if (!CalleeTy) return error("Callee is not a pointer"); if (!FTy) { FTy = dyn_cast(CalleeTy->getElementType()); if (!FTy) return error("Callee is not of pointer to function type"); } else if (CalleeTy->getElementType() != FTy) return error("Explicit invoke type does not match pointee type of " "callee operand"); if (Record.size() < FTy->getNumParams() + OpNum) return error("Insufficient operands to call"); SmallVector Ops; for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) { Ops.push_back(getValue(Record, OpNum, NextValueNo, FTy->getParamType(i))); if (!Ops.back()) return error("Invalid record"); } if (!FTy->isVarArg()) { if (Record.size() != OpNum) return error("Invalid record"); } else { // Read type/value pairs for varargs params. 
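        // Only the fixed parameters are described by FTy, so each trailing
        // vararg operand must carry its own explicit type.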
while (OpNum != Record.size()) { Value *Op; if (getValueTypePair(Record, OpNum, NextValueNo, Op)) return error("Invalid record"); Ops.push_back(Op); } } I = InvokeInst::Create(Callee, NormalBB, UnwindBB, Ops, OperandBundles); OperandBundles.clear(); InstructionList.push_back(I); cast(I)->setCallingConv( static_cast(CallingConv::MaxID & CCInfo)); cast(I)->setAttributes(PAL); break; } case bitc::FUNC_CODE_INST_RESUME: { // RESUME: [opval] unsigned Idx = 0; Value *Val = nullptr; if (getValueTypePair(Record, Idx, NextValueNo, Val)) return error("Invalid record"); I = ResumeInst::Create(Val); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_UNREACHABLE: // UNREACHABLE I = new UnreachableInst(Context); InstructionList.push_back(I); break; case bitc::FUNC_CODE_INST_PHI: { // PHI: [ty, val0,bb0, ...] if (Record.size() < 1 || ((Record.size()-1)&1)) return error("Invalid record"); Type *Ty = getTypeByID(Record[0]); if (!Ty) return error("Invalid record"); PHINode *PN = PHINode::Create(Ty, (Record.size()-1)/2); InstructionList.push_back(PN); for (unsigned i = 0, e = Record.size()-1; i != e; i += 2) { Value *V; // With the new function encoding, it is possible that operands have // negative IDs (for forward references). Use a signed VBR // representation to keep the encoding small. if (UseRelativeIDs) V = getValueSigned(Record, 1+i, NextValueNo, Ty); else V = getValue(Record, 1+i, NextValueNo, Ty); BasicBlock *BB = getBasicBlock(Record[2+i]); if (!V || !BB) return error("Invalid record"); PN->addIncoming(V, BB); } I = PN; break; } case bitc::FUNC_CODE_INST_LANDINGPAD: case bitc::FUNC_CODE_INST_LANDINGPAD_OLD: { // LANDINGPAD: [ty, val, val, num, (id0,val0 ...)?] unsigned Idx = 0; if (BitCode == bitc::FUNC_CODE_INST_LANDINGPAD) { if (Record.size() < 3) return error("Invalid record"); } else { assert(BitCode == bitc::FUNC_CODE_INST_LANDINGPAD_OLD); if (Record.size() < 4) return error("Invalid record"); } Type *Ty = getTypeByID(Record[Idx++]); if (!Ty) return error("Invalid record"); if (BitCode == bitc::FUNC_CODE_INST_LANDINGPAD_OLD) { Value *PersFn = nullptr; if (getValueTypePair(Record, Idx, NextValueNo, PersFn)) return error("Invalid record"); if (!F->hasPersonalityFn()) F->setPersonalityFn(cast(PersFn)); else if (F->getPersonalityFn() != cast(PersFn)) return error("Personality function mismatch"); } bool IsCleanup = !!Record[Idx++]; unsigned NumClauses = Record[Idx++]; LandingPadInst *LP = LandingPadInst::Create(Ty, NumClauses); LP->setCleanup(IsCleanup); for (unsigned J = 0; J != NumClauses; ++J) { LandingPadInst::ClauseType CT = LandingPadInst::ClauseType(Record[Idx++]); (void)CT; Value *Val; if (getValueTypePair(Record, Idx, NextValueNo, Val)) { delete LP; return error("Invalid record"); } assert((CT != LandingPadInst::Catch || !isa(Val->getType())) && "Catch clause has a invalid type!"); assert((CT != LandingPadInst::Filter || isa(Val->getType())) && "Filter clause has invalid type!"); LP->addClause(cast(Val)); } I = LP; InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_ALLOCA: { // ALLOCA: [instty, opty, op, align] if (Record.size() != 4) return error("Invalid record"); uint64_t AlignRecord = Record[3]; const uint64_t InAllocaMask = uint64_t(1) << 5; const uint64_t ExplicitTypeMask = uint64_t(1) << 6; const uint64_t SwiftErrorMask = uint64_t(1) << 7; const uint64_t FlagMask = InAllocaMask | ExplicitTypeMask | SwiftErrorMask; bool InAlloca = AlignRecord & InAllocaMask; bool SwiftError = AlignRecord & SwiftErrorMask; Type *Ty = getTypeByID(Record[0]); if 
((AlignRecord & ExplicitTypeMask) == 0) { auto *PTy = dyn_cast_or_null(Ty); if (!PTy) return error("Old-style alloca with a non-pointer type"); Ty = PTy->getElementType(); } Type *OpTy = getTypeByID(Record[1]); Value *Size = getFnValueByID(Record[2], OpTy); unsigned Align; if (Error Err = parseAlignmentValue(AlignRecord & ~FlagMask, Align)) { return Err; } if (!Ty || !Size) return error("Invalid record"); // FIXME: Make this an optional field. const DataLayout &DL = TheModule->getDataLayout(); unsigned AS = DL.getAllocaAddrSpace(); AllocaInst *AI = new AllocaInst(Ty, AS, Size, Align); AI->setUsedWithInAlloca(InAlloca); AI->setSwiftError(SwiftError); I = AI; InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_LOAD: { // LOAD: [opty, op, align, vol] unsigned OpNum = 0; Value *Op; if (getValueTypePair(Record, OpNum, NextValueNo, Op) || (OpNum + 2 != Record.size() && OpNum + 3 != Record.size())) return error("Invalid record"); Type *Ty = nullptr; if (OpNum + 3 == Record.size()) Ty = getTypeByID(Record[OpNum++]); if (Error Err = typeCheckLoadStoreInst(Ty, Op->getType())) return Err; if (!Ty) Ty = cast(Op->getType())->getElementType(); unsigned Align; if (Error Err = parseAlignmentValue(Record[OpNum], Align)) return Err; I = new LoadInst(Ty, Op, "", Record[OpNum + 1], Align); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_LOADATOMIC: { // LOADATOMIC: [opty, op, align, vol, ordering, ssid] unsigned OpNum = 0; Value *Op; if (getValueTypePair(Record, OpNum, NextValueNo, Op) || (OpNum + 4 != Record.size() && OpNum + 5 != Record.size())) return error("Invalid record"); Type *Ty = nullptr; if (OpNum + 5 == Record.size()) Ty = getTypeByID(Record[OpNum++]); if (Error Err = typeCheckLoadStoreInst(Ty, Op->getType())) return Err; if (!Ty) Ty = cast(Op->getType())->getElementType(); AtomicOrdering Ordering = getDecodedOrdering(Record[OpNum + 2]); if (Ordering == AtomicOrdering::NotAtomic || Ordering == AtomicOrdering::Release || Ordering == AtomicOrdering::AcquireRelease) return error("Invalid record"); if (Ordering != AtomicOrdering::NotAtomic && Record[OpNum] == 0) return error("Invalid record"); SyncScope::ID SSID = getDecodedSyncScopeID(Record[OpNum + 3]); unsigned Align; if (Error Err = parseAlignmentValue(Record[OpNum], Align)) return Err; I = new LoadInst(Op, "", Record[OpNum+1], Align, Ordering, SSID); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_STORE: case bitc::FUNC_CODE_INST_STORE_OLD: { // STORE2:[ptrty, ptr, val, align, vol] unsigned OpNum = 0; Value *Val, *Ptr; if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) || (BitCode == bitc::FUNC_CODE_INST_STORE ? getValueTypePair(Record, OpNum, NextValueNo, Val) : popValue(Record, OpNum, NextValueNo, cast(Ptr->getType())->getElementType(), Val)) || OpNum + 2 != Record.size()) return error("Invalid record"); if (Error Err = typeCheckLoadStoreInst(Val->getType(), Ptr->getType())) return Err; unsigned Align; if (Error Err = parseAlignmentValue(Record[OpNum], Align)) return Err; I = new StoreInst(Val, Ptr, Record[OpNum+1], Align); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_STOREATOMIC: case bitc::FUNC_CODE_INST_STOREATOMIC_OLD: { // STOREATOMIC: [ptrty, ptr, val, align, vol, ordering, ssid] unsigned OpNum = 0; Value *Val, *Ptr; if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) || !isa(Ptr->getType()) || (BitCode == bitc::FUNC_CODE_INST_STOREATOMIC ? 
getValueTypePair(Record, OpNum, NextValueNo, Val) : popValue(Record, OpNum, NextValueNo, cast(Ptr->getType())->getElementType(), Val)) || OpNum + 4 != Record.size()) return error("Invalid record"); if (Error Err = typeCheckLoadStoreInst(Val->getType(), Ptr->getType())) return Err; AtomicOrdering Ordering = getDecodedOrdering(Record[OpNum + 2]); if (Ordering == AtomicOrdering::NotAtomic || Ordering == AtomicOrdering::Acquire || Ordering == AtomicOrdering::AcquireRelease) return error("Invalid record"); SyncScope::ID SSID = getDecodedSyncScopeID(Record[OpNum + 3]); if (Ordering != AtomicOrdering::NotAtomic && Record[OpNum] == 0) return error("Invalid record"); unsigned Align; if (Error Err = parseAlignmentValue(Record[OpNum], Align)) return Err; I = new StoreInst(Val, Ptr, Record[OpNum+1], Align, Ordering, SSID); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_CMPXCHG_OLD: case bitc::FUNC_CODE_INST_CMPXCHG: { // CMPXCHG:[ptrty, ptr, cmp, new, vol, successordering, ssid, // failureordering?, isweak?] unsigned OpNum = 0; Value *Ptr, *Cmp, *New; if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) || (BitCode == bitc::FUNC_CODE_INST_CMPXCHG ? getValueTypePair(Record, OpNum, NextValueNo, Cmp) : popValue(Record, OpNum, NextValueNo, cast(Ptr->getType())->getElementType(), Cmp)) || popValue(Record, OpNum, NextValueNo, Cmp->getType(), New) || Record.size() < OpNum + 3 || Record.size() > OpNum + 5) return error("Invalid record"); AtomicOrdering SuccessOrdering = getDecodedOrdering(Record[OpNum + 1]); if (SuccessOrdering == AtomicOrdering::NotAtomic || SuccessOrdering == AtomicOrdering::Unordered) return error("Invalid record"); SyncScope::ID SSID = getDecodedSyncScopeID(Record[OpNum + 2]); if (Error Err = typeCheckLoadStoreInst(Cmp->getType(), Ptr->getType())) return Err; AtomicOrdering FailureOrdering; if (Record.size() < 7) FailureOrdering = AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrdering); else FailureOrdering = getDecodedOrdering(Record[OpNum + 3]); I = new AtomicCmpXchgInst(Ptr, Cmp, New, SuccessOrdering, FailureOrdering, SSID); cast(I)->setVolatile(Record[OpNum]); if (Record.size() < 8) { // Before weak cmpxchgs existed, the instruction simply returned the // value loaded from memory, so bitcode files from that era will be // expecting the first component of a modern cmpxchg. 
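        // Illustrative upgrade, in IR terms:
        //   %pair = cmpxchg i32* %p, i32 %cmp, i32 %new seq_cst seq_cst
        //   %old  = extractvalue { i32, i1 } %pair, 0
        // Old code that consumed the scalar result now reads %old.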
CurBB->getInstList().push_back(I); I = ExtractValueInst::Create(I, 0); } else { cast(I)->setWeak(Record[OpNum+4]); } InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_ATOMICRMW: { // ATOMICRMW:[ptrty, ptr, val, op, vol, ordering, ssid] unsigned OpNum = 0; Value *Ptr, *Val; if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) || !isa(Ptr->getType()) || popValue(Record, OpNum, NextValueNo, cast(Ptr->getType())->getElementType(), Val) || OpNum+4 != Record.size()) return error("Invalid record"); AtomicRMWInst::BinOp Operation = getDecodedRMWOperation(Record[OpNum]); if (Operation < AtomicRMWInst::FIRST_BINOP || Operation > AtomicRMWInst::LAST_BINOP) return error("Invalid record"); AtomicOrdering Ordering = getDecodedOrdering(Record[OpNum + 2]); if (Ordering == AtomicOrdering::NotAtomic || Ordering == AtomicOrdering::Unordered) return error("Invalid record"); SyncScope::ID SSID = getDecodedSyncScopeID(Record[OpNum + 3]); I = new AtomicRMWInst(Operation, Ptr, Val, Ordering, SSID); cast(I)->setVolatile(Record[OpNum+1]); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_FENCE: { // FENCE:[ordering, ssid] if (2 != Record.size()) return error("Invalid record"); AtomicOrdering Ordering = getDecodedOrdering(Record[0]); if (Ordering == AtomicOrdering::NotAtomic || Ordering == AtomicOrdering::Unordered || Ordering == AtomicOrdering::Monotonic) return error("Invalid record"); SyncScope::ID SSID = getDecodedSyncScopeID(Record[1]); I = new FenceInst(Context, Ordering, SSID); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_CALL: { // CALL: [paramattrs, cc, fmf, fnty, fnid, arg0, arg1...] if (Record.size() < 3) return error("Invalid record"); unsigned OpNum = 0; AttributeList PAL = getAttributes(Record[OpNum++]); unsigned CCInfo = Record[OpNum++]; FastMathFlags FMF; if ((CCInfo >> bitc::CALL_FMF) & 1) { FMF = getDecodedFastMathFlags(Record[OpNum++]); if (!FMF.any()) return error("Fast math flags indicator set for call with no FMF"); } FunctionType *FTy = nullptr; if (CCInfo >> bitc::CALL_EXPLICIT_TYPE & 1 && !(FTy = dyn_cast(getTypeByID(Record[OpNum++])))) return error("Explicit call type is not a function type"); Value *Callee; if (getValueTypePair(Record, OpNum, NextValueNo, Callee)) return error("Invalid record"); PointerType *OpTy = dyn_cast(Callee->getType()); if (!OpTy) return error("Callee is not a pointer type"); if (!FTy) { FTy = dyn_cast(OpTy->getElementType()); if (!FTy) return error("Callee is not of pointer to function type"); } else if (OpTy->getElementType() != FTy) return error("Explicit call type does not match pointee type of " "callee operand"); if (Record.size() < FTy->getNumParams() + OpNum) return error("Insufficient operands to call"); SmallVector Args; // Read the fixed params. for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) { if (FTy->getParamType(i)->isLabelTy()) Args.push_back(getBasicBlock(Record[OpNum])); else Args.push_back(getValue(Record, OpNum, NextValueNo, FTy->getParamType(i))); if (!Args.back()) return error("Invalid record"); } // Read type/value pairs for varargs params. 
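      // As with invoke above: FTy describes only the fixed parameters, so any
      // remaining operands are read as explicit (type, value) pairs.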
if (!FTy->isVarArg()) { if (OpNum != Record.size()) return error("Invalid record"); } else { while (OpNum != Record.size()) { Value *Op; if (getValueTypePair(Record, OpNum, NextValueNo, Op)) return error("Invalid record"); Args.push_back(Op); } } I = CallInst::Create(FTy, Callee, Args, OperandBundles); OperandBundles.clear(); InstructionList.push_back(I); cast(I)->setCallingConv( static_cast((0x7ff & CCInfo) >> bitc::CALL_CCONV)); CallInst::TailCallKind TCK = CallInst::TCK_None; if (CCInfo & 1 << bitc::CALL_TAIL) TCK = CallInst::TCK_Tail; if (CCInfo & (1 << bitc::CALL_MUSTTAIL)) TCK = CallInst::TCK_MustTail; if (CCInfo & (1 << bitc::CALL_NOTAIL)) TCK = CallInst::TCK_NoTail; cast(I)->setTailCallKind(TCK); cast(I)->setAttributes(PAL); if (FMF.any()) { if (!isa(I)) return error("Fast-math-flags specified for call without " "floating-point scalar or vector return type"); I->setFastMathFlags(FMF); } break; } case bitc::FUNC_CODE_INST_VAARG: { // VAARG: [valistty, valist, instty] if (Record.size() < 3) return error("Invalid record"); Type *OpTy = getTypeByID(Record[0]); Value *Op = getValue(Record, 1, NextValueNo, OpTy); Type *ResTy = getTypeByID(Record[2]); if (!OpTy || !Op || !ResTy) return error("Invalid record"); I = new VAArgInst(Op, ResTy); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_OPERAND_BUNDLE: { // A call or an invoke can be optionally prefixed with some variable // number of operand bundle blocks. These blocks are read into // OperandBundles and consumed at the next call or invoke instruction. if (Record.size() < 1 || Record[0] >= BundleTags.size()) return error("Invalid record"); std::vector Inputs; unsigned OpNum = 1; while (OpNum != Record.size()) { Value *Op; if (getValueTypePair(Record, OpNum, NextValueNo, Op)) return error("Invalid record"); Inputs.push_back(Op); } OperandBundles.emplace_back(BundleTags[Record[0]], std::move(Inputs)); continue; } } // Add instruction to end of current BB. If there is no current BB, reject // this file. if (!CurBB) { I->deleteValue(); return error("Invalid instruction with no BB"); } if (!OperandBundles.empty()) { I->deleteValue(); return error("Operand bundles found with no consumer"); } CurBB->getInstList().push_back(I); // If this was a terminator instruction, move to the next block. if (isa(I)) { ++CurBBNo; CurBB = CurBBNo < FunctionBBs.size() ? FunctionBBs[CurBBNo] : nullptr; } // Non-void values get registered in the value table for future use. if (I && !I->getType()->isVoidTy()) ValueList.assignValue(I, NextValueNo++); } OutOfRecordLoop: if (!OperandBundles.empty()) return error("Operand bundles found with no consumer"); // Check the function list for unresolved values. if (Argument *A = dyn_cast(ValueList.back())) { if (!A->getParent()) { // We found at least one unresolved value. Nuke them all to avoid leaks. for (unsigned i = ModuleValueListSize, e = ValueList.size(); i != e; ++i){ if ((A = dyn_cast_or_null(ValueList[i])) && !A->getParent()) { A->replaceAllUsesWith(UndefValue::get(A->getType())); delete A; } } return error("Never resolved value found in function"); } } // Unexpected unresolved metadata about to be dropped. if (MDLoader->hasFwdRefs()) return error("Invalid function metadata: outgoing forward refs"); // Trim the value list down to the size it was before we parsed this function. 
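  // Function-local values were appended after the module-level ones, so
  // shrinking restores the value numbering expected by the next function body.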
  ValueList.shrinkTo(ModuleValueListSize);
  MDLoader->shrinkTo(ModuleMDLoaderSize);
  std::vector<BasicBlock *>().swap(FunctionBBs);
  return Error::success();
}

/// Find the function body in the bitcode stream
Error BitcodeReader::findFunctionInStream(
    Function *F,
    DenseMap<Function *, uint64_t>::iterator DeferredFunctionInfoIterator) {
  while (DeferredFunctionInfoIterator->second == 0) {
    // This is the fallback handling for the old format bitcode that
    // didn't contain the function index in the VST, or when we have
    // an anonymous function which would not have a VST entry.
    // Assert that we have one of those two cases.
    assert(VSTOffset == 0 || !F->hasName());
    // Parse the next body in the stream and set its position in the
    // DeferredFunctionInfo map.
    if (Error Err = rememberAndSkipFunctionBodies())
      return Err;
  }
  return Error::success();
}

SyncScope::ID BitcodeReader::getDecodedSyncScopeID(unsigned Val) {
  if (Val == SyncScope::SingleThread || Val == SyncScope::System)
    return SyncScope::ID(Val);
  if (Val >= SSIDs.size())
    return SyncScope::System; // Map unknown synchronization scopes to system.
  return SSIDs[Val];
}

//===----------------------------------------------------------------------===//
// GVMaterializer implementation
//===----------------------------------------------------------------------===//

Error BitcodeReader::materialize(GlobalValue *GV) {
  Function *F = dyn_cast<Function>(GV);
  // If it's not a function or is already material, ignore the request.
  if (!F || !F->isMaterializable())
    return Error::success();

  DenseMap<Function *, uint64_t>::iterator DFII = DeferredFunctionInfo.find(F);
  assert(DFII != DeferredFunctionInfo.end() && "Deferred function not found!");
  // If its position is recorded as 0, its body is somewhere in the stream
  // but we haven't seen it yet.
  if (DFII->second == 0)
    if (Error Err = findFunctionInStream(F, DFII))
      return Err;

  // Materialize metadata before parsing any function bodies.
  if (Error Err = materializeMetadata())
    return Err;

  // Move the bit stream to the saved position of the deferred function body.
  Stream.JumpToBit(DFII->second);

  if (Error Err = parseFunctionBody(F))
    return Err;
  F->setIsMaterializable(false);

  if (StripDebugInfo)
    stripDebugInfo(*F);

  // Upgrade any old intrinsic calls in the function.
  for (auto &I : UpgradedIntrinsics) {
    for (auto UI = I.first->materialized_user_begin(), UE = I.first->user_end();
         UI != UE;) {
      User *U = *UI;
      ++UI;
      if (CallInst *CI = dyn_cast<CallInst>(U))
        UpgradeIntrinsicCall(CI, I.second);
    }
  }

  // Update calls to the remangled intrinsics
  for (auto &I : RemangledIntrinsics)
    for (auto UI = I.first->materialized_user_begin(), UE = I.first->user_end();
         UI != UE;)
      // Don't expect any other users than call sites
      CallSite(*UI++).setCalledFunction(I.second);

  // Finish fn->subprogram upgrade for materialized functions.
  if (DISubprogram *SP = MDLoader->lookupSubprogramForFunction(F))
    F->setSubprogram(SP);

  // Check if the TBAA Metadata are valid, otherwise we will need to strip them.
  if (!MDLoader->isStrippingTBAA()) {
    for (auto &I : instructions(F)) {
      MDNode *TBAA = I.getMetadata(LLVMContext::MD_tbaa);
      if (!TBAA || TBAAVerifyHelper.visitTBAAMetadata(I, TBAA))
        continue;
      MDLoader->setStripTBAA(true);
      stripTBAA(F->getParent());
    }
  }

  // Bring in any functions that this function forward-referenced via
  // blockaddresses.
  return materializeForwardReferencedFunctions();
}

Error BitcodeReader::materializeModule() {
  if (Error Err = materializeMetadata())
    return Err;

  // Promise to materialize all forward references.
  WillMaterializeAllForwardRefs = true;

  // Iterate over the module, deserializing any functions that are still on
  // disk.
  for (Function &F : *TheModule) {
    if (Error Err = materialize(&F))
      return Err;
  }
  // At this point, if there are any function bodies, parse the rest of
  // the bits in the module past the last function block we have recorded
  // through either lazy scanning or the VST.
  if (LastFunctionBlockBit || NextUnreadBit)
    if (Error Err = parseModule(LastFunctionBlockBit > NextUnreadBit
                                    ? LastFunctionBlockBit
                                    : NextUnreadBit))
      return Err;

  // Check that all block address forward references got resolved (as we
  // promised above).
  if (!BasicBlockFwdRefs.empty())
    return error("Never resolved function from blockaddress");

  // Upgrade any intrinsic calls that slipped through (should not happen!) and
  // delete the old functions to clean up. We can't do this unless the entire
  // module is materialized because there could always be another function body
  // with calls to the old function.
  for (auto &I : UpgradedIntrinsics) {
    for (auto *U : I.first->users()) {
      if (CallInst *CI = dyn_cast<CallInst>(U))
        UpgradeIntrinsicCall(CI, I.second);
    }
    if (!I.first->use_empty())
      I.first->replaceAllUsesWith(I.second);
    I.first->eraseFromParent();
  }
  UpgradedIntrinsics.clear();
  // Do the same for remangled intrinsics
  for (auto &I : RemangledIntrinsics) {
    I.first->replaceAllUsesWith(I.second);
    I.first->eraseFromParent();
  }
  RemangledIntrinsics.clear();

  UpgradeDebugInfo(*TheModule);

  UpgradeModuleFlags(*TheModule);
  return Error::success();
}

std::vector<StructType *> BitcodeReader::getIdentifiedStructTypes() const {
  return IdentifiedStructTypes;
}

ModuleSummaryIndexBitcodeReader::ModuleSummaryIndexBitcodeReader(
    BitstreamCursor Cursor, StringRef Strtab, ModuleSummaryIndex &TheIndex,
    StringRef ModulePath, unsigned ModuleId)
    : BitcodeReaderBase(std::move(Cursor), Strtab), TheIndex(TheIndex),
      ModulePath(ModulePath), ModuleId(ModuleId) {}

ModuleSummaryIndex::ModuleInfo *
ModuleSummaryIndexBitcodeReader::addThisModule() {
  return TheIndex.addModule(ModulePath, ModuleId);
}

std::pair<ValueInfo, GlobalValue::GUID>
ModuleSummaryIndexBitcodeReader::getValueInfoFromValueId(unsigned ValueId) {
  auto VGI = ValueIdToValueInfoMap[ValueId];
  assert(VGI.first);
  return VGI;
}

void ModuleSummaryIndexBitcodeReader::setValueGUID(
    uint64_t ValueID, StringRef ValueName, GlobalValue::LinkageTypes Linkage,
    StringRef SourceFileName) {
  std::string GlobalId =
      GlobalValue::getGlobalIdentifier(ValueName, Linkage, SourceFileName);
  auto ValueGUID = GlobalValue::getGUID(GlobalId);
  auto OriginalNameID = ValueGUID;
  if (GlobalValue::isLocalLinkage(Linkage))
    OriginalNameID = GlobalValue::getGUID(ValueName);
  if (PrintSummaryGUIDs)
    dbgs() << "GUID " << ValueGUID << "(" << OriginalNameID << ") is "
           << ValueName << "\n";
  ValueIdToValueInfoMap[ValueID] =
      std::make_pair(TheIndex.getOrInsertValueInfo(ValueGUID), OriginalNameID);
}

// Specialized value symbol table parser used when reading module index
// blocks where we don't actually create global values. The parsed information
// is saved in the bitcode reader for use when later parsing summaries.
Error ModuleSummaryIndexBitcodeReader::parseValueSymbolTable(
    uint64_t Offset,
    DenseMap<unsigned, GlobalValue::LinkageTypes> &ValueIdToLinkageMap) {
  // With a strtab the VST is not required to parse the summary.
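  // (Names are instead read directly out of the STRTAB block, via the strtab
  // offset/size fields carried by the v2 global value records.)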
if (UseStrtab) return Error::success(); assert(Offset > 0 && "Expected non-zero VST offset"); uint64_t CurrentBit = jumpToValueSymbolTable(Offset, Stream); if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID)) return error("Invalid record"); SmallVector Record; // Read all the records for this value table. SmallString<128> ValueName; while (true) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: // Done parsing VST, jump back to wherever we came from. Stream.JumpToBit(CurrentBit); return Error::success(); case BitstreamEntry::Record: // The interesting case. break; } // Read a record. Record.clear(); switch (Stream.readRecord(Entry.ID, Record)) { default: // Default behavior: ignore (e.g. VST_CODE_BBENTRY records). break; case bitc::VST_CODE_ENTRY: { // VST_CODE_ENTRY: [valueid, namechar x N] if (convertToString(Record, 1, ValueName)) return error("Invalid record"); unsigned ValueID = Record[0]; assert(!SourceFileName.empty()); auto VLI = ValueIdToLinkageMap.find(ValueID); assert(VLI != ValueIdToLinkageMap.end() && "No linkage found for VST entry?"); auto Linkage = VLI->second; setValueGUID(ValueID, ValueName, Linkage, SourceFileName); ValueName.clear(); break; } case bitc::VST_CODE_FNENTRY: { // VST_CODE_FNENTRY: [valueid, offset, namechar x N] if (convertToString(Record, 2, ValueName)) return error("Invalid record"); unsigned ValueID = Record[0]; assert(!SourceFileName.empty()); auto VLI = ValueIdToLinkageMap.find(ValueID); assert(VLI != ValueIdToLinkageMap.end() && "No linkage found for VST entry?"); auto Linkage = VLI->second; setValueGUID(ValueID, ValueName, Linkage, SourceFileName); ValueName.clear(); break; } case bitc::VST_CODE_COMBINED_ENTRY: { // VST_CODE_COMBINED_ENTRY: [valueid, refguid] unsigned ValueID = Record[0]; GlobalValue::GUID RefGUID = Record[1]; // The "original name", which is the second value of the pair will be // overriden later by a FS_COMBINED_ORIGINAL_NAME in the combined index. ValueIdToValueInfoMap[ValueID] = std::make_pair(TheIndex.getOrInsertValueInfo(RefGUID), RefGUID); break; } } } } // Parse just the blocks needed for building the index out of the module. // At the end of this routine the module Index is populated with a map // from global value id to GlobalValueSummary objects. Error ModuleSummaryIndexBitcodeReader::parseModule() { if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) return error("Invalid record"); SmallVector Record; DenseMap ValueIdToLinkageMap; unsigned ValueId = 0; // Read the index for this module. while (true) { BitstreamEntry Entry = Stream.advance(); switch (Entry.Kind) { case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: return Error::success(); case BitstreamEntry::SubBlock: switch (Entry.ID) { default: // Skip unknown content. if (Stream.SkipBlock()) return error("Invalid record"); break; case bitc::BLOCKINFO_BLOCK_ID: // Need to parse these to get abbrev ids (e.g. for VST) if (readBlockInfo()) return error("Malformed block"); break; case bitc::VALUE_SYMTAB_BLOCK_ID: // Should have been parsed earlier via VSTOffset, unless there // is no summary section. 
assert(((SeenValueSymbolTable && VSTOffset > 0) || !SeenGlobalValSummary) && "Expected early VST parse via VSTOffset record"); if (Stream.SkipBlock()) return error("Invalid record"); break; case bitc::GLOBALVAL_SUMMARY_BLOCK_ID: case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID: assert(!SeenValueSymbolTable && "Already read VST when parsing summary block?"); // We might not have a VST if there were no values in the // summary. An empty summary block generated when we are // performing ThinLTO compiles so we don't later invoke // the regular LTO process on them. if (VSTOffset > 0) { if (Error Err = parseValueSymbolTable(VSTOffset, ValueIdToLinkageMap)) return Err; SeenValueSymbolTable = true; } SeenGlobalValSummary = true; if (Error Err = parseEntireSummary(Entry.ID)) return Err; break; case bitc::MODULE_STRTAB_BLOCK_ID: if (Error Err = parseModuleStringTable()) return Err; break; } continue; case BitstreamEntry::Record: { Record.clear(); auto BitCode = Stream.readRecord(Entry.ID, Record); switch (BitCode) { default: break; // Default behavior, ignore unknown content. case bitc::MODULE_CODE_VERSION: { if (Error Err = parseVersionRecord(Record).takeError()) return Err; break; } /// MODULE_CODE_SOURCE_FILENAME: [namechar x N] case bitc::MODULE_CODE_SOURCE_FILENAME: { SmallString<128> ValueName; if (convertToString(Record, 0, ValueName)) return error("Invalid record"); SourceFileName = ValueName.c_str(); break; } /// MODULE_CODE_HASH: [5*i32] case bitc::MODULE_CODE_HASH: { if (Record.size() != 5) return error("Invalid hash length " + Twine(Record.size()).str()); auto &Hash = addThisModule()->second.second; int Pos = 0; for (auto &Val : Record) { assert(!(Val >> 32) && "Unexpected high bits set"); Hash[Pos++] = Val; } break; } /// MODULE_CODE_VSTOFFSET: [offset] case bitc::MODULE_CODE_VSTOFFSET: if (Record.size() < 1) return error("Invalid record"); // Note that we subtract 1 here because the offset is relative to one // word before the start of the identification or module block, which // was historically always the start of the regular bitcode header. VSTOffset = Record[0] - 1; break; // v1 GLOBALVAR: [pointer type, isconst, initid, linkage, ...] // v1 FUNCTION: [type, callingconv, isproto, linkage, ...] // v1 ALIAS: [alias type, addrspace, aliasee val#, linkage, ...] 
      // v2: [strtab offset, strtab size, v1]
      case bitc::MODULE_CODE_GLOBALVAR:
      case bitc::MODULE_CODE_FUNCTION:
      case bitc::MODULE_CODE_ALIAS: {
        StringRef Name;
        ArrayRef<uint64_t> GVRecord;
        std::tie(Name, GVRecord) = readNameFromStrtab(Record);
        if (GVRecord.size() <= 3)
          return error("Invalid record");
        uint64_t RawLinkage = GVRecord[3];
        GlobalValue::LinkageTypes Linkage = getDecodedLinkage(RawLinkage);
        if (!UseStrtab) {
          ValueIdToLinkageMap[ValueId++] = Linkage;
          break;
        }

        setValueGUID(ValueId++, Name, Linkage, SourceFileName);
        break;
      }
      } // end switch (BitCode)
      } // end case BitstreamEntry::Record block
      continue;
    }   // end switch (Entry.Kind)
  }
}

std::vector<ValueInfo>
ModuleSummaryIndexBitcodeReader::makeRefList(ArrayRef<uint64_t> Record) {
  std::vector<ValueInfo> Ret;
  Ret.reserve(Record.size());
  for (uint64_t RefValueId : Record)
    Ret.push_back(getValueInfoFromValueId(RefValueId).first);
  return Ret;
}

std::vector<FunctionSummary::EdgeTy>
ModuleSummaryIndexBitcodeReader::makeCallList(ArrayRef<uint64_t> Record,
                                              bool IsOldProfileFormat,
                                              bool HasProfile) {
  std::vector<FunctionSummary::EdgeTy> Ret;
  Ret.reserve(Record.size());
  for (unsigned I = 0, E = Record.size(); I != E; ++I) {
    CalleeInfo::HotnessType Hotness = CalleeInfo::HotnessType::Unknown;
    ValueInfo Callee = getValueInfoFromValueId(Record[I]).first;
    if (IsOldProfileFormat) {
      I += 1; // Skip old callsitecount field
      if (HasProfile)
        I += 1; // Skip old profilecount field
    } else if (HasProfile)
      Hotness = static_cast<CalleeInfo::HotnessType>(Record[++I]);
    Ret.push_back(FunctionSummary::EdgeTy{Callee, CalleeInfo{Hotness}});
  }
  return Ret;
}

// Eagerly parse the entire summary block. This populates the GlobalValueSummary
// objects in the index.
Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
  if (Stream.EnterSubBlock(ID))
    return error("Invalid record");
  SmallVector<uint64_t, 64> Record;

  // Parse version
  {
    BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
    if (Entry.Kind != BitstreamEntry::Record)
      return error("Invalid Summary Block: record for version expected");
    if (Stream.readRecord(Entry.ID, Record) != bitc::FS_VERSION)
      return error("Invalid Summary Block: version expected");
  }
  const uint64_t Version = Record[0];
  const bool IsOldProfileFormat = Version == 1;
  if (Version < 1 || Version > 4)
    return error("Invalid summary version " + Twine(Version) +
                 ", 1, 2, 3 or 4 expected");
  Record.clear();

  // Keep around the last seen summary to be used when we see an optional
  // "OriginalName" attachment.
  GlobalValueSummary *LastSeenSummary = nullptr;
  GlobalValue::GUID LastSeenGUID = 0;

  // We can expect to see any number of type ID information records before
  // each function summary record; these variables store the information
  // collected so far so that it can be used to create the summary object.
  std::vector<GlobalValue::GUID> PendingTypeTests;
  std::vector<FunctionSummary::VFuncId> PendingTypeTestAssumeVCalls,
      PendingTypeCheckedLoadVCalls;
  std::vector<FunctionSummary::ConstVCall> PendingTypeTestAssumeConstVCalls,
      PendingTypeCheckedLoadConstVCalls;

  while (true) {
    BitstreamEntry Entry = Stream.advanceSkippingSubblocks();

    switch (Entry.Kind) {
    case BitstreamEntry::SubBlock: // Handled for us already.
    case BitstreamEntry::Error:
      return error("Malformed block");
    case BitstreamEntry::EndBlock:
      return Error::success();
    case BitstreamEntry::Record:
      // The interesting case.
      break;
    }

    // Read a record. The record format depends on whether this
    // is a per-module index or a combined index file. In the per-module
    // case the records contain the associated value's ID for correlation
    // with VST entries. In the combined index the correlation is done
    // via the bitcode offset of the summary records (which were saved
    // in the combined index VST entries). The records also contain
    // information used for ThinLTO renaming and importing.
Record.clear(); auto BitCode = Stream.readRecord(Entry.ID, Record); switch (BitCode) { default: // Default behavior: ignore. break; case bitc::FS_VALUE_GUID: { // [valueid, refguid] uint64_t ValueID = Record[0]; GlobalValue::GUID RefGUID = Record[1]; ValueIdToValueInfoMap[ValueID] = std::make_pair(TheIndex.getOrInsertValueInfo(RefGUID), RefGUID); break; } // FS_PERMODULE: [valueid, flags, instcount, fflags, numrefs, // numrefs x valueid, n x (valueid)] // FS_PERMODULE_PROFILE: [valueid, flags, instcount, fflags, numrefs, // numrefs x valueid, // n x (valueid, hotness)] case bitc::FS_PERMODULE: case bitc::FS_PERMODULE_PROFILE: { unsigned ValueID = Record[0]; uint64_t RawFlags = Record[1]; unsigned InstCount = Record[2]; uint64_t RawFunFlags = 0; unsigned NumRefs = Record[3]; int RefListStartIndex = 4; if (Version >= 4) { RawFunFlags = Record[3]; NumRefs = Record[4]; RefListStartIndex = 5; } auto Flags = getDecodedGVSummaryFlags(RawFlags, Version); // The module path string ref set in the summary must be owned by the // index's module string table. Since we don't have a module path // string table section in the per-module index, we create a single // module path string table entry with an empty (0) ID to take // ownership. int CallGraphEdgeStartIndex = RefListStartIndex + NumRefs; assert(Record.size() >= RefListStartIndex + NumRefs && "Record size inconsistent with number of references"); std::vector Refs = makeRefList( ArrayRef(Record).slice(RefListStartIndex, NumRefs)); bool HasProfile = (BitCode == bitc::FS_PERMODULE_PROFILE); std::vector Calls = makeCallList( ArrayRef(Record).slice(CallGraphEdgeStartIndex), IsOldProfileFormat, HasProfile); auto FS = llvm::make_unique( Flags, InstCount, getDecodedFFlags(RawFunFlags), std::move(Refs), std::move(Calls), std::move(PendingTypeTests), std::move(PendingTypeTestAssumeVCalls), std::move(PendingTypeCheckedLoadVCalls), std::move(PendingTypeTestAssumeConstVCalls), std::move(PendingTypeCheckedLoadConstVCalls)); PendingTypeTests.clear(); PendingTypeTestAssumeVCalls.clear(); PendingTypeCheckedLoadVCalls.clear(); PendingTypeTestAssumeConstVCalls.clear(); PendingTypeCheckedLoadConstVCalls.clear(); auto VIAndOriginalGUID = getValueInfoFromValueId(ValueID); FS->setModulePath(addThisModule()->first()); FS->setOriginalName(VIAndOriginalGUID.second); TheIndex.addGlobalValueSummary(VIAndOriginalGUID.first, std::move(FS)); break; } // FS_ALIAS: [valueid, flags, valueid] // Aliases must be emitted (and parsed) after all FS_PERMODULE entries, as // they expect all aliasee summaries to be available. case bitc::FS_ALIAS: { unsigned ValueID = Record[0]; uint64_t RawFlags = Record[1]; unsigned AliaseeID = Record[2]; auto Flags = getDecodedGVSummaryFlags(RawFlags, Version); auto AS = llvm::make_unique(Flags); // The module path string ref set in the summary must be owned by the // index's module string table. Since we don't have a module path // string table section in the per-module index, we create a single // module path string table entry with an empty (0) ID to take // ownership. 
AS->setModulePath(addThisModule()->first()); GlobalValue::GUID AliaseeGUID = getValueInfoFromValueId(AliaseeID).first.getGUID(); auto AliaseeInModule = TheIndex.findSummaryInModule(AliaseeGUID, ModulePath); if (!AliaseeInModule) return error("Alias expects aliasee summary to be parsed"); AS->setAliasee(AliaseeInModule); AS->setAliaseeGUID(AliaseeGUID); auto GUID = getValueInfoFromValueId(ValueID); AS->setOriginalName(GUID.second); TheIndex.addGlobalValueSummary(GUID.first, std::move(AS)); break; } // FS_PERMODULE_GLOBALVAR_INIT_REFS: [valueid, flags, n x valueid] case bitc::FS_PERMODULE_GLOBALVAR_INIT_REFS: { unsigned ValueID = Record[0]; uint64_t RawFlags = Record[1]; auto Flags = getDecodedGVSummaryFlags(RawFlags, Version); std::vector Refs = makeRefList(ArrayRef(Record).slice(2)); auto FS = llvm::make_unique(Flags, std::move(Refs)); FS->setModulePath(addThisModule()->first()); auto GUID = getValueInfoFromValueId(ValueID); FS->setOriginalName(GUID.second); TheIndex.addGlobalValueSummary(GUID.first, std::move(FS)); break; } // FS_COMBINED: [valueid, modid, flags, instcount, fflags, numrefs, // numrefs x valueid, n x (valueid)] // FS_COMBINED_PROFILE: [valueid, modid, flags, instcount, fflags, numrefs, // numrefs x valueid, n x (valueid, hotness)] case bitc::FS_COMBINED: case bitc::FS_COMBINED_PROFILE: { unsigned ValueID = Record[0]; uint64_t ModuleId = Record[1]; uint64_t RawFlags = Record[2]; unsigned InstCount = Record[3]; uint64_t RawFunFlags = 0; unsigned NumRefs = Record[4]; int RefListStartIndex = 5; if (Version >= 4) { RawFunFlags = Record[4]; NumRefs = Record[5]; RefListStartIndex = 6; } auto Flags = getDecodedGVSummaryFlags(RawFlags, Version); int CallGraphEdgeStartIndex = RefListStartIndex + NumRefs; assert(Record.size() >= RefListStartIndex + NumRefs && "Record size inconsistent with number of references"); std::vector Refs = makeRefList( ArrayRef(Record).slice(RefListStartIndex, NumRefs)); bool HasProfile = (BitCode == bitc::FS_COMBINED_PROFILE); std::vector Edges = makeCallList( ArrayRef(Record).slice(CallGraphEdgeStartIndex), IsOldProfileFormat, HasProfile); ValueInfo VI = getValueInfoFromValueId(ValueID).first; auto FS = llvm::make_unique( Flags, InstCount, getDecodedFFlags(RawFunFlags), std::move(Refs), std::move(Edges), std::move(PendingTypeTests), std::move(PendingTypeTestAssumeVCalls), std::move(PendingTypeCheckedLoadVCalls), std::move(PendingTypeTestAssumeConstVCalls), std::move(PendingTypeCheckedLoadConstVCalls)); PendingTypeTests.clear(); PendingTypeTestAssumeVCalls.clear(); PendingTypeCheckedLoadVCalls.clear(); PendingTypeTestAssumeConstVCalls.clear(); PendingTypeCheckedLoadConstVCalls.clear(); LastSeenSummary = FS.get(); LastSeenGUID = VI.getGUID(); FS->setModulePath(ModuleIdMap[ModuleId]); TheIndex.addGlobalValueSummary(VI, std::move(FS)); break; } // FS_COMBINED_ALIAS: [valueid, modid, flags, valueid] // Aliases must be emitted (and parsed) after all FS_COMBINED entries, as // they expect all aliasee summaries to be available. 
case bitc::FS_COMBINED_ALIAS: { unsigned ValueID = Record[0]; uint64_t ModuleId = Record[1]; uint64_t RawFlags = Record[2]; unsigned AliaseeValueId = Record[3]; auto Flags = getDecodedGVSummaryFlags(RawFlags, Version); auto AS = llvm::make_unique<AliasSummary>(Flags); LastSeenSummary = AS.get(); AS->setModulePath(ModuleIdMap[ModuleId]); auto AliaseeGUID = getValueInfoFromValueId(AliaseeValueId).first.getGUID(); auto AliaseeInModule = TheIndex.findSummaryInModule(AliaseeGUID, AS->modulePath()); AS->setAliasee(AliaseeInModule); AS->setAliaseeGUID(AliaseeGUID); ValueInfo VI = getValueInfoFromValueId(ValueID).first; LastSeenGUID = VI.getGUID(); TheIndex.addGlobalValueSummary(VI, std::move(AS)); break; } // FS_COMBINED_GLOBALVAR_INIT_REFS: [valueid, modid, flags, n x valueid] case bitc::FS_COMBINED_GLOBALVAR_INIT_REFS: { unsigned ValueID = Record[0]; uint64_t ModuleId = Record[1]; uint64_t RawFlags = Record[2]; auto Flags = getDecodedGVSummaryFlags(RawFlags, Version); std::vector<ValueInfo> Refs = makeRefList(ArrayRef<uint64_t>(Record).slice(3)); auto FS = llvm::make_unique<GlobalVarSummary>(Flags, std::move(Refs)); LastSeenSummary = FS.get(); FS->setModulePath(ModuleIdMap[ModuleId]); ValueInfo VI = getValueInfoFromValueId(ValueID).first; LastSeenGUID = VI.getGUID(); TheIndex.addGlobalValueSummary(VI, std::move(FS)); break; } // FS_COMBINED_ORIGINAL_NAME: [original_name] case bitc::FS_COMBINED_ORIGINAL_NAME: { uint64_t OriginalName = Record[0]; if (!LastSeenSummary) return error("Name attachment that does not follow a combined record"); LastSeenSummary->setOriginalName(OriginalName); TheIndex.addOriginalName(LastSeenGUID, OriginalName); // Reset the LastSeenSummary LastSeenSummary = nullptr; LastSeenGUID = 0; break; } case bitc::FS_TYPE_TESTS: assert(PendingTypeTests.empty()); PendingTypeTests.insert(PendingTypeTests.end(), Record.begin(), Record.end()); break; case bitc::FS_TYPE_TEST_ASSUME_VCALLS: assert(PendingTypeTestAssumeVCalls.empty()); for (unsigned I = 0; I != Record.size(); I += 2) PendingTypeTestAssumeVCalls.push_back({Record[I], Record[I+1]}); break; case bitc::FS_TYPE_CHECKED_LOAD_VCALLS: assert(PendingTypeCheckedLoadVCalls.empty()); for (unsigned I = 0; I != Record.size(); I += 2) PendingTypeCheckedLoadVCalls.push_back({Record[I], Record[I+1]}); break; case bitc::FS_TYPE_TEST_ASSUME_CONST_VCALL: PendingTypeTestAssumeConstVCalls.push_back( {{Record[0], Record[1]}, {Record.begin() + 2, Record.end()}}); break; case bitc::FS_TYPE_CHECKED_LOAD_CONST_VCALL: PendingTypeCheckedLoadConstVCalls.push_back( {{Record[0], Record[1]}, {Record.begin() + 2, Record.end()}}); break; case bitc::FS_CFI_FUNCTION_DEFS: { std::set<std::string> &CfiFunctionDefs = TheIndex.cfiFunctionDefs(); for (unsigned I = 0; I != Record.size(); I += 2) CfiFunctionDefs.insert( {Strtab.data() + Record[I], static_cast<size_t>(Record[I + 1])}); break; } case bitc::FS_CFI_FUNCTION_DECLS: { std::set<std::string> &CfiFunctionDecls = TheIndex.cfiFunctionDecls(); for (unsigned I = 0; I != Record.size(); I += 2) CfiFunctionDecls.insert( {Strtab.data() + Record[I], static_cast<size_t>(Record[I + 1])}); break; } } } llvm_unreachable("Exit infinite loop"); } // Parse the module string table block into the Index. // This populates the ModulePathStringTable map in the index.
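// (Editor's sketch, assuming only the record layouts documented above.)
// A small module string table with two modules might decode as:
//   MST_CODE_ENTRY: [0, 'a', '.', 'o']    -> ModuleIdMap[0] = "a.o"
//   MST_CODE_HASH:  [h0, h1, h2, h3, h4]  -> hash stored on "a.o"
//   MST_CODE_ENTRY: [1, 'b', '.', 'o']    -> ModuleIdMap[1] = "b.o"
// A hash record always attaches to the most recent MST_CODE_ENTRY, which is
// why the parser below tracks LastSeenModule.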
Error ModuleSummaryIndexBitcodeReader::parseModuleStringTable() { if (Stream.EnterSubBlock(bitc::MODULE_STRTAB_BLOCK_ID)) return error("Invalid record"); SmallVector<uint64_t, 64> Record; SmallString<128> ModulePath; ModuleSummaryIndex::ModuleInfo *LastSeenModule = nullptr; while (true) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: return Error::success(); case BitstreamEntry::Record: // The interesting case. break; } Record.clear(); switch (Stream.readRecord(Entry.ID, Record)) { default: // Default behavior: ignore. break; case bitc::MST_CODE_ENTRY: { // MST_ENTRY: [modid, namechar x N] uint64_t ModuleId = Record[0]; if (convertToString(Record, 1, ModulePath)) return error("Invalid record"); LastSeenModule = TheIndex.addModule(ModulePath, ModuleId); ModuleIdMap[ModuleId] = LastSeenModule->first(); ModulePath.clear(); break; } /// MST_CODE_HASH: [5*i32] case bitc::MST_CODE_HASH: { if (Record.size() != 5) return error("Invalid hash length " + Twine(Record.size()).str()); if (!LastSeenModule) return error("Invalid hash that does not follow a module path"); int Pos = 0; for (auto &Val : Record) { assert(!(Val >> 32) && "Unexpected high bits set"); LastSeenModule->second.second[Pos++] = Val; } // Reset LastSeenModule to avoid overriding the hash unexpectedly. LastSeenModule = nullptr; break; } } } llvm_unreachable("Exit infinite loop"); } namespace { // FIXME: This class is only here to support the transition to llvm::Error. It // will be removed once this transition is complete. Clients should prefer to // deal with the Error value directly, rather than converting to error_code. class BitcodeErrorCategoryType : public std::error_category { const char *name() const noexcept override { return "llvm.bitcode"; } std::string message(int IE) const override { BitcodeError E = static_cast<BitcodeError>(IE); switch (E) { case BitcodeError::CorruptedBitcode: return "Corrupted bitcode"; } llvm_unreachable("Unknown error type!"); } }; } // end anonymous namespace static ManagedStatic<BitcodeErrorCategoryType> ErrorCategory; const std::error_category &llvm::BitcodeErrorCategory() { return *ErrorCategory; } static Expected<StringRef> readBlobInRecord(BitstreamCursor &Stream, unsigned Block, unsigned RecordID) { if (Stream.EnterSubBlock(Block)) return error("Invalid record"); StringRef Strtab; while (true) { BitstreamEntry Entry = Stream.advance(); switch (Entry.Kind) { case BitstreamEntry::EndBlock: return Strtab; case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::SubBlock: if (Stream.SkipBlock()) return error("Malformed block"); break; case BitstreamEntry::Record: StringRef Blob; SmallVector<uint64_t, 64> Record; if (Stream.readRecord(Entry.ID, Record, &Blob) == RecordID) Strtab = Blob; break; } } } //===----------------------------------------------------------------------===// // External interface //===----------------------------------------------------------------------===// Expected<std::vector<BitcodeModule>> llvm::getBitcodeModuleList(MemoryBufferRef Buffer) { auto FOrErr = getBitcodeFileContents(Buffer); if (!FOrErr) return FOrErr.takeError(); return std::move(FOrErr->Mods); } Expected<BitcodeFileContents> llvm::getBitcodeFileContents(MemoryBufferRef Buffer) { Expected<BitstreamCursor> StreamOrErr = initStream(Buffer); if (!StreamOrErr) return StreamOrErr.takeError(); BitstreamCursor &Stream = *StreamOrErr; BitcodeFileContents F; while (true) { uint64_t BCBegin = Stream.getCurrentByteNo(); // We may be consuming bitcode from
a client that leaves garbage at the end // of the bitcode stream (e.g. Apple's ar tool). If we are close enough to // the end that there cannot possibly be another module, stop looking. if (BCBegin + 8 >= Stream.getBitcodeBytes().size()) return F; BitstreamEntry Entry = Stream.advance(); switch (Entry.Kind) { case BitstreamEntry::EndBlock: case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::SubBlock: { uint64_t IdentificationBit = -1ull; if (Entry.ID == bitc::IDENTIFICATION_BLOCK_ID) { IdentificationBit = Stream.GetCurrentBitNo() - BCBegin * 8; if (Stream.SkipBlock()) return error("Malformed block"); Entry = Stream.advance(); if (Entry.Kind != BitstreamEntry::SubBlock || Entry.ID != bitc::MODULE_BLOCK_ID) return error("Malformed block"); } if (Entry.ID == bitc::MODULE_BLOCK_ID) { uint64_t ModuleBit = Stream.GetCurrentBitNo() - BCBegin * 8; if (Stream.SkipBlock()) return error("Malformed block"); F.Mods.push_back({Stream.getBitcodeBytes().slice( BCBegin, Stream.getCurrentByteNo() - BCBegin), Buffer.getBufferIdentifier(), IdentificationBit, ModuleBit}); continue; } if (Entry.ID == bitc::STRTAB_BLOCK_ID) { Expected<StringRef> Strtab = readBlobInRecord(Stream, bitc::STRTAB_BLOCK_ID, bitc::STRTAB_BLOB); if (!Strtab) return Strtab.takeError(); // This string table is used by every preceding bitcode module that does // not have its own string table. A bitcode file may have multiple // string tables if it was created by binary concatenation, for example // with "llvm-cat -b". for (auto I = F.Mods.rbegin(), E = F.Mods.rend(); I != E; ++I) { if (!I->Strtab.empty()) break; I->Strtab = *Strtab; } // Similarly, the string table is used by every preceding symbol table; // normally there will be just one unless the bitcode file was created // by binary concatenation. if (!F.Symtab.empty() && F.StrtabForSymtab.empty()) F.StrtabForSymtab = *Strtab; continue; } if (Entry.ID == bitc::SYMTAB_BLOCK_ID) { Expected<StringRef> SymtabOrErr = readBlobInRecord(Stream, bitc::SYMTAB_BLOCK_ID, bitc::SYMTAB_BLOB); if (!SymtabOrErr) return SymtabOrErr.takeError(); // We can expect the bitcode file to have multiple symbol tables if it // was created by binary concatenation. In that case we silently // ignore any subsequent symbol tables, which is fine because this is a // low level function. The client is expected to notice that the number // of modules in the symbol table does not match the number of modules // in the input file and regenerate the symbol table. if (F.Symtab.empty()) F.Symtab = *SymtabOrErr; continue; } if (Stream.SkipBlock()) return error("Malformed block"); continue; } case BitstreamEntry::Record: Stream.skipRecord(Entry.ID); continue; } } } /// \brief Get a lazy one-at-time loading module from bitcode. /// /// This isn't always used in a lazy context. In particular, it's also used by /// \a parseModule(). If this is truly lazy, then we need to eagerly pull /// in forward-referenced functions from block address references. /// /// \param[in] MaterializeAll Set to \c true if we should materialize /// everything.
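/// (Editor's illustrative sketch; the driver code below is an assumption,
/// not verbatim LLVM usage.) A minimal lazy-loading client might do:
/// \code
///   Expected<std::unique_ptr<Module>> MOrErr =
///       getLazyBitcodeModule(Buf, Ctx, /*ShouldLazyLoadMetadata=*/true);
///   if (!MOrErr)
///     return MOrErr.takeError();
///   if (Error Err = (*MOrErr)->materializeAll())
///     return Err; // forces all deferred function bodies
/// \endcode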
Expected<std::unique_ptr<Module>> BitcodeModule::getModuleImpl(LLVMContext &Context, bool MaterializeAll, bool ShouldLazyLoadMetadata, bool IsImporting) { BitstreamCursor Stream(Buffer); std::string ProducerIdentification; if (IdentificationBit != -1ull) { Stream.JumpToBit(IdentificationBit); Expected<std::string> ProducerIdentificationOrErr = readIdentificationBlock(Stream); if (!ProducerIdentificationOrErr) return ProducerIdentificationOrErr.takeError(); ProducerIdentification = *ProducerIdentificationOrErr; } Stream.JumpToBit(ModuleBit); auto *R = new BitcodeReader(std::move(Stream), Strtab, ProducerIdentification, Context); std::unique_ptr<Module> M = llvm::make_unique<Module>(ModuleIdentifier, Context); M->setMaterializer(R); // Delay parsing Metadata if ShouldLazyLoadMetadata is true. if (Error Err = R->parseBitcodeInto(M.get(), ShouldLazyLoadMetadata, IsImporting)) return std::move(Err); if (MaterializeAll) { // Read in the entire module, and destroy the BitcodeReader. if (Error Err = M->materializeAll()) return std::move(Err); } else { // Resolve forward references from blockaddresses. if (Error Err = R->materializeForwardReferencedFunctions()) return std::move(Err); } return std::move(M); } Expected<std::unique_ptr<Module>> BitcodeModule::getLazyModule(LLVMContext &Context, bool ShouldLazyLoadMetadata, bool IsImporting) { return getModuleImpl(Context, false, ShouldLazyLoadMetadata, IsImporting); } // Parse the specified bitcode buffer and merge the index into CombinedIndex. // We don't use ModuleIdentifier here because the client may need to control the // module path used in the combined summary (e.g. when reading summaries for // regular LTO modules). Error BitcodeModule::readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath, uint64_t ModuleId) { BitstreamCursor Stream(Buffer); Stream.JumpToBit(ModuleBit); ModuleSummaryIndexBitcodeReader R(std::move(Stream), Strtab, CombinedIndex, ModulePath, ModuleId); return R.parseModule(); } // Parse the specified bitcode buffer, returning the function info index. Expected<std::unique_ptr<ModuleSummaryIndex>> BitcodeModule::getSummary() { BitstreamCursor Stream(Buffer); Stream.JumpToBit(ModuleBit); auto Index = llvm::make_unique<ModuleSummaryIndex>(); ModuleSummaryIndexBitcodeReader R(std::move(Stream), Strtab, *Index, ModuleIdentifier, 0); if (Error Err = R.parseModule()) return std::move(Err); return std::move(Index); } // Check if the given bitcode buffer contains a global value summary block. Expected<BitcodeLTOInfo> BitcodeModule::getLTOInfo() { BitstreamCursor Stream(Buffer); Stream.JumpToBit(ModuleBit); if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) return error("Invalid record"); while (true) { BitstreamEntry Entry = Stream.advance(); switch (Entry.Kind) { case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: return BitcodeLTOInfo{/*IsThinLTO=*/false, /*HasSummary=*/false}; case BitstreamEntry::SubBlock: if (Entry.ID == bitc::GLOBALVAL_SUMMARY_BLOCK_ID) return BitcodeLTOInfo{/*IsThinLTO=*/true, /*HasSummary=*/true}; if (Entry.ID == bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID) return BitcodeLTOInfo{/*IsThinLTO=*/false, /*HasSummary=*/true}; // Ignore other sub-blocks.
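// (Editor's note, an illustrative reading of the switch above.) A plain
// module carrying no summary never hits either summary block id, reaches
// EndBlock, and returns {IsThinLTO=false, HasSummary=false}; a ThinLTO
// module returns {true, true} and a full-LTO module with a summary returns
// {false, true}.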
if (Stream.SkipBlock()) return error("Malformed block"); continue; case BitstreamEntry::Record: Stream.skipRecord(Entry.ID); continue; } } } static Expected<BitcodeModule> getSingleModule(MemoryBufferRef Buffer) { Expected<std::vector<BitcodeModule>> MsOrErr = getBitcodeModuleList(Buffer); if (!MsOrErr) return MsOrErr.takeError(); if (MsOrErr->size() != 1) return error("Expected a single module"); return (*MsOrErr)[0]; } Expected<std::unique_ptr<Module>> llvm::getLazyBitcodeModule(MemoryBufferRef Buffer, LLVMContext &Context, bool ShouldLazyLoadMetadata, bool IsImporting) { Expected<BitcodeModule> BM = getSingleModule(Buffer); if (!BM) return BM.takeError(); return BM->getLazyModule(Context, ShouldLazyLoadMetadata, IsImporting); } Expected<std::unique_ptr<Module>> llvm::getOwningLazyBitcodeModule( std::unique_ptr<MemoryBuffer> &&Buffer, LLVMContext &Context, bool ShouldLazyLoadMetadata, bool IsImporting) { auto MOrErr = getLazyBitcodeModule(*Buffer, Context, ShouldLazyLoadMetadata, IsImporting); if (MOrErr) (*MOrErr)->setOwnedMemoryBuffer(std::move(Buffer)); return MOrErr; } Expected<std::unique_ptr<Module>> BitcodeModule::parseModule(LLVMContext &Context) { return getModuleImpl(Context, true, false, false); // TODO: Restore the use-lists to the in-memory state when the bitcode was // written. We must defer until the Module has been fully materialized. } Expected<std::unique_ptr<Module>> llvm::parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context) { Expected<BitcodeModule> BM = getSingleModule(Buffer); if (!BM) return BM.takeError(); return BM->parseModule(Context); } Expected<std::string> llvm::getBitcodeTargetTriple(MemoryBufferRef Buffer) { Expected<BitstreamCursor> StreamOrErr = initStream(Buffer); if (!StreamOrErr) return StreamOrErr.takeError(); return readTriple(*StreamOrErr); } Expected<bool> llvm::isBitcodeContainingObjCCategory(MemoryBufferRef Buffer) { Expected<BitstreamCursor> StreamOrErr = initStream(Buffer); if (!StreamOrErr) return StreamOrErr.takeError(); return hasObjCCategory(*StreamOrErr); } Expected<std::string> llvm::getBitcodeProducerString(MemoryBufferRef Buffer) { Expected<BitstreamCursor> StreamOrErr = initStream(Buffer); if (!StreamOrErr) return StreamOrErr.takeError(); return readIdentificationCode(*StreamOrErr); } Error llvm::readModuleSummaryIndex(MemoryBufferRef Buffer, ModuleSummaryIndex &CombinedIndex, uint64_t ModuleId) { Expected<BitcodeModule> BM = getSingleModule(Buffer); if (!BM) return BM.takeError(); return BM->readSummary(CombinedIndex, BM->getModuleIdentifier(), ModuleId); } Expected<std::unique_ptr<ModuleSummaryIndex>> llvm::getModuleSummaryIndex(MemoryBufferRef Buffer) { Expected<BitcodeModule> BM = getSingleModule(Buffer); if (!BM) return BM.takeError(); return BM->getSummary(); } Expected<BitcodeLTOInfo> llvm::getBitcodeLTOInfo(MemoryBufferRef Buffer) { Expected<BitcodeModule> BM = getSingleModule(Buffer); if (!BM) return BM.takeError(); return BM->getLTOInfo(); } Expected<std::unique_ptr<ModuleSummaryIndex>> llvm::getModuleSummaryIndexForFile(StringRef Path, bool IgnoreEmptyThinLTOIndexFile) { ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = MemoryBuffer::getFileOrSTDIN(Path); if (!FileOrErr) return errorCodeToError(FileOrErr.getError()); if (IgnoreEmptyThinLTOIndexFile && !(*FileOrErr)->getBufferSize()) return nullptr; return getModuleSummaryIndex(**FileOrErr); } Index: head/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp =================================================================== --- head/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp (revision 329982) +++ head/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp (revision 329983) @@ -1,4260 +1,4260 @@ //===- Bitcode/Writer/BitcodeWriter.cpp - Bitcode Writer ------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details.
// //===----------------------------------------------------------------------===// // // Bitcode writer implementation. // //===----------------------------------------------------------------------===// #include "llvm/Bitcode/BitcodeWriter.h" #include "ValueEnumerator.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/Bitcode/BitCodes.h" #include "llvm/Bitcode/BitstreamWriter.h" #include "llvm/Bitcode/LLVMBitCodes.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Comdat.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalIFunc.h" #include "llvm/IR/GlobalObject.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" #include "llvm/IR/UseListOrder.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueSymbolTable.h" #include "llvm/MC/StringTableBuilder.h" #include "llvm/Object/IRSymtab.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SHA1.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cassert> #include <cstddef> #include <cstdint> #include <iterator> #include <map> #include <memory> #include <string> #include <utility> #include <vector> using namespace llvm; static cl::opt<unsigned> IndexThreshold("bitcode-mdindex-threshold", cl::Hidden, cl::init(25), cl::desc("Number of metadatas above which we emit an index " "to enable lazy-loading")); namespace { /// These are manifest constants used by the bitcode writer. They do not need to /// be kept in sync with the reader, but need to be consistent within this file. enum { // VALUE_SYMTAB_BLOCK abbrev id's. VST_ENTRY_8_ABBREV = bitc::FIRST_APPLICATION_ABBREV, VST_ENTRY_7_ABBREV, VST_ENTRY_6_ABBREV, VST_BBENTRY_6_ABBREV, // CONSTANTS_BLOCK abbrev id's. CONSTANTS_SETTYPE_ABBREV = bitc::FIRST_APPLICATION_ABBREV, CONSTANTS_INTEGER_ABBREV, CONSTANTS_CE_CAST_Abbrev, CONSTANTS_NULL_Abbrev, // FUNCTION_BLOCK abbrev id's. FUNCTION_INST_LOAD_ABBREV = bitc::FIRST_APPLICATION_ABBREV, FUNCTION_INST_BINOP_ABBREV, FUNCTION_INST_BINOP_FLAGS_ABBREV, FUNCTION_INST_CAST_ABBREV, FUNCTION_INST_RET_VOID_ABBREV, FUNCTION_INST_RET_VAL_ABBREV, FUNCTION_INST_UNREACHABLE_ABBREV, FUNCTION_INST_GEP_ABBREV, }; /// Abstract class to manage the bitcode writing, subclassed for each bitcode /// file type. class BitcodeWriterBase { protected: /// The stream created and owned by the client. BitstreamWriter &Stream; StringTableBuilder &StrtabBuilder; public: /// Constructs a BitcodeWriterBase object that writes to the provided /// \p Stream.
BitcodeWriterBase(BitstreamWriter &Stream, StringTableBuilder &StrtabBuilder) : Stream(Stream), StrtabBuilder(StrtabBuilder) {} protected: void writeBitcodeHeader(); void writeModuleVersion(); }; void BitcodeWriterBase::writeModuleVersion() { // VERSION: [version#] Stream.EmitRecord(bitc::MODULE_CODE_VERSION, ArrayRef<uint64_t>{2}); } /// Base class to manage the module bitcode writing, currently subclassed for /// ModuleBitcodeWriter and ThinLinkBitcodeWriter. class ModuleBitcodeWriterBase : public BitcodeWriterBase { protected: /// The Module to write to bitcode. const Module &M; /// Enumerates ids for all values in the module. ValueEnumerator VE; /// Optional per-module index to write for ThinLTO. const ModuleSummaryIndex *Index; /// Map that holds the correspondence between GUIDs in the summary index, /// that came from indirect call profiles, and a value id generated by this /// class to use in the VST and summary block records. std::map<GlobalValue::GUID, unsigned> GUIDToValueIdMap; /// Tracks the last value id recorded in the GUIDToValueMap. unsigned GlobalValueId; /// Saves the offset of the VSTOffset record that must eventually be /// backpatched with the offset of the actual VST. uint64_t VSTOffsetPlaceholder = 0; public: /// Constructs a ModuleBitcodeWriterBase object for the given Module, /// writing to the provided \p Buffer. ModuleBitcodeWriterBase(const Module *M, StringTableBuilder &StrtabBuilder, BitstreamWriter &Stream, bool ShouldPreserveUseListOrder, const ModuleSummaryIndex *Index) : BitcodeWriterBase(Stream, StrtabBuilder), M(*M), VE(*M, ShouldPreserveUseListOrder), Index(Index) { // Assign ValueIds to any callee values in the index that came from // indirect call profiles and were recorded as a GUID not a Value* // (which would have been assigned an ID by the ValueEnumerator). // The starting ValueId is just after the number of values in the // ValueEnumerator, so that they can be emitted in the VST. GlobalValueId = VE.getValues().size(); if (!Index) return; for (const auto &GUIDSummaryLists : *Index) // Examine all summaries for this GUID. for (auto &Summary : GUIDSummaryLists.second.SummaryList) if (auto FS = dyn_cast<FunctionSummary>(Summary.get())) // For each call in the function summary, see if the call // is to a GUID (which means it is for an indirect call, // otherwise we would have a Value for it). If so, synthesize // a value id. for (auto &CallEdge : FS->calls()) if (!CallEdge.first.getValue()) assignValueId(CallEdge.first.getGUID()); } protected: void writePerModuleGlobalValueSummary(); private: void writePerModuleFunctionSummaryRecord(SmallVector<uint64_t, 64> &NameVals, GlobalValueSummary *Summary, unsigned ValueID, unsigned FSCallsAbbrev, unsigned FSCallsProfileAbbrev, const Function &F); void writeModuleLevelReferences(const GlobalVariable &V, SmallVector<uint64_t, 64> &NameVals, unsigned FSModRefsAbbrev); void assignValueId(GlobalValue::GUID ValGUID) { GUIDToValueIdMap[ValGUID] = ++GlobalValueId; } unsigned getValueId(GlobalValue::GUID ValGUID) { const auto &VMI = GUIDToValueIdMap.find(ValGUID); // Expect that any GUID value had a value Id assigned by an // earlier call to assignValueId. assert(VMI != GUIDToValueIdMap.end() && "GUID does not have assigned value Id"); return VMI->second; } // Helper to get the valueId for the type of value recorded in VI. unsigned getValueId(ValueInfo VI) { if (!VI.getValue()) return getValueId(VI.getGUID()); return VE.getValueID(VI.getValue()); } std::map<GlobalValue::GUID, unsigned> &valueIds() { return GUIDToValueIdMap; } }; /// Class to manage the bitcode writing for a module.
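/// (Editor's sketch of typical usage; the setup below is an assumption for
/// illustration, not verbatim LLVM driver code.)
/// \code
///   SmallVector<char, 0> Buffer;
///   BitstreamWriter Stream(Buffer);
///   StringTableBuilder StrtabBuilder(StringTableBuilder::RAW);
///   ModuleBitcodeWriter Writer(&M, Buffer, StrtabBuilder, Stream,
///                              /*ShouldPreserveUseListOrder=*/false,
///                              /*Index=*/nullptr, /*GenerateHash=*/false);
///   Writer.write(); // emits the MODULE block into Buffer
/// \endcode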
class ModuleBitcodeWriter : public ModuleBitcodeWriterBase { /// Pointer to the buffer allocated by caller for bitcode writing. const SmallVectorImpl &Buffer; /// True if a module hash record should be written. bool GenerateHash; /// If non-null, when GenerateHash is true, the resulting hash is written /// into ModHash. ModuleHash *ModHash; SHA1 Hasher; /// The start bit of the identification block. uint64_t BitcodeStartBit; public: /// Constructs a ModuleBitcodeWriter object for the given Module, /// writing to the provided \p Buffer. ModuleBitcodeWriter(const Module *M, SmallVectorImpl &Buffer, StringTableBuilder &StrtabBuilder, BitstreamWriter &Stream, bool ShouldPreserveUseListOrder, const ModuleSummaryIndex *Index, bool GenerateHash, ModuleHash *ModHash = nullptr) : ModuleBitcodeWriterBase(M, StrtabBuilder, Stream, ShouldPreserveUseListOrder, Index), Buffer(Buffer), GenerateHash(GenerateHash), ModHash(ModHash), BitcodeStartBit(Stream.GetCurrentBitNo()) {} /// Emit the current module to the bitstream. void write(); private: uint64_t bitcodeStartBit() { return BitcodeStartBit; } size_t addToStrtab(StringRef Str); void writeAttributeGroupTable(); void writeAttributeTable(); void writeTypeTable(); void writeComdats(); void writeValueSymbolTableForwardDecl(); void writeModuleInfo(); void writeValueAsMetadata(const ValueAsMetadata *MD, SmallVectorImpl &Record); void writeMDTuple(const MDTuple *N, SmallVectorImpl &Record, unsigned Abbrev); unsigned createDILocationAbbrev(); void writeDILocation(const DILocation *N, SmallVectorImpl &Record, unsigned &Abbrev); unsigned createGenericDINodeAbbrev(); void writeGenericDINode(const GenericDINode *N, SmallVectorImpl &Record, unsigned &Abbrev); void writeDISubrange(const DISubrange *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDIEnumerator(const DIEnumerator *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDIBasicType(const DIBasicType *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDIDerivedType(const DIDerivedType *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDICompositeType(const DICompositeType *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDISubroutineType(const DISubroutineType *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDIFile(const DIFile *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDICompileUnit(const DICompileUnit *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDISubprogram(const DISubprogram *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDILexicalBlock(const DILexicalBlock *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDILexicalBlockFile(const DILexicalBlockFile *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDINamespace(const DINamespace *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDIMacro(const DIMacro *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDIMacroFile(const DIMacroFile *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDIModule(const DIModule *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDITemplateTypeParameter(const DITemplateTypeParameter *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDITemplateValueParameter(const DITemplateValueParameter *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDIGlobalVariable(const DIGlobalVariable *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDILocalVariable(const DILocalVariable *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDIExpression(const DIExpression *N, SmallVectorImpl &Record, unsigned Abbrev); void 
writeDIGlobalVariableExpression(const DIGlobalVariableExpression *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDIObjCProperty(const DIObjCProperty *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDIImportedEntity(const DIImportedEntity *N, SmallVectorImpl &Record, unsigned Abbrev); unsigned createNamedMetadataAbbrev(); void writeNamedMetadata(SmallVectorImpl &Record); unsigned createMetadataStringsAbbrev(); void writeMetadataStrings(ArrayRef Strings, SmallVectorImpl &Record); void writeMetadataRecords(ArrayRef MDs, SmallVectorImpl &Record, std::vector *MDAbbrevs = nullptr, std::vector *IndexPos = nullptr); void writeModuleMetadata(); void writeFunctionMetadata(const Function &F); void writeFunctionMetadataAttachment(const Function &F); void writeGlobalVariableMetadataAttachment(const GlobalVariable &GV); void pushGlobalMetadataAttachment(SmallVectorImpl &Record, const GlobalObject &GO); void writeModuleMetadataKinds(); void writeOperandBundleTags(); void writeSyncScopeNames(); void writeConstants(unsigned FirstVal, unsigned LastVal, bool isGlobal); void writeModuleConstants(); bool pushValueAndType(const Value *V, unsigned InstID, SmallVectorImpl &Vals); void writeOperandBundles(ImmutableCallSite CS, unsigned InstID); void pushValue(const Value *V, unsigned InstID, SmallVectorImpl &Vals); void pushValueSigned(const Value *V, unsigned InstID, SmallVectorImpl &Vals); void writeInstruction(const Instruction &I, unsigned InstID, SmallVectorImpl &Vals); void writeFunctionLevelValueSymbolTable(const ValueSymbolTable &VST); void writeGlobalValueSymbolTable( DenseMap &FunctionToBitcodeIndex); void writeUseList(UseListOrder &&Order); void writeUseListBlock(const Function *F); void writeFunction(const Function &F, DenseMap &FunctionToBitcodeIndex); void writeBlockInfo(); void writeModuleHash(size_t BlockStartPos); unsigned getEncodedSyncScopeID(SyncScope::ID SSID) { return unsigned(SSID); } }; /// Class to manage the bitcode writing for a combined index. class IndexBitcodeWriter : public BitcodeWriterBase { /// The combined index to write to bitcode. const ModuleSummaryIndex &Index; /// When writing a subset of the index for distributed backends, client /// provides a map of modules to the corresponding GUIDs/summaries to write. const std::map *ModuleToSummariesForIndex; /// Map that holds the correspondence between the GUID used in the combined /// index and a value id generated by this class to use in references. std::map GUIDToValueIdMap; /// Tracks the last value id recorded in the GUIDToValueMap. unsigned GlobalValueId = 0; public: /// Constructs a IndexBitcodeWriter object for the given combined index, /// writing to the provided \p Buffer. When writing a subset of the index /// for a distributed backend, provide a \p ModuleToSummariesForIndex map. IndexBitcodeWriter(BitstreamWriter &Stream, StringTableBuilder &StrtabBuilder, const ModuleSummaryIndex &Index, const std::map *ModuleToSummariesForIndex = nullptr) : BitcodeWriterBase(Stream, StrtabBuilder), Index(Index), ModuleToSummariesForIndex(ModuleToSummariesForIndex) { // Assign unique value ids to all summaries to be written, for use // in writing out the call graph edges. Save the mapping from GUID // to the new global value id to use when writing those edges, which // are currently saved in the index in terms of GUID. forEachSummary([&](GVInfo I, bool) { GUIDToValueIdMap[I.first] = ++GlobalValueId; }); } /// The below iterator returns the GUID and associated summary. 
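/// (Editor's illustrative sketch.) For example, counting every summary that
/// will be written, including the extra aliasee visits:
/// \code
///   unsigned NumSummaries = 0;
///   forEachSummary([&](GVInfo I, bool IsAliasee) { ++NumSummaries; });
/// \endcode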
using GVInfo = std::pair<GlobalValue::GUID, GlobalValueSummary *>; /// Calls the callback for each value GUID and summary to be written to /// bitcode. This hides the details of whether they are being pulled from the /// entire index or just those in a provided ModuleToSummariesForIndex map. template <typename Functor> void forEachSummary(Functor Callback) { if (ModuleToSummariesForIndex) { for (auto &M : *ModuleToSummariesForIndex) for (auto &Summary : M.second) { Callback(Summary, false); // Ensure aliasee is handled, e.g. for assigning a valueId, // even if we are not importing the aliasee directly (the // imported alias will contain a copy of aliasee). if (auto *AS = dyn_cast<AliasSummary>(Summary.getSecond())) Callback({AS->getAliaseeGUID(), &AS->getAliasee()}, true); } } else { for (auto &Summaries : Index) for (auto &Summary : Summaries.second.SummaryList) Callback({Summaries.first, Summary.get()}, false); } } /// Calls the callback for each entry in the modulePaths StringMap that /// should be written to the module path string table. This hides the details /// of whether they are being pulled from the entire index or just those in a /// provided ModuleToSummariesForIndex map. template <typename Functor> void forEachModule(Functor Callback) { if (ModuleToSummariesForIndex) { for (const auto &M : *ModuleToSummariesForIndex) { const auto &MPI = Index.modulePaths().find(M.first); if (MPI == Index.modulePaths().end()) { // This should only happen if the bitcode file was empty, in which // case we shouldn't be importing (the ModuleToSummariesForIndex // would only include the module we are writing and index for). assert(ModuleToSummariesForIndex->size() == 1); continue; } Callback(*MPI); } } else { for (const auto &MPSE : Index.modulePaths()) Callback(MPSE); } } /// Main entry point for writing a combined index to bitcode. void write(); private: void writeModStrings(); void writeCombinedGlobalValueSummary(); Optional<unsigned> getValueId(GlobalValue::GUID ValGUID) { auto VMI = GUIDToValueIdMap.find(ValGUID); if (VMI == GUIDToValueIdMap.end()) return None; return VMI->second; } std::map<GlobalValue::GUID, unsigned> &valueIds() { return GUIDToValueIdMap; } }; } // end anonymous namespace static unsigned getEncodedCastOpcode(unsigned Opcode) { switch (Opcode) { default: llvm_unreachable("Unknown cast instruction!"); case Instruction::Trunc : return bitc::CAST_TRUNC; case Instruction::ZExt : return bitc::CAST_ZEXT; case Instruction::SExt : return bitc::CAST_SEXT; case Instruction::FPToUI : return bitc::CAST_FPTOUI; case Instruction::FPToSI : return bitc::CAST_FPTOSI; case Instruction::UIToFP : return bitc::CAST_UITOFP; case Instruction::SIToFP : return bitc::CAST_SITOFP; case Instruction::FPTrunc : return bitc::CAST_FPTRUNC; case Instruction::FPExt : return bitc::CAST_FPEXT; case Instruction::PtrToInt: return bitc::CAST_PTRTOINT; case Instruction::IntToPtr: return bitc::CAST_INTTOPTR; case Instruction::BitCast : return bitc::CAST_BITCAST; case Instruction::AddrSpaceCast: return bitc::CAST_ADDRSPACECAST; } } static unsigned getEncodedBinaryOpcode(unsigned Opcode) { switch (Opcode) { default: llvm_unreachable("Unknown binary instruction!"); case Instruction::Add: case Instruction::FAdd: return bitc::BINOP_ADD; case Instruction::Sub: case Instruction::FSub: return bitc::BINOP_SUB; case Instruction::Mul: case Instruction::FMul: return bitc::BINOP_MUL; case Instruction::UDiv: return bitc::BINOP_UDIV; case Instruction::FDiv: case Instruction::SDiv: return bitc::BINOP_SDIV; case Instruction::URem: return bitc::BINOP_UREM; case Instruction::FRem: case Instruction::SRem: return bitc::BINOP_SREM; case Instruction::Shl: return
bitc::BINOP_SHL; case Instruction::LShr: return bitc::BINOP_LSHR; case Instruction::AShr: return bitc::BINOP_ASHR; case Instruction::And: return bitc::BINOP_AND; case Instruction::Or: return bitc::BINOP_OR; case Instruction::Xor: return bitc::BINOP_XOR; } } static unsigned getEncodedRMWOperation(AtomicRMWInst::BinOp Op) { switch (Op) { default: llvm_unreachable("Unknown RMW operation!"); case AtomicRMWInst::Xchg: return bitc::RMW_XCHG; case AtomicRMWInst::Add: return bitc::RMW_ADD; case AtomicRMWInst::Sub: return bitc::RMW_SUB; case AtomicRMWInst::And: return bitc::RMW_AND; case AtomicRMWInst::Nand: return bitc::RMW_NAND; case AtomicRMWInst::Or: return bitc::RMW_OR; case AtomicRMWInst::Xor: return bitc::RMW_XOR; case AtomicRMWInst::Max: return bitc::RMW_MAX; case AtomicRMWInst::Min: return bitc::RMW_MIN; case AtomicRMWInst::UMax: return bitc::RMW_UMAX; case AtomicRMWInst::UMin: return bitc::RMW_UMIN; } } static unsigned getEncodedOrdering(AtomicOrdering Ordering) { switch (Ordering) { case AtomicOrdering::NotAtomic: return bitc::ORDERING_NOTATOMIC; case AtomicOrdering::Unordered: return bitc::ORDERING_UNORDERED; case AtomicOrdering::Monotonic: return bitc::ORDERING_MONOTONIC; case AtomicOrdering::Acquire: return bitc::ORDERING_ACQUIRE; case AtomicOrdering::Release: return bitc::ORDERING_RELEASE; case AtomicOrdering::AcquireRelease: return bitc::ORDERING_ACQREL; case AtomicOrdering::SequentiallyConsistent: return bitc::ORDERING_SEQCST; } llvm_unreachable("Invalid ordering"); } static void writeStringRecord(BitstreamWriter &Stream, unsigned Code, StringRef Str, unsigned AbbrevToUse) { SmallVector Vals; // Code: [strchar x N] for (unsigned i = 0, e = Str.size(); i != e; ++i) { if (AbbrevToUse && !BitCodeAbbrevOp::isChar6(Str[i])) AbbrevToUse = 0; Vals.push_back(Str[i]); } // Emit the finished record. 
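// (Editor's note.) Char6 covers exactly [a-zA-Z0-9._], so common identifiers
// keep the abbreviation; a hypothetical name like "my$global" contains '$',
// which makes the loop above fall back to AbbrevToUse = 0 and the string is
// emitted unabbreviated.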
Stream.EmitRecord(Code, Vals, AbbrevToUse); } static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { switch (Kind) { case Attribute::Alignment: return bitc::ATTR_KIND_ALIGNMENT; case Attribute::AllocSize: return bitc::ATTR_KIND_ALLOC_SIZE; case Attribute::AlwaysInline: return bitc::ATTR_KIND_ALWAYS_INLINE; case Attribute::ArgMemOnly: return bitc::ATTR_KIND_ARGMEMONLY; case Attribute::Builtin: return bitc::ATTR_KIND_BUILTIN; case Attribute::ByVal: return bitc::ATTR_KIND_BY_VAL; case Attribute::Convergent: return bitc::ATTR_KIND_CONVERGENT; case Attribute::InAlloca: return bitc::ATTR_KIND_IN_ALLOCA; case Attribute::Cold: return bitc::ATTR_KIND_COLD; case Attribute::InaccessibleMemOnly: return bitc::ATTR_KIND_INACCESSIBLEMEM_ONLY; case Attribute::InaccessibleMemOrArgMemOnly: return bitc::ATTR_KIND_INACCESSIBLEMEM_OR_ARGMEMONLY; case Attribute::InlineHint: return bitc::ATTR_KIND_INLINE_HINT; case Attribute::InReg: return bitc::ATTR_KIND_IN_REG; case Attribute::JumpTable: return bitc::ATTR_KIND_JUMP_TABLE; case Attribute::MinSize: return bitc::ATTR_KIND_MIN_SIZE; case Attribute::Naked: return bitc::ATTR_KIND_NAKED; case Attribute::Nest: return bitc::ATTR_KIND_NEST; case Attribute::NoAlias: return bitc::ATTR_KIND_NO_ALIAS; case Attribute::NoBuiltin: return bitc::ATTR_KIND_NO_BUILTIN; case Attribute::NoCapture: return bitc::ATTR_KIND_NO_CAPTURE; case Attribute::NoDuplicate: return bitc::ATTR_KIND_NO_DUPLICATE; case Attribute::NoImplicitFloat: return bitc::ATTR_KIND_NO_IMPLICIT_FLOAT; case Attribute::NoInline: return bitc::ATTR_KIND_NO_INLINE; case Attribute::NoRecurse: return bitc::ATTR_KIND_NO_RECURSE; case Attribute::NonLazyBind: return bitc::ATTR_KIND_NON_LAZY_BIND; case Attribute::NonNull: return bitc::ATTR_KIND_NON_NULL; case Attribute::Dereferenceable: return bitc::ATTR_KIND_DEREFERENCEABLE; case Attribute::DereferenceableOrNull: return bitc::ATTR_KIND_DEREFERENCEABLE_OR_NULL; case Attribute::NoRedZone: return bitc::ATTR_KIND_NO_RED_ZONE; case Attribute::NoReturn: return bitc::ATTR_KIND_NO_RETURN; case Attribute::NoUnwind: return bitc::ATTR_KIND_NO_UNWIND; case Attribute::OptimizeForSize: return bitc::ATTR_KIND_OPTIMIZE_FOR_SIZE; case Attribute::OptimizeNone: return bitc::ATTR_KIND_OPTIMIZE_NONE; case Attribute::ReadNone: return bitc::ATTR_KIND_READ_NONE; case Attribute::ReadOnly: return bitc::ATTR_KIND_READ_ONLY; case Attribute::Returned: return bitc::ATTR_KIND_RETURNED; case Attribute::ReturnsTwice: return bitc::ATTR_KIND_RETURNS_TWICE; case Attribute::SExt: return bitc::ATTR_KIND_S_EXT; case Attribute::Speculatable: return bitc::ATTR_KIND_SPECULATABLE; case Attribute::StackAlignment: return bitc::ATTR_KIND_STACK_ALIGNMENT; case Attribute::StackProtect: return bitc::ATTR_KIND_STACK_PROTECT; case Attribute::StackProtectReq: return bitc::ATTR_KIND_STACK_PROTECT_REQ; case Attribute::StackProtectStrong: return bitc::ATTR_KIND_STACK_PROTECT_STRONG; case Attribute::SafeStack: return bitc::ATTR_KIND_SAFESTACK; case Attribute::StrictFP: return bitc::ATTR_KIND_STRICT_FP; case Attribute::StructRet: return bitc::ATTR_KIND_STRUCT_RET; case Attribute::SanitizeAddress: return bitc::ATTR_KIND_SANITIZE_ADDRESS; case Attribute::SanitizeHWAddress: return bitc::ATTR_KIND_SANITIZE_HWADDRESS; case Attribute::SanitizeThread: return bitc::ATTR_KIND_SANITIZE_THREAD; case Attribute::SanitizeMemory: return bitc::ATTR_KIND_SANITIZE_MEMORY; case Attribute::SwiftError: return bitc::ATTR_KIND_SWIFT_ERROR; case Attribute::SwiftSelf: return bitc::ATTR_KIND_SWIFT_SELF; case Attribute::UWTable: return 
bitc::ATTR_KIND_UW_TABLE; case Attribute::WriteOnly: return bitc::ATTR_KIND_WRITEONLY; case Attribute::ZExt: return bitc::ATTR_KIND_Z_EXT; case Attribute::EndAttrKinds: llvm_unreachable("Can not encode end-attribute kinds marker."); case Attribute::None: llvm_unreachable("Can not encode none-attribute."); } llvm_unreachable("Trying to encode unknown attribute"); } void ModuleBitcodeWriter::writeAttributeGroupTable() { const std::vector<ValueEnumerator::IndexAndAttrSet> &AttrGrps = VE.getAttributeGroups(); if (AttrGrps.empty()) return; Stream.EnterSubblock(bitc::PARAMATTR_GROUP_BLOCK_ID, 3); SmallVector<uint64_t, 64> Record; for (ValueEnumerator::IndexAndAttrSet Pair : AttrGrps) { unsigned AttrListIndex = Pair.first; AttributeSet AS = Pair.second; Record.push_back(VE.getAttributeGroupID(Pair)); Record.push_back(AttrListIndex); for (Attribute Attr : AS) { if (Attr.isEnumAttribute()) { Record.push_back(0); Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum())); } else if (Attr.isIntAttribute()) { Record.push_back(1); Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum())); Record.push_back(Attr.getValueAsInt()); } else { StringRef Kind = Attr.getKindAsString(); StringRef Val = Attr.getValueAsString(); Record.push_back(Val.empty() ? 3 : 4); Record.append(Kind.begin(), Kind.end()); Record.push_back(0); if (!Val.empty()) { Record.append(Val.begin(), Val.end()); Record.push_back(0); } } } Stream.EmitRecord(bitc::PARAMATTR_GRP_CODE_ENTRY, Record); Record.clear(); } Stream.ExitBlock(); } void ModuleBitcodeWriter::writeAttributeTable() { const std::vector<AttributeList> &Attrs = VE.getAttributeLists(); if (Attrs.empty()) return; Stream.EnterSubblock(bitc::PARAMATTR_BLOCK_ID, 3); SmallVector<uint64_t, 64> Record; for (unsigned i = 0, e = Attrs.size(); i != e; ++i) { AttributeList AL = Attrs[i]; for (unsigned i = AL.index_begin(), e = AL.index_end(); i != e; ++i) { AttributeSet AS = AL.getAttributes(i); if (AS.hasAttributes()) Record.push_back(VE.getAttributeGroupID({i, AS})); } Stream.EmitRecord(bitc::PARAMATTR_CODE_ENTRY, Record); Record.clear(); } Stream.ExitBlock(); } /// WriteTypeTable - Write out the type table for a module. void ModuleBitcodeWriter::writeTypeTable() { const ValueEnumerator::TypeList &TypeList = VE.getTypes(); Stream.EnterSubblock(bitc::TYPE_BLOCK_ID_NEW, 4 /*count from # abbrevs */); SmallVector<uint64_t, 64> TypeVals; uint64_t NumBits = VE.computeBitsRequiredForTypeIndicies(); // Abbrev for TYPE_CODE_POINTER. auto Abbv = std::make_shared<BitCodeAbbrev>(); Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_POINTER)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits)); Abbv->Add(BitCodeAbbrevOp(0)); // Addrspace = 0 unsigned PtrAbbrev = Stream.EmitAbbrev(std::move(Abbv)); // Abbrev for TYPE_CODE_FUNCTION. Abbv = std::make_shared<BitCodeAbbrev>(); Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_FUNCTION)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // isvararg Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits)); unsigned FunctionAbbrev = Stream.EmitAbbrev(std::move(Abbv)); // Abbrev for TYPE_CODE_STRUCT_ANON. Abbv = std::make_shared<BitCodeAbbrev>(); Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_ANON)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits)); unsigned StructAnonAbbrev = Stream.EmitAbbrev(std::move(Abbv)); // Abbrev for TYPE_CODE_STRUCT_NAME.
Abbv = std::make_shared<BitCodeAbbrev>(); Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_NAME)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); unsigned StructNameAbbrev = Stream.EmitAbbrev(std::move(Abbv)); // Abbrev for TYPE_CODE_STRUCT_NAMED. Abbv = std::make_shared<BitCodeAbbrev>(); Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_NAMED)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits)); unsigned StructNamedAbbrev = Stream.EmitAbbrev(std::move(Abbv)); // Abbrev for TYPE_CODE_ARRAY. Abbv = std::make_shared<BitCodeAbbrev>(); Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_ARRAY)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // size Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits)); unsigned ArrayAbbrev = Stream.EmitAbbrev(std::move(Abbv)); // Emit an entry count so the reader can reserve space. TypeVals.push_back(TypeList.size()); Stream.EmitRecord(bitc::TYPE_CODE_NUMENTRY, TypeVals); TypeVals.clear(); // Loop over all of the types, emitting each in turn. for (unsigned i = 0, e = TypeList.size(); i != e; ++i) { Type *T = TypeList[i]; int AbbrevToUse = 0; unsigned Code = 0; switch (T->getTypeID()) { case Type::VoidTyID: Code = bitc::TYPE_CODE_VOID; break; case Type::HalfTyID: Code = bitc::TYPE_CODE_HALF; break; case Type::FloatTyID: Code = bitc::TYPE_CODE_FLOAT; break; case Type::DoubleTyID: Code = bitc::TYPE_CODE_DOUBLE; break; case Type::X86_FP80TyID: Code = bitc::TYPE_CODE_X86_FP80; break; case Type::FP128TyID: Code = bitc::TYPE_CODE_FP128; break; case Type::PPC_FP128TyID: Code = bitc::TYPE_CODE_PPC_FP128; break; case Type::LabelTyID: Code = bitc::TYPE_CODE_LABEL; break; case Type::MetadataTyID: Code = bitc::TYPE_CODE_METADATA; break; case Type::X86_MMXTyID: Code = bitc::TYPE_CODE_X86_MMX; break; case Type::TokenTyID: Code = bitc::TYPE_CODE_TOKEN; break; case Type::IntegerTyID: // INTEGER: [width] Code = bitc::TYPE_CODE_INTEGER; TypeVals.push_back(cast<IntegerType>(T)->getBitWidth()); break; case Type::PointerTyID: { PointerType *PTy = cast<PointerType>(T); // POINTER: [pointee type, address space] Code = bitc::TYPE_CODE_POINTER; TypeVals.push_back(VE.getTypeID(PTy->getElementType())); unsigned AddressSpace = PTy->getAddressSpace(); TypeVals.push_back(AddressSpace); if (AddressSpace == 0) AbbrevToUse = PtrAbbrev; break; } case Type::FunctionTyID: { FunctionType *FT = cast<FunctionType>(T); // FUNCTION: [isvararg, retty, paramty x N] Code = bitc::TYPE_CODE_FUNCTION; TypeVals.push_back(FT->isVarArg()); TypeVals.push_back(VE.getTypeID(FT->getReturnType())); for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) TypeVals.push_back(VE.getTypeID(FT->getParamType(i))); AbbrevToUse = FunctionAbbrev; break; } case Type::StructTyID: { StructType *ST = cast<StructType>(T); // STRUCT: [ispacked, eltty x N] TypeVals.push_back(ST->isPacked()); // Output all of the element types. for (StructType::element_iterator I = ST->element_begin(), E = ST->element_end(); I != E; ++I) TypeVals.push_back(VE.getTypeID(*I)); if (ST->isLiteral()) { Code = bitc::TYPE_CODE_STRUCT_ANON; AbbrevToUse = StructAnonAbbrev; } else { if (ST->isOpaque()) { Code = bitc::TYPE_CODE_OPAQUE; } else { Code = bitc::TYPE_CODE_STRUCT_NAMED; AbbrevToUse = StructNamedAbbrev; } // Emit the name if it is present.
if (!ST->getName().empty()) writeStringRecord(Stream, bitc::TYPE_CODE_STRUCT_NAME, ST->getName(), StructNameAbbrev); } break; } case Type::ArrayTyID: { ArrayType *AT = cast(T); // ARRAY: [numelts, eltty] Code = bitc::TYPE_CODE_ARRAY; TypeVals.push_back(AT->getNumElements()); TypeVals.push_back(VE.getTypeID(AT->getElementType())); AbbrevToUse = ArrayAbbrev; break; } case Type::VectorTyID: { VectorType *VT = cast(T); // VECTOR [numelts, eltty] Code = bitc::TYPE_CODE_VECTOR; TypeVals.push_back(VT->getNumElements()); TypeVals.push_back(VE.getTypeID(VT->getElementType())); break; } } // Emit the finished record. Stream.EmitRecord(Code, TypeVals, AbbrevToUse); TypeVals.clear(); } Stream.ExitBlock(); } static unsigned getEncodedLinkage(const GlobalValue::LinkageTypes Linkage) { switch (Linkage) { case GlobalValue::ExternalLinkage: return 0; case GlobalValue::WeakAnyLinkage: return 16; case GlobalValue::AppendingLinkage: return 2; case GlobalValue::InternalLinkage: return 3; case GlobalValue::LinkOnceAnyLinkage: return 18; case GlobalValue::ExternalWeakLinkage: return 7; case GlobalValue::CommonLinkage: return 8; case GlobalValue::PrivateLinkage: return 9; case GlobalValue::WeakODRLinkage: return 17; case GlobalValue::LinkOnceODRLinkage: return 19; case GlobalValue::AvailableExternallyLinkage: return 12; } llvm_unreachable("Invalid linkage"); } static unsigned getEncodedLinkage(const GlobalValue &GV) { return getEncodedLinkage(GV.getLinkage()); } static uint64_t getEncodedFFlags(FunctionSummary::FFlags Flags) { uint64_t RawFlags = 0; RawFlags |= Flags.ReadNone; RawFlags |= (Flags.ReadOnly << 1); RawFlags |= (Flags.NoRecurse << 2); RawFlags |= (Flags.ReturnDoesNotAlias << 3); return RawFlags; } // Decode the flags for GlobalValue in the summary static uint64_t getEncodedGVSummaryFlags(GlobalValueSummary::GVFlags Flags) { uint64_t RawFlags = 0; RawFlags |= Flags.NotEligibleToImport; // bool RawFlags |= (Flags.Live << 1); RawFlags |= (Flags.DSOLocal << 2); // Linkage don't need to be remapped at that time for the summary. Any future // change to the getEncodedLinkage() function will need to be taken into // account here as well. 
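// (Editor's worked example of the packing below.) Hypothetical flags
// {NotEligibleToImport=1, Live=0, DSOLocal=1} with InternalLinkage (encoded
// as 3 above) pack to RawFlags = (0b101 << 4) | 3 = 0x53; the reader's
// getDecodedGVSummaryFlags() must apply the exact inverse shifts.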
RawFlags = (RawFlags << 4) | Flags.Linkage; // 4 bits return RawFlags; } static unsigned getEncodedVisibility(const GlobalValue &GV) { switch (GV.getVisibility()) { case GlobalValue::DefaultVisibility: return 0; case GlobalValue::HiddenVisibility: return 1; case GlobalValue::ProtectedVisibility: return 2; } llvm_unreachable("Invalid visibility"); } static unsigned getEncodedDLLStorageClass(const GlobalValue &GV) { switch (GV.getDLLStorageClass()) { case GlobalValue::DefaultStorageClass: return 0; case GlobalValue::DLLImportStorageClass: return 1; case GlobalValue::DLLExportStorageClass: return 2; } llvm_unreachable("Invalid DLL storage class"); } static unsigned getEncodedThreadLocalMode(const GlobalValue &GV) { switch (GV.getThreadLocalMode()) { case GlobalVariable::NotThreadLocal: return 0; case GlobalVariable::GeneralDynamicTLSModel: return 1; case GlobalVariable::LocalDynamicTLSModel: return 2; case GlobalVariable::InitialExecTLSModel: return 3; case GlobalVariable::LocalExecTLSModel: return 4; } llvm_unreachable("Invalid TLS model"); } static unsigned getEncodedComdatSelectionKind(const Comdat &C) { switch (C.getSelectionKind()) { case Comdat::Any: return bitc::COMDAT_SELECTION_KIND_ANY; case Comdat::ExactMatch: return bitc::COMDAT_SELECTION_KIND_EXACT_MATCH; case Comdat::Largest: return bitc::COMDAT_SELECTION_KIND_LARGEST; case Comdat::NoDuplicates: return bitc::COMDAT_SELECTION_KIND_NO_DUPLICATES; case Comdat::SameSize: return bitc::COMDAT_SELECTION_KIND_SAME_SIZE; } llvm_unreachable("Invalid selection kind"); } static unsigned getEncodedUnnamedAddr(const GlobalValue &GV) { switch (GV.getUnnamedAddr()) { case GlobalValue::UnnamedAddr::None: return 0; case GlobalValue::UnnamedAddr::Local: return 2; case GlobalValue::UnnamedAddr::Global: return 1; } llvm_unreachable("Invalid unnamed_addr"); } size_t ModuleBitcodeWriter::addToStrtab(StringRef Str) { if (GenerateHash) Hasher.update(Str); return StrtabBuilder.add(Str); } void ModuleBitcodeWriter::writeComdats() { SmallVector Vals; for (const Comdat *C : VE.getComdats()) { // COMDAT: [strtab offset, strtab size, selection_kind] Vals.push_back(addToStrtab(C->getName())); Vals.push_back(C->getName().size()); Vals.push_back(getEncodedComdatSelectionKind(*C)); Stream.EmitRecord(bitc::MODULE_CODE_COMDAT, Vals, /*AbbrevToUse=*/0); Vals.clear(); } } /// Write a record that will eventually hold the word offset of the /// module-level VST. For now the offset is 0, which will be backpatched /// after the real VST is written. Saves the bit offset to backpatch. void ModuleBitcodeWriter::writeValueSymbolTableForwardDecl() { // Write a placeholder value in for the offset of the real VST, // which is written after the function blocks so that it can include // the offset of each function. The placeholder offset will be // updated when the real VST is written. auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_VSTOFFSET)); // Blocks are 32-bit aligned, so we can use a 32-bit word offset to // hold the real VST offset. Must use fixed instead of VBR as we don't // know how many VBR chunks to reserve ahead of time. Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); unsigned VSTOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbv)); // Emit the placeholder uint64_t Vals[] = {bitc::MODULE_CODE_VSTOFFSET, 0}; Stream.EmitRecordWithAbbrev(VSTOffsetAbbrev, Vals); // Compute and save the bit offset to the placeholder, which will be // patched when the real VST is written. 
// We can simply subtract the 32-bit
// fixed size from the current bit number to get the location to backpatch.
VSTOffsetPlaceholder = Stream.GetCurrentBitNo() - 32;
}

enum StringEncoding { SE_Char6, SE_Fixed7, SE_Fixed8 };

/// Determine the encoding to use for the given string name and length.
static StringEncoding getStringEncoding(StringRef Str) {
  bool isChar6 = true;
  for (char C : Str) {
    if (isChar6)
      isChar6 = BitCodeAbbrevOp::isChar6(C);
    if ((unsigned char)C & 128)
      // don't bother scanning the rest.
      return SE_Fixed8;
  }
  if (isChar6)
    return SE_Char6;
  return SE_Fixed7;
}

/// Emit top-level description of module, including target triple, inline asm,
/// descriptors for global variables, and function prototype info.
/// Returns the bit offset to backpatch with the location of the real VST.
void ModuleBitcodeWriter::writeModuleInfo() {
  // Emit various pieces of data attached to a module.
  if (!M.getTargetTriple().empty())
    writeStringRecord(Stream, bitc::MODULE_CODE_TRIPLE, M.getTargetTriple(),
                      0 /*TODO*/);
  const std::string &DL = M.getDataLayoutStr();
  if (!DL.empty())
    writeStringRecord(Stream, bitc::MODULE_CODE_DATALAYOUT, DL, 0 /*TODO*/);
  if (!M.getModuleInlineAsm().empty())
    writeStringRecord(Stream, bitc::MODULE_CODE_ASM, M.getModuleInlineAsm(),
                      0 /*TODO*/);

  // Emit information about sections and GC, computing how many there are. Also
  // compute the maximum alignment value.
  std::map<std::string, unsigned> SectionMap;
  std::map<std::string, unsigned> GCMap;
  unsigned MaxAlignment = 0;
  unsigned MaxGlobalType = 0;
  for (const GlobalValue &GV : M.globals()) {
    MaxAlignment = std::max(MaxAlignment, GV.getAlignment());
    MaxGlobalType = std::max(MaxGlobalType, VE.getTypeID(GV.getValueType()));
    if (GV.hasSection()) {
      // Give section names unique IDs.
      unsigned &Entry = SectionMap[GV.getSection()];
      if (!Entry) {
        writeStringRecord(Stream, bitc::MODULE_CODE_SECTIONNAME,
                          GV.getSection(), 0 /*TODO*/);
        Entry = SectionMap.size();
      }
    }
  }
  for (const Function &F : M) {
    MaxAlignment = std::max(MaxAlignment, F.getAlignment());
    if (F.hasSection()) {
      // Give section names unique IDs.
      unsigned &Entry = SectionMap[F.getSection()];
      if (!Entry) {
        writeStringRecord(Stream, bitc::MODULE_CODE_SECTIONNAME,
                          F.getSection(), 0 /*TODO*/);
        Entry = SectionMap.size();
      }
    }
    if (F.hasGC()) {
      // Same for GC names.
      unsigned &Entry = GCMap[F.getGC()];
      if (!Entry) {
        writeStringRecord(Stream, bitc::MODULE_CODE_GCNAME, F.getGC(),
                          0 /*TODO*/);
        Entry = GCMap.size();
      }
    }
  }

  // Emit abbrev for globals, now that we know # sections and max alignment.
  unsigned SimpleGVarAbbrev = 0;
  if (!M.global_empty()) {
    // Add an abbrev for common globals with no visibility or thread localness.
    auto Abbv = std::make_shared<BitCodeAbbrev>();
    Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_GLOBALVAR));
    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
                              Log2_32_Ceil(MaxGlobalType+1)));
    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // AddrSpace << 2
                                                         //| explicitType << 1
                                                         //| constant
    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));   // Initializer.
    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 5)); // Linkage.
    if (MaxAlignment == 0)                                 // Alignment.
      Abbv->Add(BitCodeAbbrevOp(0));
    else {
      unsigned MaxEncAlignment = Log2_32(MaxAlignment)+1;
      Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
                                Log2_32_Ceil(MaxEncAlignment+1)));
    }
    if (SectionMap.empty())                                // Section.
      Abbv->Add(BitCodeAbbrevOp(0));
    else
      Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
                                Log2_32_Ceil(SectionMap.size()+1)));
    // Don't bother emitting vis + thread local.
    SimpleGVarAbbrev = Stream.EmitAbbrev(std::move(Abbv));
  }

  SmallVector<unsigned, 64> Vals;
  // Emit the module's source file name.
  {
    StringEncoding Bits = getStringEncoding(M.getSourceFileName());
    BitCodeAbbrevOp AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8);
    if (Bits == SE_Char6)
      AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Char6);
    else if (Bits == SE_Fixed7)
      AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7);

    // MODULE_CODE_SOURCE_FILENAME: [namechar x N]
    auto Abbv = std::make_shared<BitCodeAbbrev>();
    Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_SOURCE_FILENAME));
    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
    Abbv->Add(AbbrevOpToUse);
    unsigned FilenameAbbrev = Stream.EmitAbbrev(std::move(Abbv));

    for (const auto P : M.getSourceFileName())
      Vals.push_back((unsigned char)P);

    // Emit the finished record.
    Stream.EmitRecord(bitc::MODULE_CODE_SOURCE_FILENAME, Vals, FilenameAbbrev);
    Vals.clear();
  }

  // Emit the global variable information.
  for (const GlobalVariable &GV : M.globals()) {
    unsigned AbbrevToUse = 0;

    // GLOBALVAR: [strtab offset, strtab size, type, isconst, initid,
    //             linkage, alignment, section, visibility, threadlocal,
    //             unnamed_addr, externally_initialized, dllstorageclass,
    //             comdat, attributes, DSO_Local]
    Vals.push_back(addToStrtab(GV.getName()));
    Vals.push_back(GV.getName().size());
    Vals.push_back(VE.getTypeID(GV.getValueType()));
    Vals.push_back(GV.getType()->getAddressSpace() << 2 | 2 | GV.isConstant());
    Vals.push_back(GV.isDeclaration() ? 0 :
                   (VE.getValueID(GV.getInitializer()) + 1));
    Vals.push_back(getEncodedLinkage(GV));
    Vals.push_back(Log2_32(GV.getAlignment())+1);
    Vals.push_back(GV.hasSection() ? SectionMap[GV.getSection()] : 0);
    if (GV.isThreadLocal() ||
        GV.getVisibility() != GlobalValue::DefaultVisibility ||
        GV.getUnnamedAddr() != GlobalValue::UnnamedAddr::None ||
        GV.isExternallyInitialized() ||
        GV.getDLLStorageClass() != GlobalValue::DefaultStorageClass ||
        GV.hasComdat() ||
        GV.hasAttributes() ||
        GV.isDSOLocal()) {
      Vals.push_back(getEncodedVisibility(GV));
      Vals.push_back(getEncodedThreadLocalMode(GV));
      Vals.push_back(getEncodedUnnamedAddr(GV));
      Vals.push_back(GV.isExternallyInitialized());
      Vals.push_back(getEncodedDLLStorageClass(GV));
      Vals.push_back(GV.hasComdat() ? VE.getComdatID(GV.getComdat()) : 0);

      auto AL = GV.getAttributesAsList(AttributeList::FunctionIndex);
      Vals.push_back(VE.getAttributeListID(AL));
      Vals.push_back(GV.isDSOLocal());
    } else {
      AbbrevToUse = SimpleGVarAbbrev;
    }

    Stream.EmitRecord(bitc::MODULE_CODE_GLOBALVAR, Vals, AbbrevToUse);
    Vals.clear();
  }

  // Emit the function proto information.
  for (const Function &F : M) {
    // FUNCTION: [strtab offset, strtab size, type, callingconv, isproto,
    //            linkage, paramattrs, alignment, section, visibility, gc,
    //            unnamed_addr, prologuedata, dllstorageclass, comdat,
    //            prefixdata, personalityfn, DSO_Local]
    Vals.push_back(addToStrtab(F.getName()));
    Vals.push_back(F.getName().size());
    Vals.push_back(VE.getTypeID(F.getFunctionType()));
    Vals.push_back(F.getCallingConv());
    Vals.push_back(F.isDeclaration());
    Vals.push_back(getEncodedLinkage(F));
    Vals.push_back(VE.getAttributeListID(F.getAttributes()));
    Vals.push_back(Log2_32(F.getAlignment())+1);
    Vals.push_back(F.hasSection() ? SectionMap[F.getSection()] : 0);
    Vals.push_back(getEncodedVisibility(F));
    Vals.push_back(F.hasGC() ? GCMap[F.getGC()] : 0);
    Vals.push_back(getEncodedUnnamedAddr(F));
    Vals.push_back(F.hasPrologueData() ?
                   (VE.getValueID(F.getPrologueData()) + 1) : 0);
    Vals.push_back(getEncodedDLLStorageClass(F));
    Vals.push_back(F.hasComdat() ? VE.getComdatID(F.getComdat()) : 0);
    Vals.push_back(F.hasPrefixData() ?
                   (VE.getValueID(F.getPrefixData()) + 1) : 0);
    Vals.push_back(
        F.hasPersonalityFn() ? (VE.getValueID(F.getPersonalityFn()) + 1) : 0);
    Vals.push_back(F.isDSOLocal());

    unsigned AbbrevToUse = 0;
    Stream.EmitRecord(bitc::MODULE_CODE_FUNCTION, Vals, AbbrevToUse);
    Vals.clear();
  }

  // Emit the alias information.
  for (const GlobalAlias &A : M.aliases()) {
    // ALIAS: [strtab offset, strtab size, alias type, aliasee val#, linkage,
    //         visibility, dllstorageclass, threadlocal, unnamed_addr,
    //         DSO_Local]
    Vals.push_back(addToStrtab(A.getName()));
    Vals.push_back(A.getName().size());
    Vals.push_back(VE.getTypeID(A.getValueType()));
    Vals.push_back(A.getType()->getAddressSpace());
    Vals.push_back(VE.getValueID(A.getAliasee()));
    Vals.push_back(getEncodedLinkage(A));
    Vals.push_back(getEncodedVisibility(A));
    Vals.push_back(getEncodedDLLStorageClass(A));
    Vals.push_back(getEncodedThreadLocalMode(A));
    Vals.push_back(getEncodedUnnamedAddr(A));
    Vals.push_back(A.isDSOLocal());

    unsigned AbbrevToUse = 0;
    Stream.EmitRecord(bitc::MODULE_CODE_ALIAS, Vals, AbbrevToUse);
    Vals.clear();
  }

  // Emit the ifunc information.
  for (const GlobalIFunc &I : M.ifuncs()) {
    // IFUNC: [strtab offset, strtab size, ifunc type, address space, resolver
    //         val#, linkage, visibility]
    Vals.push_back(addToStrtab(I.getName()));
    Vals.push_back(I.getName().size());
    Vals.push_back(VE.getTypeID(I.getValueType()));
    Vals.push_back(I.getType()->getAddressSpace());
    Vals.push_back(VE.getValueID(I.getResolver()));
    Vals.push_back(getEncodedLinkage(I));
    Vals.push_back(getEncodedVisibility(I));
    Stream.EmitRecord(bitc::MODULE_CODE_IFUNC, Vals);
    Vals.clear();
  }

  writeValueSymbolTableForwardDecl();
}

static uint64_t getOptimizationFlags(const Value *V) {
  uint64_t Flags = 0;

  if (const auto *OBO = dyn_cast<OverflowingBinaryOperator>(V)) {
    if (OBO->hasNoSignedWrap())
      Flags |= 1 << bitc::OBO_NO_SIGNED_WRAP;
    if (OBO->hasNoUnsignedWrap())
      Flags |= 1 << bitc::OBO_NO_UNSIGNED_WRAP;
  } else if (const auto *PEO = dyn_cast<PossiblyExactOperator>(V)) {
    if (PEO->isExact())
      Flags |= 1 << bitc::PEO_EXACT;
  } else if (const auto *FPMO = dyn_cast<FPMathOperator>(V)) {
    if (FPMO->hasAllowReassoc())
-      Flags |= FastMathFlags::AllowReassoc;
+      Flags |= bitc::AllowReassoc;
    if (FPMO->hasNoNaNs())
-      Flags |= FastMathFlags::NoNaNs;
+      Flags |= bitc::NoNaNs;
    if (FPMO->hasNoInfs())
-      Flags |= FastMathFlags::NoInfs;
+      Flags |= bitc::NoInfs;
    if (FPMO->hasNoSignedZeros())
-      Flags |= FastMathFlags::NoSignedZeros;
+      Flags |= bitc::NoSignedZeros;
    if (FPMO->hasAllowReciprocal())
-      Flags |= FastMathFlags::AllowReciprocal;
+      Flags |= bitc::AllowReciprocal;
    if (FPMO->hasAllowContract())
-      Flags |= FastMathFlags::AllowContract;
+      Flags |= bitc::AllowContract;
    if (FPMO->hasApproxFunc())
-      Flags |= FastMathFlags::ApproxFunc;
+      Flags |= bitc::ApproxFunc;
  }

  return Flags;
}

void ModuleBitcodeWriter::writeValueAsMetadata(
    const ValueAsMetadata *MD, SmallVectorImpl<uint64_t> &Record) {
  // Mimic an MDNode with a value as one operand.
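The minus/plus lines above are the substance of this merge: the writer had been emitting the in-memory FastMathFlags bit positions, which differ from the on-disk bitc:: positions because bit 0 on disk is reserved for the legacy UnsafeAlgebra flag. A standalone sketch of the two layouts as I read this release's headers (illustrative, not authoritative):

#include <cstdint>

// On-disk positions (mirrors my reading of bitc::FastMathMap).
enum DiskFMF : uint64_t {
  DiskUnsafeAlgebra   = 1 << 0, // legacy bit, kept so old readers still work
  DiskNoNaNs          = 1 << 1,
  DiskNoInfs          = 1 << 2,
  DiskNoSignedZeros   = 1 << 3,
  DiskAllowReciprocal = 1 << 4,
  DiskAllowContract   = 1 << 5,
  DiskApproxFunc      = 1 << 6,
  DiskAllowReassoc    = 1 << 7  // needs an 8th bit; see the abbrev widening
                                // from 7 to 8 bits later in this patch
};

// In-memory positions (mirrors FastMathFlags); AllowReassoc is bit 0 here,
// so writing it untranslated decodes as legacy UnsafeAlgebra on disk.
enum MemFMF : uint64_t {
  MemAllowReassoc    = 1 << 0,
  MemNoNaNs          = 1 << 1,
  MemNoInfs          = 1 << 2,
  MemNoSignedZeros   = 1 << 3,
  MemAllowReciprocal = 1 << 4,
  MemAllowContract   = 1 << 5,
  MemApproxFunc      = 1 << 6
};

static_assert(uint64_t(DiskAllowReassoc) != uint64_t(MemAllowReassoc),
              "the two layouts disagree exactly where the bug bit");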
Value *V = MD->getValue(); Record.push_back(VE.getTypeID(V->getType())); Record.push_back(VE.getValueID(V)); Stream.EmitRecord(bitc::METADATA_VALUE, Record, 0); Record.clear(); } void ModuleBitcodeWriter::writeMDTuple(const MDTuple *N, SmallVectorImpl &Record, unsigned Abbrev) { for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { Metadata *MD = N->getOperand(i); assert(!(MD && isa(MD)) && "Unexpected function-local metadata"); Record.push_back(VE.getMetadataOrNullID(MD)); } Stream.EmitRecord(N->isDistinct() ? bitc::METADATA_DISTINCT_NODE : bitc::METADATA_NODE, Record, Abbrev); Record.clear(); } unsigned ModuleBitcodeWriter::createDILocationAbbrev() { // Assume the column is usually under 128, and always output the inlined-at // location (it's never more expensive than building an array size 1). auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_LOCATION)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); return Stream.EmitAbbrev(std::move(Abbv)); } void ModuleBitcodeWriter::writeDILocation(const DILocation *N, SmallVectorImpl &Record, unsigned &Abbrev) { if (!Abbrev) Abbrev = createDILocationAbbrev(); Record.push_back(N->isDistinct()); Record.push_back(N->getLine()); Record.push_back(N->getColumn()); Record.push_back(VE.getMetadataID(N->getScope())); Record.push_back(VE.getMetadataOrNullID(N->getInlinedAt())); Stream.EmitRecord(bitc::METADATA_LOCATION, Record, Abbrev); Record.clear(); } unsigned ModuleBitcodeWriter::createGenericDINodeAbbrev() { // Assume the column is usually under 128, and always output the inlined-at // location (it's never more expensive than building an array size 1). auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_GENERIC_DEBUG)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); return Stream.EmitAbbrev(std::move(Abbv)); } void ModuleBitcodeWriter::writeGenericDINode(const GenericDINode *N, SmallVectorImpl &Record, unsigned &Abbrev) { if (!Abbrev) Abbrev = createGenericDINodeAbbrev(); Record.push_back(N->isDistinct()); Record.push_back(N->getTag()); Record.push_back(0); // Per-tag version field; unused for now. for (auto &I : N->operands()) Record.push_back(VE.getMetadataOrNullID(I)); Stream.EmitRecord(bitc::METADATA_GENERIC_DEBUG, Record, Abbrev); Record.clear(); } static uint64_t rotateSign(int64_t I) { uint64_t U = I; return I < 0 ? 
~(U << 1) : U << 1; } void ModuleBitcodeWriter::writeDISubrange(const DISubrange *N, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(N->getCount()); Record.push_back(rotateSign(N->getLowerBound())); Stream.EmitRecord(bitc::METADATA_SUBRANGE, Record, Abbrev); Record.clear(); } void ModuleBitcodeWriter::writeDIEnumerator(const DIEnumerator *N, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(rotateSign(N->getValue())); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); Stream.EmitRecord(bitc::METADATA_ENUMERATOR, Record, Abbrev); Record.clear(); } void ModuleBitcodeWriter::writeDIBasicType(const DIBasicType *N, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(N->getTag()); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); Record.push_back(N->getSizeInBits()); Record.push_back(N->getAlignInBits()); Record.push_back(N->getEncoding()); Stream.EmitRecord(bitc::METADATA_BASIC_TYPE, Record, Abbrev); Record.clear(); } void ModuleBitcodeWriter::writeDIDerivedType(const DIDerivedType *N, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(N->getTag()); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); Record.push_back(VE.getMetadataOrNullID(N->getFile())); Record.push_back(N->getLine()); Record.push_back(VE.getMetadataOrNullID(N->getScope())); Record.push_back(VE.getMetadataOrNullID(N->getBaseType())); Record.push_back(N->getSizeInBits()); Record.push_back(N->getAlignInBits()); Record.push_back(N->getOffsetInBits()); Record.push_back(N->getFlags()); Record.push_back(VE.getMetadataOrNullID(N->getExtraData())); // DWARF address space is encoded as N->getDWARFAddressSpace() + 1. 0 means // that there is no DWARF address space associated with DIDerivedType. 
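For illustration, the same "+1, with 0 meaning absent" trick as a standalone round trip; these are hypothetical helpers over llvm::Optional, not writer code:

#include "llvm/ADT/Optional.h"
#include <cstdint>

static uint64_t encodeAddrSpace(llvm::Optional<unsigned> AS) {
  return AS ? uint64_t(*AS) + 1 : 0; // 0 is reserved for "no address space".
}

static llvm::Optional<unsigned> decodeAddrSpace(uint64_t Field) {
  if (Field == 0)
    return llvm::None;
  return unsigned(Field - 1);
}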
if (const auto &DWARFAddressSpace = N->getDWARFAddressSpace()) Record.push_back(*DWARFAddressSpace + 1); else Record.push_back(0); Stream.EmitRecord(bitc::METADATA_DERIVED_TYPE, Record, Abbrev); Record.clear(); } void ModuleBitcodeWriter::writeDICompositeType( const DICompositeType *N, SmallVectorImpl &Record, unsigned Abbrev) { const unsigned IsNotUsedInOldTypeRef = 0x2; Record.push_back(IsNotUsedInOldTypeRef | (unsigned)N->isDistinct()); Record.push_back(N->getTag()); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); Record.push_back(VE.getMetadataOrNullID(N->getFile())); Record.push_back(N->getLine()); Record.push_back(VE.getMetadataOrNullID(N->getScope())); Record.push_back(VE.getMetadataOrNullID(N->getBaseType())); Record.push_back(N->getSizeInBits()); Record.push_back(N->getAlignInBits()); Record.push_back(N->getOffsetInBits()); Record.push_back(N->getFlags()); Record.push_back(VE.getMetadataOrNullID(N->getElements().get())); Record.push_back(N->getRuntimeLang()); Record.push_back(VE.getMetadataOrNullID(N->getVTableHolder())); Record.push_back(VE.getMetadataOrNullID(N->getTemplateParams().get())); Record.push_back(VE.getMetadataOrNullID(N->getRawIdentifier())); Stream.EmitRecord(bitc::METADATA_COMPOSITE_TYPE, Record, Abbrev); Record.clear(); } void ModuleBitcodeWriter::writeDISubroutineType( const DISubroutineType *N, SmallVectorImpl &Record, unsigned Abbrev) { const unsigned HasNoOldTypeRefs = 0x2; Record.push_back(HasNoOldTypeRefs | (unsigned)N->isDistinct()); Record.push_back(N->getFlags()); Record.push_back(VE.getMetadataOrNullID(N->getTypeArray().get())); Record.push_back(N->getCC()); Stream.EmitRecord(bitc::METADATA_SUBROUTINE_TYPE, Record, Abbrev); Record.clear(); } void ModuleBitcodeWriter::writeDIFile(const DIFile *N, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(VE.getMetadataOrNullID(N->getRawFilename())); Record.push_back(VE.getMetadataOrNullID(N->getRawDirectory())); Record.push_back(N->getChecksumKind()); Record.push_back(VE.getMetadataOrNullID(N->getRawChecksum())); Stream.EmitRecord(bitc::METADATA_FILE, Record, Abbrev); Record.clear(); } void ModuleBitcodeWriter::writeDICompileUnit(const DICompileUnit *N, SmallVectorImpl &Record, unsigned Abbrev) { assert(N->isDistinct() && "Expected distinct compile units"); Record.push_back(/* IsDistinct */ true); Record.push_back(N->getSourceLanguage()); Record.push_back(VE.getMetadataOrNullID(N->getFile())); Record.push_back(VE.getMetadataOrNullID(N->getRawProducer())); Record.push_back(N->isOptimized()); Record.push_back(VE.getMetadataOrNullID(N->getRawFlags())); Record.push_back(N->getRuntimeVersion()); Record.push_back(VE.getMetadataOrNullID(N->getRawSplitDebugFilename())); Record.push_back(N->getEmissionKind()); Record.push_back(VE.getMetadataOrNullID(N->getEnumTypes().get())); Record.push_back(VE.getMetadataOrNullID(N->getRetainedTypes().get())); Record.push_back(/* subprograms */ 0); Record.push_back(VE.getMetadataOrNullID(N->getGlobalVariables().get())); Record.push_back(VE.getMetadataOrNullID(N->getImportedEntities().get())); Record.push_back(N->getDWOId()); Record.push_back(VE.getMetadataOrNullID(N->getMacros().get())); Record.push_back(N->getSplitDebugInlining()); Record.push_back(N->getDebugInfoForProfiling()); Record.push_back(N->getGnuPubnames()); Stream.EmitRecord(bitc::METADATA_COMPILE_UNIT, Record, Abbrev); Record.clear(); } void ModuleBitcodeWriter::writeDISubprogram(const DISubprogram *N, SmallVectorImpl &Record, unsigned Abbrev) { uint64_t HasUnitFlag = 
1 << 1; Record.push_back(N->isDistinct() | HasUnitFlag); Record.push_back(VE.getMetadataOrNullID(N->getScope())); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); Record.push_back(VE.getMetadataOrNullID(N->getRawLinkageName())); Record.push_back(VE.getMetadataOrNullID(N->getFile())); Record.push_back(N->getLine()); Record.push_back(VE.getMetadataOrNullID(N->getType())); Record.push_back(N->isLocalToUnit()); Record.push_back(N->isDefinition()); Record.push_back(N->getScopeLine()); Record.push_back(VE.getMetadataOrNullID(N->getContainingType())); Record.push_back(N->getVirtuality()); Record.push_back(N->getVirtualIndex()); Record.push_back(N->getFlags()); Record.push_back(N->isOptimized()); Record.push_back(VE.getMetadataOrNullID(N->getRawUnit())); Record.push_back(VE.getMetadataOrNullID(N->getTemplateParams().get())); Record.push_back(VE.getMetadataOrNullID(N->getDeclaration())); Record.push_back(VE.getMetadataOrNullID(N->getVariables().get())); Record.push_back(N->getThisAdjustment()); Record.push_back(VE.getMetadataOrNullID(N->getThrownTypes().get())); Stream.EmitRecord(bitc::METADATA_SUBPROGRAM, Record, Abbrev); Record.clear(); } void ModuleBitcodeWriter::writeDILexicalBlock(const DILexicalBlock *N, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(VE.getMetadataOrNullID(N->getScope())); Record.push_back(VE.getMetadataOrNullID(N->getFile())); Record.push_back(N->getLine()); Record.push_back(N->getColumn()); Stream.EmitRecord(bitc::METADATA_LEXICAL_BLOCK, Record, Abbrev); Record.clear(); } void ModuleBitcodeWriter::writeDILexicalBlockFile( const DILexicalBlockFile *N, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(VE.getMetadataOrNullID(N->getScope())); Record.push_back(VE.getMetadataOrNullID(N->getFile())); Record.push_back(N->getDiscriminator()); Stream.EmitRecord(bitc::METADATA_LEXICAL_BLOCK_FILE, Record, Abbrev); Record.clear(); } void ModuleBitcodeWriter::writeDINamespace(const DINamespace *N, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct() | N->getExportSymbols() << 1); Record.push_back(VE.getMetadataOrNullID(N->getScope())); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); Stream.EmitRecord(bitc::METADATA_NAMESPACE, Record, Abbrev); Record.clear(); } void ModuleBitcodeWriter::writeDIMacro(const DIMacro *N, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(N->getMacinfoType()); Record.push_back(N->getLine()); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); Record.push_back(VE.getMetadataOrNullID(N->getRawValue())); Stream.EmitRecord(bitc::METADATA_MACRO, Record, Abbrev); Record.clear(); } void ModuleBitcodeWriter::writeDIMacroFile(const DIMacroFile *N, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(N->getMacinfoType()); Record.push_back(N->getLine()); Record.push_back(VE.getMetadataOrNullID(N->getFile())); Record.push_back(VE.getMetadataOrNullID(N->getElements().get())); Stream.EmitRecord(bitc::METADATA_MACRO_FILE, Record, Abbrev); Record.clear(); } void ModuleBitcodeWriter::writeDIModule(const DIModule *N, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); for (auto &I : N->operands()) Record.push_back(VE.getMetadataOrNullID(I)); Stream.EmitRecord(bitc::METADATA_MODULE, Record, Abbrev); Record.clear(); } void ModuleBitcodeWriter::writeDITemplateTypeParameter( const DITemplateTypeParameter *N, SmallVectorImpl 
<uint64_t> &Record, unsigned Abbrev) {
  Record.push_back(N->isDistinct());
  Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
  Record.push_back(VE.getMetadataOrNullID(N->getType()));
  Stream.EmitRecord(bitc::METADATA_TEMPLATE_TYPE, Record, Abbrev);
  Record.clear();
}

void ModuleBitcodeWriter::writeDITemplateValueParameter(
    const DITemplateValueParameter *N, SmallVectorImpl<uint64_t> &Record,
    unsigned Abbrev) {
  Record.push_back(N->isDistinct());
  Record.push_back(N->getTag());
  Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
  Record.push_back(VE.getMetadataOrNullID(N->getType()));
  Record.push_back(VE.getMetadataOrNullID(N->getValue()));
  Stream.EmitRecord(bitc::METADATA_TEMPLATE_VALUE, Record, Abbrev);
  Record.clear();
}

void ModuleBitcodeWriter::writeDIGlobalVariable(
    const DIGlobalVariable *N, SmallVectorImpl<uint64_t> &Record,
    unsigned Abbrev) {
  const uint64_t Version = 1 << 1;
  Record.push_back((uint64_t)N->isDistinct() | Version);
  Record.push_back(VE.getMetadataOrNullID(N->getScope()));
  Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
  Record.push_back(VE.getMetadataOrNullID(N->getRawLinkageName()));
  Record.push_back(VE.getMetadataOrNullID(N->getFile()));
  Record.push_back(N->getLine());
  Record.push_back(VE.getMetadataOrNullID(N->getType()));
  Record.push_back(N->isLocalToUnit());
  Record.push_back(N->isDefinition());
  Record.push_back(/* expr */ 0);
  Record.push_back(VE.getMetadataOrNullID(N->getStaticDataMemberDeclaration()));
  Record.push_back(N->getAlignInBits());
  Stream.EmitRecord(bitc::METADATA_GLOBAL_VAR, Record, Abbrev);
  Record.clear();
}

void ModuleBitcodeWriter::writeDILocalVariable(
    const DILocalVariable *N, SmallVectorImpl<uint64_t> &Record,
    unsigned Abbrev) {
  // In order to support all possible bitcode formats in BitcodeReader we need
  // to distinguish the following cases:
  // 1) Record has no artificial tag (Record[1]),
  //    has no obsolete inlinedAt field (Record[9]).
  //    In this case Record size will be 8, HasAlignment flag is false.
  // 2) Record has artificial tag (Record[1]),
  //    has no obsolete inlinedAt field (Record[9]).
  //    In this case Record size will be 9, HasAlignment flag is false.
  // 3) Record has both artificial tag (Record[1]) and
  //    obsolete inlinedAt field (Record[9]).
  //    In this case Record size will be 10, HasAlignment flag is false.
  // 4) Record has neither artificial tag, nor inlinedAt field, but
  //    HasAlignment flag is true and Record[8] contains alignment value.
  //    (A reader-side sketch of this case analysis follows below.)
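The writer only ever produces case 4 now; the other cases exist so the reader can still decode older bitcode. A minimal reader-side sketch of that dispatch (hypothetical helper, not actual BitcodeReader code):

#include "llvm/ADT/ArrayRef.h"
#include <cstdint>

// Hypothetical: recover the alignment (0 if absent) from a METADATA_LOCAL_VAR
// record, following cases 1-4 above. Bit 1 of Record[0] is HasAlignment.
static uint64_t localVarAlignment(llvm::ArrayRef<uint64_t> Record) {
  bool HasAlignment = Record[0] & 2;
  // Cases 1-3: legacy layouts (sizes 8, 9, 10) carry no alignment at all.
  if (!HasAlignment)
    return 0;
  // Case 4: current layout, alignment stored in Record[8].
  return Record[8];
}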
const uint64_t HasAlignmentFlag = 1 << 1; Record.push_back((uint64_t)N->isDistinct() | HasAlignmentFlag); Record.push_back(VE.getMetadataOrNullID(N->getScope())); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); Record.push_back(VE.getMetadataOrNullID(N->getFile())); Record.push_back(N->getLine()); Record.push_back(VE.getMetadataOrNullID(N->getType())); Record.push_back(N->getArg()); Record.push_back(N->getFlags()); Record.push_back(N->getAlignInBits()); Stream.EmitRecord(bitc::METADATA_LOCAL_VAR, Record, Abbrev); Record.clear(); } void ModuleBitcodeWriter::writeDIExpression(const DIExpression *N, SmallVectorImpl &Record, unsigned Abbrev) { Record.reserve(N->getElements().size() + 1); const uint64_t Version = 3 << 1; Record.push_back((uint64_t)N->isDistinct() | Version); Record.append(N->elements_begin(), N->elements_end()); Stream.EmitRecord(bitc::METADATA_EXPRESSION, Record, Abbrev); Record.clear(); } void ModuleBitcodeWriter::writeDIGlobalVariableExpression( const DIGlobalVariableExpression *N, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(VE.getMetadataOrNullID(N->getVariable())); Record.push_back(VE.getMetadataOrNullID(N->getExpression())); Stream.EmitRecord(bitc::METADATA_GLOBAL_VAR_EXPR, Record, Abbrev); Record.clear(); } void ModuleBitcodeWriter::writeDIObjCProperty(const DIObjCProperty *N, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); Record.push_back(VE.getMetadataOrNullID(N->getFile())); Record.push_back(N->getLine()); Record.push_back(VE.getMetadataOrNullID(N->getRawSetterName())); Record.push_back(VE.getMetadataOrNullID(N->getRawGetterName())); Record.push_back(N->getAttributes()); Record.push_back(VE.getMetadataOrNullID(N->getType())); Stream.EmitRecord(bitc::METADATA_OBJC_PROPERTY, Record, Abbrev); Record.clear(); } void ModuleBitcodeWriter::writeDIImportedEntity( const DIImportedEntity *N, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(N->getTag()); Record.push_back(VE.getMetadataOrNullID(N->getScope())); Record.push_back(VE.getMetadataOrNullID(N->getEntity())); Record.push_back(N->getLine()); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); Record.push_back(VE.getMetadataOrNullID(N->getRawFile())); Stream.EmitRecord(bitc::METADATA_IMPORTED_ENTITY, Record, Abbrev); Record.clear(); } unsigned ModuleBitcodeWriter::createNamedMetadataAbbrev() { auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_NAME)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); return Stream.EmitAbbrev(std::move(Abbv)); } void ModuleBitcodeWriter::writeNamedMetadata( SmallVectorImpl &Record) { if (M.named_metadata_empty()) return; unsigned Abbrev = createNamedMetadataAbbrev(); for (const NamedMDNode &NMD : M.named_metadata()) { // Write name. StringRef Str = NMD.getName(); Record.append(Str.bytes_begin(), Str.bytes_end()); Stream.EmitRecord(bitc::METADATA_NAME, Record, Abbrev); Record.clear(); // Write named metadata operands. 
for (const MDNode *N : NMD.operands()) Record.push_back(VE.getMetadataID(N)); Stream.EmitRecord(bitc::METADATA_NAMED_NODE, Record, 0); Record.clear(); } } unsigned ModuleBitcodeWriter::createMetadataStringsAbbrev() { auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_STRINGS)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // # of strings Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // offset to chars Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); return Stream.EmitAbbrev(std::move(Abbv)); } /// Write out a record for MDString. /// /// All the metadata strings in a metadata block are emitted in a single /// record. The sizes and strings themselves are shoved into a blob. void ModuleBitcodeWriter::writeMetadataStrings( ArrayRef Strings, SmallVectorImpl &Record) { if (Strings.empty()) return; // Start the record with the number of strings. Record.push_back(bitc::METADATA_STRINGS); Record.push_back(Strings.size()); // Emit the sizes of the strings in the blob. SmallString<256> Blob; { BitstreamWriter W(Blob); for (const Metadata *MD : Strings) W.EmitVBR(cast(MD)->getLength(), 6); W.FlushToWord(); } // Add the offset to the strings to the record. Record.push_back(Blob.size()); // Add the strings to the blob. for (const Metadata *MD : Strings) Blob.append(cast(MD)->getString()); // Emit the final record. Stream.EmitRecordWithBlob(createMetadataStringsAbbrev(), Record, Blob); Record.clear(); } // Generates an enum to use as an index in the Abbrev array of Metadata record. enum MetadataAbbrev : unsigned { #define HANDLE_MDNODE_LEAF(CLASS) CLASS##AbbrevID, #include "llvm/IR/Metadata.def" LastPlusOne }; void ModuleBitcodeWriter::writeMetadataRecords( ArrayRef MDs, SmallVectorImpl &Record, std::vector *MDAbbrevs, std::vector *IndexPos) { if (MDs.empty()) return; // Initialize MDNode abbreviations. #define HANDLE_MDNODE_LEAF(CLASS) unsigned CLASS##Abbrev = 0; #include "llvm/IR/Metadata.def" for (const Metadata *MD : MDs) { if (IndexPos) IndexPos->push_back(Stream.GetCurrentBitNo()); if (const MDNode *N = dyn_cast(MD)) { assert(N->isResolved() && "Expected forward references to be resolved"); switch (N->getMetadataID()) { default: llvm_unreachable("Invalid MDNode subclass"); #define HANDLE_MDNODE_LEAF(CLASS) \ case Metadata::CLASS##Kind: \ if (MDAbbrevs) \ write##CLASS(cast(N), Record, \ (*MDAbbrevs)[MetadataAbbrev::CLASS##AbbrevID]); \ else \ write##CLASS(cast(N), Record, CLASS##Abbrev); \ continue; #include "llvm/IR/Metadata.def" } } writeValueAsMetadata(cast(MD), Record); } } void ModuleBitcodeWriter::writeModuleMetadata() { if (!VE.hasMDs() && M.named_metadata_empty()) return; Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 4); SmallVector Record; // Emit all abbrevs upfront, so that the reader can jump in the middle of the // block and load any metadata. 
std::vector MDAbbrevs; MDAbbrevs.resize(MetadataAbbrev::LastPlusOne); MDAbbrevs[MetadataAbbrev::DILocationAbbrevID] = createDILocationAbbrev(); MDAbbrevs[MetadataAbbrev::GenericDINodeAbbrevID] = createGenericDINodeAbbrev(); auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_INDEX_OFFSET)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); unsigned OffsetAbbrev = Stream.EmitAbbrev(std::move(Abbv)); Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_INDEX)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); unsigned IndexAbbrev = Stream.EmitAbbrev(std::move(Abbv)); // Emit MDStrings together upfront. writeMetadataStrings(VE.getMDStrings(), Record); // We only emit an index for the metadata record if we have more than a given // (naive) threshold of metadatas, otherwise it is not worth it. if (VE.getNonMDStrings().size() > IndexThreshold) { // Write a placeholder value in for the offset of the metadata index, // which is written after the records, so that it can include // the offset of each entry. The placeholder offset will be // updated after all records are emitted. uint64_t Vals[] = {0, 0}; Stream.EmitRecord(bitc::METADATA_INDEX_OFFSET, Vals, OffsetAbbrev); } // Compute and save the bit offset to the current position, which will be // patched when we emit the index later. We can simply subtract the 64-bit // fixed size from the current bit number to get the location to backpatch. uint64_t IndexOffsetRecordBitPos = Stream.GetCurrentBitNo(); // This index will contain the bitpos for each individual record. std::vector IndexPos; IndexPos.reserve(VE.getNonMDStrings().size()); // Write all the records writeMetadataRecords(VE.getNonMDStrings(), Record, &MDAbbrevs, &IndexPos); if (VE.getNonMDStrings().size() > IndexThreshold) { // Now that we have emitted all the records we will emit the index. But // first // backpatch the forward reference so that the reader can skip the records // efficiently. Stream.BackpatchWord64(IndexOffsetRecordBitPos - 64, Stream.GetCurrentBitNo() - IndexOffsetRecordBitPos); // Delta encode the index. uint64_t PreviousValue = IndexOffsetRecordBitPos; for (auto &Elt : IndexPos) { auto EltDelta = Elt - PreviousValue; PreviousValue = Elt; Elt = EltDelta; } // Emit the index record. Stream.EmitRecord(bitc::METADATA_INDEX, IndexPos, IndexAbbrev); IndexPos.clear(); } // Write the named metadata now. writeNamedMetadata(Record); auto AddDeclAttachedMetadata = [&](const GlobalObject &GO) { SmallVector Record; Record.push_back(VE.getValueID(&GO)); pushGlobalMetadataAttachment(Record, GO); Stream.EmitRecord(bitc::METADATA_GLOBAL_DECL_ATTACHMENT, Record); }; for (const Function &F : M) if (F.isDeclaration() && F.hasMetadata()) AddDeclAttachedMetadata(F); // FIXME: Only store metadata for declarations here, and move data for global // variable definitions to a separate block (PR28134). 
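Since the index stores absolute bit positions delta-encoded against the position of the METADATA_INDEX_OFFSET record, a reader reverses it with a running sum. A minimal sketch of both directions, assuming nothing beyond a vector of positions:

#include <cstdint>
#include <vector>

// Forward direction: rewrite absolute positions as deltas, as done above.
void deltaEncode(std::vector<uint64_t> &Pos, uint64_t Base) {
  uint64_t Prev = Base;
  for (auto &Elt : Pos) {
    uint64_t Delta = Elt - Prev;
    Prev = Elt;
    Elt = Delta;
  }
}

// Reverse direction: what a reader does to recover absolute bit positions.
void deltaDecode(std::vector<uint64_t> &Pos, uint64_t Base) {
  uint64_t Prev = Base;
  for (auto &Elt : Pos) {
    Elt += Prev;
    Prev = Elt;
  }
}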
for (const GlobalVariable &GV : M.globals()) if (GV.hasMetadata()) AddDeclAttachedMetadata(GV); Stream.ExitBlock(); } void ModuleBitcodeWriter::writeFunctionMetadata(const Function &F) { if (!VE.hasMDs()) return; Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3); SmallVector Record; writeMetadataStrings(VE.getMDStrings(), Record); writeMetadataRecords(VE.getNonMDStrings(), Record); Stream.ExitBlock(); } void ModuleBitcodeWriter::pushGlobalMetadataAttachment( SmallVectorImpl &Record, const GlobalObject &GO) { // [n x [id, mdnode]] SmallVector, 4> MDs; GO.getAllMetadata(MDs); for (const auto &I : MDs) { Record.push_back(I.first); Record.push_back(VE.getMetadataID(I.second)); } } void ModuleBitcodeWriter::writeFunctionMetadataAttachment(const Function &F) { Stream.EnterSubblock(bitc::METADATA_ATTACHMENT_ID, 3); SmallVector Record; if (F.hasMetadata()) { pushGlobalMetadataAttachment(Record, F); Stream.EmitRecord(bitc::METADATA_ATTACHMENT, Record, 0); Record.clear(); } // Write metadata attachments // METADATA_ATTACHMENT - [m x [value, [n x [id, mdnode]]] SmallVector, 4> MDs; for (const BasicBlock &BB : F) for (const Instruction &I : BB) { MDs.clear(); I.getAllMetadataOtherThanDebugLoc(MDs); // If no metadata, ignore instruction. if (MDs.empty()) continue; Record.push_back(VE.getInstructionID(&I)); for (unsigned i = 0, e = MDs.size(); i != e; ++i) { Record.push_back(MDs[i].first); Record.push_back(VE.getMetadataID(MDs[i].second)); } Stream.EmitRecord(bitc::METADATA_ATTACHMENT, Record, 0); Record.clear(); } Stream.ExitBlock(); } void ModuleBitcodeWriter::writeModuleMetadataKinds() { SmallVector Record; // Write metadata kinds // METADATA_KIND - [n x [id, name]] SmallVector Names; M.getMDKindNames(Names); if (Names.empty()) return; Stream.EnterSubblock(bitc::METADATA_KIND_BLOCK_ID, 3); for (unsigned MDKindID = 0, e = Names.size(); MDKindID != e; ++MDKindID) { Record.push_back(MDKindID); StringRef KName = Names[MDKindID]; Record.append(KName.begin(), KName.end()); Stream.EmitRecord(bitc::METADATA_KIND, Record, 0); Record.clear(); } Stream.ExitBlock(); } void ModuleBitcodeWriter::writeOperandBundleTags() { // Write metadata kinds // // OPERAND_BUNDLE_TAGS_BLOCK_ID : N x OPERAND_BUNDLE_TAG // // OPERAND_BUNDLE_TAG - [strchr x N] SmallVector Tags; M.getOperandBundleTags(Tags); if (Tags.empty()) return; Stream.EnterSubblock(bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID, 3); SmallVector Record; for (auto Tag : Tags) { Record.append(Tag.begin(), Tag.end()); Stream.EmitRecord(bitc::OPERAND_BUNDLE_TAG, Record, 0); Record.clear(); } Stream.ExitBlock(); } void ModuleBitcodeWriter::writeSyncScopeNames() { SmallVector SSNs; M.getContext().getSyncScopeNames(SSNs); if (SSNs.empty()) return; Stream.EnterSubblock(bitc::SYNC_SCOPE_NAMES_BLOCK_ID, 2); SmallVector Record; for (auto SSN : SSNs) { Record.append(SSN.begin(), SSN.end()); Stream.EmitRecord(bitc::SYNC_SCOPE_NAME, Record, 0); Record.clear(); } Stream.ExitBlock(); } static void emitSignedInt64(SmallVectorImpl &Vals, uint64_t V) { if ((int64_t)V >= 0) Vals.push_back(V << 1); else Vals.push_back((-V << 1) | 1); } void ModuleBitcodeWriter::writeConstants(unsigned FirstVal, unsigned LastVal, bool isGlobal) { if (FirstVal == LastVal) return; Stream.EnterSubblock(bitc::CONSTANTS_BLOCK_ID, 4); unsigned AggregateAbbrev = 0; unsigned String8Abbrev = 0; unsigned CString7Abbrev = 0; unsigned CString6Abbrev = 0; // If this is a constant pool for the module, emit module-specific abbrevs. if (isGlobal) { // Abbrev for CST_CODE_AGGREGATE. 
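A note on emitSignedInt64 above, which the CST_CODE_INTEGER records below rely on: it maps signed values onto unsigned, VBR-friendly ones by shifting the magnitude left and putting the sign in bit 0, so small magnitudes stay small. A compile-time sketch of that mapping:

#include <cstdint>

constexpr uint64_t encodeSigned(int64_t V) {
  return V >= 0 ? uint64_t(V) << 1 : (uint64_t(-V) << 1) | 1;
}
constexpr int64_t decodeSigned(uint64_t V) {
  return (V & 1) ? -int64_t(V >> 1) : int64_t(V >> 1);
}

static_assert(encodeSigned(0) == 0 && encodeSigned(1) == 2 &&
              encodeSigned(-1) == 3, "sign lands in bit 0");
static_assert(decodeSigned(encodeSigned(-42)) == -42, "round trips");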
auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_AGGREGATE)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, Log2_32_Ceil(LastVal+1))); AggregateAbbrev = Stream.EmitAbbrev(std::move(Abbv)); // Abbrev for CST_CODE_STRING. Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_STRING)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); String8Abbrev = Stream.EmitAbbrev(std::move(Abbv)); // Abbrev for CST_CODE_CSTRING. Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CSTRING)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); CString7Abbrev = Stream.EmitAbbrev(std::move(Abbv)); // Abbrev for CST_CODE_CSTRING. Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CSTRING)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); CString6Abbrev = Stream.EmitAbbrev(std::move(Abbv)); } SmallVector Record; const ValueEnumerator::ValueList &Vals = VE.getValues(); Type *LastTy = nullptr; for (unsigned i = FirstVal; i != LastVal; ++i) { const Value *V = Vals[i].first; // If we need to switch types, do so now. if (V->getType() != LastTy) { LastTy = V->getType(); Record.push_back(VE.getTypeID(LastTy)); Stream.EmitRecord(bitc::CST_CODE_SETTYPE, Record, CONSTANTS_SETTYPE_ABBREV); Record.clear(); } if (const InlineAsm *IA = dyn_cast(V)) { Record.push_back(unsigned(IA->hasSideEffects()) | unsigned(IA->isAlignStack()) << 1 | unsigned(IA->getDialect()&1) << 2); // Add the asm string. const std::string &AsmStr = IA->getAsmString(); Record.push_back(AsmStr.size()); Record.append(AsmStr.begin(), AsmStr.end()); // Add the constraint string. const std::string &ConstraintStr = IA->getConstraintString(); Record.push_back(ConstraintStr.size()); Record.append(ConstraintStr.begin(), ConstraintStr.end()); Stream.EmitRecord(bitc::CST_CODE_INLINEASM, Record); Record.clear(); continue; } const Constant *C = cast(V); unsigned Code = -1U; unsigned AbbrevToUse = 0; if (C->isNullValue()) { Code = bitc::CST_CODE_NULL; } else if (isa(C)) { Code = bitc::CST_CODE_UNDEF; } else if (const ConstantInt *IV = dyn_cast(C)) { if (IV->getBitWidth() <= 64) { uint64_t V = IV->getSExtValue(); emitSignedInt64(Record, V); Code = bitc::CST_CODE_INTEGER; AbbrevToUse = CONSTANTS_INTEGER_ABBREV; } else { // Wide integers, > 64 bits in size. // We have an arbitrary precision integer value to write whose // bit width is > 64. However, in canonical unsigned integer // format it is likely that the high bits are going to be zero. // So, we only write the number of active words. unsigned NWords = IV->getValue().getActiveWords(); const uint64_t *RawWords = IV->getValue().getRawData(); for (unsigned i = 0; i != NWords; ++i) { emitSignedInt64(Record, RawWords[i]); } Code = bitc::CST_CODE_WIDE_INTEGER; } } else if (const ConstantFP *CFP = dyn_cast(C)) { Code = bitc::CST_CODE_FLOAT; Type *Ty = CFP->getType(); if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy()) { Record.push_back(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); } else if (Ty->isX86_FP80Ty()) { // api needed to prevent premature destruction // bits are not in the same order as a normal i80 APInt, compensate. 
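// To spell out the compensation below (my reading of the code, widths in
// bits): bitcastToAPInt() on x86_fp80 yields two 64-bit words, p[0] = the
// low 64 bits (the mantissa) and p[1] = the high 16 bits (sign + exponent).
// The record instead stores the value's high 64 bits first, then the low 16,
// hence the shift-and-mask pair:
//
//   APInt words: p[1] = [ 48 zero | 16 sign+exp ], p[0] = [ 64 mantissa ]
//   Record[0]   = (p[1] << 48) | (p[0] >> 16)   // sign+exp : mantissa[63:16]
//   Record[1]   = p[0] & 0xffff                 // mantissa[15:0]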
APInt api = CFP->getValueAPF().bitcastToAPInt(); const uint64_t *p = api.getRawData(); Record.push_back((p[1] << 48) | (p[0] >> 16)); Record.push_back(p[0] & 0xffffLL); } else if (Ty->isFP128Ty() || Ty->isPPC_FP128Ty()) { APInt api = CFP->getValueAPF().bitcastToAPInt(); const uint64_t *p = api.getRawData(); Record.push_back(p[0]); Record.push_back(p[1]); } else { assert(0 && "Unknown FP type!"); } } else if (isa(C) && cast(C)->isString()) { const ConstantDataSequential *Str = cast(C); // Emit constant strings specially. unsigned NumElts = Str->getNumElements(); // If this is a null-terminated string, use the denser CSTRING encoding. if (Str->isCString()) { Code = bitc::CST_CODE_CSTRING; --NumElts; // Don't encode the null, which isn't allowed by char6. } else { Code = bitc::CST_CODE_STRING; AbbrevToUse = String8Abbrev; } bool isCStr7 = Code == bitc::CST_CODE_CSTRING; bool isCStrChar6 = Code == bitc::CST_CODE_CSTRING; for (unsigned i = 0; i != NumElts; ++i) { unsigned char V = Str->getElementAsInteger(i); Record.push_back(V); isCStr7 &= (V & 128) == 0; if (isCStrChar6) isCStrChar6 = BitCodeAbbrevOp::isChar6(V); } if (isCStrChar6) AbbrevToUse = CString6Abbrev; else if (isCStr7) AbbrevToUse = CString7Abbrev; } else if (const ConstantDataSequential *CDS = dyn_cast(C)) { Code = bitc::CST_CODE_DATA; Type *EltTy = CDS->getType()->getElementType(); if (isa(EltTy)) { for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) Record.push_back(CDS->getElementAsInteger(i)); } else { for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) Record.push_back( CDS->getElementAsAPFloat(i).bitcastToAPInt().getLimitedValue()); } } else if (isa(C)) { Code = bitc::CST_CODE_AGGREGATE; for (const Value *Op : C->operands()) Record.push_back(VE.getValueID(Op)); AbbrevToUse = AggregateAbbrev; } else if (const ConstantExpr *CE = dyn_cast(C)) { switch (CE->getOpcode()) { default: if (Instruction::isCast(CE->getOpcode())) { Code = bitc::CST_CODE_CE_CAST; Record.push_back(getEncodedCastOpcode(CE->getOpcode())); Record.push_back(VE.getTypeID(C->getOperand(0)->getType())); Record.push_back(VE.getValueID(C->getOperand(0))); AbbrevToUse = CONSTANTS_CE_CAST_Abbrev; } else { assert(CE->getNumOperands() == 2 && "Unknown constant expr!"); Code = bitc::CST_CODE_CE_BINOP; Record.push_back(getEncodedBinaryOpcode(CE->getOpcode())); Record.push_back(VE.getValueID(C->getOperand(0))); Record.push_back(VE.getValueID(C->getOperand(1))); uint64_t Flags = getOptimizationFlags(CE); if (Flags != 0) Record.push_back(Flags); } break; case Instruction::GetElementPtr: { Code = bitc::CST_CODE_CE_GEP; const auto *GO = cast(C); Record.push_back(VE.getTypeID(GO->getSourceElementType())); if (Optional Idx = GO->getInRangeIndex()) { Code = bitc::CST_CODE_CE_GEP_WITH_INRANGE_INDEX; Record.push_back((*Idx << 1) | GO->isInBounds()); } else if (GO->isInBounds()) Code = bitc::CST_CODE_CE_INBOUNDS_GEP; for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i) { Record.push_back(VE.getTypeID(C->getOperand(i)->getType())); Record.push_back(VE.getValueID(C->getOperand(i))); } break; } case Instruction::Select: Code = bitc::CST_CODE_CE_SELECT; Record.push_back(VE.getValueID(C->getOperand(0))); Record.push_back(VE.getValueID(C->getOperand(1))); Record.push_back(VE.getValueID(C->getOperand(2))); break; case Instruction::ExtractElement: Code = bitc::CST_CODE_CE_EXTRACTELT; Record.push_back(VE.getTypeID(C->getOperand(0)->getType())); Record.push_back(VE.getValueID(C->getOperand(0))); Record.push_back(VE.getTypeID(C->getOperand(1)->getType())); 
Record.push_back(VE.getValueID(C->getOperand(1))); break; case Instruction::InsertElement: Code = bitc::CST_CODE_CE_INSERTELT; Record.push_back(VE.getValueID(C->getOperand(0))); Record.push_back(VE.getValueID(C->getOperand(1))); Record.push_back(VE.getTypeID(C->getOperand(2)->getType())); Record.push_back(VE.getValueID(C->getOperand(2))); break; case Instruction::ShuffleVector: // If the return type and argument types are the same, this is a // standard shufflevector instruction. If the types are different, // then the shuffle is widening or truncating the input vectors, and // the argument type must also be encoded. if (C->getType() == C->getOperand(0)->getType()) { Code = bitc::CST_CODE_CE_SHUFFLEVEC; } else { Code = bitc::CST_CODE_CE_SHUFVEC_EX; Record.push_back(VE.getTypeID(C->getOperand(0)->getType())); } Record.push_back(VE.getValueID(C->getOperand(0))); Record.push_back(VE.getValueID(C->getOperand(1))); Record.push_back(VE.getValueID(C->getOperand(2))); break; case Instruction::ICmp: case Instruction::FCmp: Code = bitc::CST_CODE_CE_CMP; Record.push_back(VE.getTypeID(C->getOperand(0)->getType())); Record.push_back(VE.getValueID(C->getOperand(0))); Record.push_back(VE.getValueID(C->getOperand(1))); Record.push_back(CE->getPredicate()); break; } } else if (const BlockAddress *BA = dyn_cast(C)) { Code = bitc::CST_CODE_BLOCKADDRESS; Record.push_back(VE.getTypeID(BA->getFunction()->getType())); Record.push_back(VE.getValueID(BA->getFunction())); Record.push_back(VE.getGlobalBasicBlockID(BA->getBasicBlock())); } else { #ifndef NDEBUG C->dump(); #endif llvm_unreachable("Unknown constant!"); } Stream.EmitRecord(Code, Record, AbbrevToUse); Record.clear(); } Stream.ExitBlock(); } void ModuleBitcodeWriter::writeModuleConstants() { const ValueEnumerator::ValueList &Vals = VE.getValues(); // Find the first constant to emit, which is the first non-globalvalue value. // We know globalvalues have been emitted by WriteModuleInfo. for (unsigned i = 0, e = Vals.size(); i != e; ++i) { if (!isa(Vals[i].first)) { writeConstants(i, Vals.size(), true); return; } } } /// pushValueAndType - The file has to encode both the value and type id for /// many values, because we need to know what type to create for forward /// references. However, most operands are not forward references, so this type /// field is not needed. /// /// This function adds V's value ID to Vals. If the value ID is higher than the /// instruction ID, then it is a forward reference, and it also includes the /// type ID. The value ID that is written is encoded relative to the InstID. bool ModuleBitcodeWriter::pushValueAndType(const Value *V, unsigned InstID, SmallVectorImpl &Vals) { unsigned ValID = VE.getValueID(V); // Make encoding relative to the InstID. Vals.push_back(InstID - ValID); if (ValID >= InstID) { Vals.push_back(VE.getTypeID(V->getType())); return true; } return false; } void ModuleBitcodeWriter::writeOperandBundles(ImmutableCallSite CS, unsigned InstID) { SmallVector Record; LLVMContext &C = CS.getInstruction()->getContext(); for (unsigned i = 0, e = CS.getNumOperandBundles(); i != e; ++i) { const auto &Bundle = CS.getOperandBundleAt(i); Record.push_back(C.getOperandBundleTagID(Bundle.getTagName())); for (auto &Input : Bundle.Inputs) pushValueAndType(Input, InstID, Record); Stream.EmitRecord(bitc::FUNC_CODE_OPERAND_BUNDLE, Record); Record.clear(); } } /// pushValue - Like pushValueAndType, but where the type of the value is /// omitted (perhaps it was already encoded in an earlier operand). 
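// Worked example of the relative encoding: with InstID = 100, an operand
// defined at ValID = 90 is written as 10 (a backward reference, no type
// needed), while a forward reference at ValID = 105 is written as the
// wrapped 32-bit value 100 - 105 = 4294967291 followed by its type ID.
// That wrap-around is also why PHI incoming values use pushValueSigned and
// signed VBRs further down, rather than this unsigned form.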
void ModuleBitcodeWriter::pushValue(const Value *V, unsigned InstID, SmallVectorImpl &Vals) { unsigned ValID = VE.getValueID(V); Vals.push_back(InstID - ValID); } void ModuleBitcodeWriter::pushValueSigned(const Value *V, unsigned InstID, SmallVectorImpl &Vals) { unsigned ValID = VE.getValueID(V); int64_t diff = ((int32_t)InstID - (int32_t)ValID); emitSignedInt64(Vals, diff); } /// WriteInstruction - Emit an instruction to the specified stream. void ModuleBitcodeWriter::writeInstruction(const Instruction &I, unsigned InstID, SmallVectorImpl &Vals) { unsigned Code = 0; unsigned AbbrevToUse = 0; VE.setInstructionID(&I); switch (I.getOpcode()) { default: if (Instruction::isCast(I.getOpcode())) { Code = bitc::FUNC_CODE_INST_CAST; if (!pushValueAndType(I.getOperand(0), InstID, Vals)) AbbrevToUse = FUNCTION_INST_CAST_ABBREV; Vals.push_back(VE.getTypeID(I.getType())); Vals.push_back(getEncodedCastOpcode(I.getOpcode())); } else { assert(isa(I) && "Unknown instruction!"); Code = bitc::FUNC_CODE_INST_BINOP; if (!pushValueAndType(I.getOperand(0), InstID, Vals)) AbbrevToUse = FUNCTION_INST_BINOP_ABBREV; pushValue(I.getOperand(1), InstID, Vals); Vals.push_back(getEncodedBinaryOpcode(I.getOpcode())); uint64_t Flags = getOptimizationFlags(&I); if (Flags != 0) { if (AbbrevToUse == FUNCTION_INST_BINOP_ABBREV) AbbrevToUse = FUNCTION_INST_BINOP_FLAGS_ABBREV; Vals.push_back(Flags); } } break; case Instruction::GetElementPtr: { Code = bitc::FUNC_CODE_INST_GEP; AbbrevToUse = FUNCTION_INST_GEP_ABBREV; auto &GEPInst = cast(I); Vals.push_back(GEPInst.isInBounds()); Vals.push_back(VE.getTypeID(GEPInst.getSourceElementType())); for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) pushValueAndType(I.getOperand(i), InstID, Vals); break; } case Instruction::ExtractValue: { Code = bitc::FUNC_CODE_INST_EXTRACTVAL; pushValueAndType(I.getOperand(0), InstID, Vals); const ExtractValueInst *EVI = cast(&I); Vals.append(EVI->idx_begin(), EVI->idx_end()); break; } case Instruction::InsertValue: { Code = bitc::FUNC_CODE_INST_INSERTVAL; pushValueAndType(I.getOperand(0), InstID, Vals); pushValueAndType(I.getOperand(1), InstID, Vals); const InsertValueInst *IVI = cast(&I); Vals.append(IVI->idx_begin(), IVI->idx_end()); break; } case Instruction::Select: Code = bitc::FUNC_CODE_INST_VSELECT; pushValueAndType(I.getOperand(1), InstID, Vals); pushValue(I.getOperand(2), InstID, Vals); pushValueAndType(I.getOperand(0), InstID, Vals); break; case Instruction::ExtractElement: Code = bitc::FUNC_CODE_INST_EXTRACTELT; pushValueAndType(I.getOperand(0), InstID, Vals); pushValueAndType(I.getOperand(1), InstID, Vals); break; case Instruction::InsertElement: Code = bitc::FUNC_CODE_INST_INSERTELT; pushValueAndType(I.getOperand(0), InstID, Vals); pushValue(I.getOperand(1), InstID, Vals); pushValueAndType(I.getOperand(2), InstID, Vals); break; case Instruction::ShuffleVector: Code = bitc::FUNC_CODE_INST_SHUFFLEVEC; pushValueAndType(I.getOperand(0), InstID, Vals); pushValue(I.getOperand(1), InstID, Vals); pushValue(I.getOperand(2), InstID, Vals); break; case Instruction::ICmp: case Instruction::FCmp: { // compare returning Int1Ty or vector of Int1Ty Code = bitc::FUNC_CODE_INST_CMP2; pushValueAndType(I.getOperand(0), InstID, Vals); pushValue(I.getOperand(1), InstID, Vals); Vals.push_back(cast(I).getPredicate()); uint64_t Flags = getOptimizationFlags(&I); if (Flags != 0) Vals.push_back(Flags); break; } case Instruction::Ret: { Code = bitc::FUNC_CODE_INST_RET; unsigned NumOperands = I.getNumOperands(); if (NumOperands == 0) AbbrevToUse = 
FUNCTION_INST_RET_VOID_ABBREV; else if (NumOperands == 1) { if (!pushValueAndType(I.getOperand(0), InstID, Vals)) AbbrevToUse = FUNCTION_INST_RET_VAL_ABBREV; } else { for (unsigned i = 0, e = NumOperands; i != e; ++i) pushValueAndType(I.getOperand(i), InstID, Vals); } } break; case Instruction::Br: { Code = bitc::FUNC_CODE_INST_BR; const BranchInst &II = cast(I); Vals.push_back(VE.getValueID(II.getSuccessor(0))); if (II.isConditional()) { Vals.push_back(VE.getValueID(II.getSuccessor(1))); pushValue(II.getCondition(), InstID, Vals); } } break; case Instruction::Switch: { Code = bitc::FUNC_CODE_INST_SWITCH; const SwitchInst &SI = cast(I); Vals.push_back(VE.getTypeID(SI.getCondition()->getType())); pushValue(SI.getCondition(), InstID, Vals); Vals.push_back(VE.getValueID(SI.getDefaultDest())); for (auto Case : SI.cases()) { Vals.push_back(VE.getValueID(Case.getCaseValue())); Vals.push_back(VE.getValueID(Case.getCaseSuccessor())); } } break; case Instruction::IndirectBr: Code = bitc::FUNC_CODE_INST_INDIRECTBR; Vals.push_back(VE.getTypeID(I.getOperand(0)->getType())); // Encode the address operand as relative, but not the basic blocks. pushValue(I.getOperand(0), InstID, Vals); for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) Vals.push_back(VE.getValueID(I.getOperand(i))); break; case Instruction::Invoke: { const InvokeInst *II = cast(&I); const Value *Callee = II->getCalledValue(); FunctionType *FTy = II->getFunctionType(); if (II->hasOperandBundles()) writeOperandBundles(II, InstID); Code = bitc::FUNC_CODE_INST_INVOKE; Vals.push_back(VE.getAttributeListID(II->getAttributes())); Vals.push_back(II->getCallingConv() | 1 << 13); Vals.push_back(VE.getValueID(II->getNormalDest())); Vals.push_back(VE.getValueID(II->getUnwindDest())); Vals.push_back(VE.getTypeID(FTy)); pushValueAndType(Callee, InstID, Vals); // Emit value #'s for the fixed parameters. for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) pushValue(I.getOperand(i), InstID, Vals); // fixed param. // Emit type/value pairs for varargs params. if (FTy->isVarArg()) { for (unsigned i = FTy->getNumParams(), e = II->getNumArgOperands(); i != e; ++i) pushValueAndType(I.getOperand(i), InstID, Vals); // vararg } break; } case Instruction::Resume: Code = bitc::FUNC_CODE_INST_RESUME; pushValueAndType(I.getOperand(0), InstID, Vals); break; case Instruction::CleanupRet: { Code = bitc::FUNC_CODE_INST_CLEANUPRET; const auto &CRI = cast(I); pushValue(CRI.getCleanupPad(), InstID, Vals); if (CRI.hasUnwindDest()) Vals.push_back(VE.getValueID(CRI.getUnwindDest())); break; } case Instruction::CatchRet: { Code = bitc::FUNC_CODE_INST_CATCHRET; const auto &CRI = cast(I); pushValue(CRI.getCatchPad(), InstID, Vals); Vals.push_back(VE.getValueID(CRI.getSuccessor())); break; } case Instruction::CleanupPad: case Instruction::CatchPad: { const auto &FuncletPad = cast(I); Code = isa(FuncletPad) ? 
bitc::FUNC_CODE_INST_CATCHPAD : bitc::FUNC_CODE_INST_CLEANUPPAD; pushValue(FuncletPad.getParentPad(), InstID, Vals); unsigned NumArgOperands = FuncletPad.getNumArgOperands(); Vals.push_back(NumArgOperands); for (unsigned Op = 0; Op != NumArgOperands; ++Op) pushValueAndType(FuncletPad.getArgOperand(Op), InstID, Vals); break; } case Instruction::CatchSwitch: { Code = bitc::FUNC_CODE_INST_CATCHSWITCH; const auto &CatchSwitch = cast(I); pushValue(CatchSwitch.getParentPad(), InstID, Vals); unsigned NumHandlers = CatchSwitch.getNumHandlers(); Vals.push_back(NumHandlers); for (const BasicBlock *CatchPadBB : CatchSwitch.handlers()) Vals.push_back(VE.getValueID(CatchPadBB)); if (CatchSwitch.hasUnwindDest()) Vals.push_back(VE.getValueID(CatchSwitch.getUnwindDest())); break; } case Instruction::Unreachable: Code = bitc::FUNC_CODE_INST_UNREACHABLE; AbbrevToUse = FUNCTION_INST_UNREACHABLE_ABBREV; break; case Instruction::PHI: { const PHINode &PN = cast(I); Code = bitc::FUNC_CODE_INST_PHI; // With the newer instruction encoding, forward references could give // negative valued IDs. This is most common for PHIs, so we use // signed VBRs. SmallVector Vals64; Vals64.push_back(VE.getTypeID(PN.getType())); for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { pushValueSigned(PN.getIncomingValue(i), InstID, Vals64); Vals64.push_back(VE.getValueID(PN.getIncomingBlock(i))); } // Emit a Vals64 vector and exit. Stream.EmitRecord(Code, Vals64, AbbrevToUse); Vals64.clear(); return; } case Instruction::LandingPad: { const LandingPadInst &LP = cast(I); Code = bitc::FUNC_CODE_INST_LANDINGPAD; Vals.push_back(VE.getTypeID(LP.getType())); Vals.push_back(LP.isCleanup()); Vals.push_back(LP.getNumClauses()); for (unsigned I = 0, E = LP.getNumClauses(); I != E; ++I) { if (LP.isCatch(I)) Vals.push_back(LandingPadInst::Catch); else Vals.push_back(LandingPadInst::Filter); pushValueAndType(LP.getClause(I), InstID, Vals); } break; } case Instruction::Alloca: { Code = bitc::FUNC_CODE_INST_ALLOCA; const AllocaInst &AI = cast(I); Vals.push_back(VE.getTypeID(AI.getAllocatedType())); Vals.push_back(VE.getTypeID(I.getOperand(0)->getType())); Vals.push_back(VE.getValueID(I.getOperand(0))); // size. 
unsigned AlignRecord = Log2_32(AI.getAlignment()) + 1;
    assert(Log2_32(Value::MaximumAlignment) + 1 < 1 << 5 &&
           "not enough bits for maximum alignment");
    assert(AlignRecord < 1 << 5 && "alignment does not fit in 5-bit record");
    // AlignRecord layout: bits 0-4 = Log2(alignment) + 1, bit 5 = inalloca,
    // bit 6 = explicit allocated type is present, bit 7 = swifterror.
    AlignRecord |= AI.isUsedWithInAlloca() << 5;
    AlignRecord |= 1 << 6;
    AlignRecord |= AI.isSwiftError() << 7;
    Vals.push_back(AlignRecord);
    break;
  }

  case Instruction::Load:
    if (cast<LoadInst>(I).isAtomic()) {
      Code = bitc::FUNC_CODE_INST_LOADATOMIC;
      pushValueAndType(I.getOperand(0), InstID, Vals);
    } else {
      Code = bitc::FUNC_CODE_INST_LOAD;
      if (!pushValueAndType(I.getOperand(0), InstID, Vals)) // ptr
        AbbrevToUse = FUNCTION_INST_LOAD_ABBREV;
    }
    Vals.push_back(VE.getTypeID(I.getType()));
    Vals.push_back(Log2_32(cast<LoadInst>(I).getAlignment())+1);
    Vals.push_back(cast<LoadInst>(I).isVolatile());
    if (cast<LoadInst>(I).isAtomic()) {
      Vals.push_back(getEncodedOrdering(cast<LoadInst>(I).getOrdering()));
      Vals.push_back(getEncodedSyncScopeID(cast<LoadInst>(I).getSyncScopeID()));
    }
    break;
  case Instruction::Store:
    if (cast<StoreInst>(I).isAtomic())
      Code = bitc::FUNC_CODE_INST_STOREATOMIC;
    else
      Code = bitc::FUNC_CODE_INST_STORE;
    pushValueAndType(I.getOperand(1), InstID, Vals); // ptrty + ptr
    pushValueAndType(I.getOperand(0), InstID, Vals); // valty + val
    Vals.push_back(Log2_32(cast<StoreInst>(I).getAlignment())+1);
    Vals.push_back(cast<StoreInst>(I).isVolatile());
    if (cast<StoreInst>(I).isAtomic()) {
      Vals.push_back(getEncodedOrdering(cast<StoreInst>(I).getOrdering()));
      Vals.push_back(
          getEncodedSyncScopeID(cast<StoreInst>(I).getSyncScopeID()));
    }
    break;
  case Instruction::AtomicCmpXchg:
    Code = bitc::FUNC_CODE_INST_CMPXCHG;
    pushValueAndType(I.getOperand(0), InstID, Vals); // ptrty + ptr
    pushValueAndType(I.getOperand(1), InstID, Vals); // cmp.
    pushValue(I.getOperand(2), InstID, Vals);        // newval.
    Vals.push_back(cast<AtomicCmpXchgInst>(I).isVolatile());
    Vals.push_back(
        getEncodedOrdering(cast<AtomicCmpXchgInst>(I).getSuccessOrdering()));
    Vals.push_back(
        getEncodedSyncScopeID(cast<AtomicCmpXchgInst>(I).getSyncScopeID()));
    Vals.push_back(
        getEncodedOrdering(cast<AtomicCmpXchgInst>(I).getFailureOrdering()));
    Vals.push_back(cast<AtomicCmpXchgInst>(I).isWeak());
    break;
  case Instruction::AtomicRMW:
    Code = bitc::FUNC_CODE_INST_ATOMICRMW;
    pushValueAndType(I.getOperand(0), InstID, Vals); // ptrty + ptr
    pushValue(I.getOperand(1), InstID, Vals);        // val.
    Vals.push_back(
        getEncodedRMWOperation(cast<AtomicRMWInst>(I).getOperation()));
    Vals.push_back(cast<AtomicRMWInst>(I).isVolatile());
    Vals.push_back(getEncodedOrdering(cast<AtomicRMWInst>(I).getOrdering()));
    Vals.push_back(
        getEncodedSyncScopeID(cast<AtomicRMWInst>(I).getSyncScopeID()));
    break;
  case Instruction::Fence:
    Code = bitc::FUNC_CODE_INST_FENCE;
    Vals.push_back(getEncodedOrdering(cast<FenceInst>(I).getOrdering()));
    Vals.push_back(getEncodedSyncScopeID(cast<FenceInst>(I).getSyncScopeID()));
    break;
  case Instruction::Call: {
    const CallInst &CI = cast<CallInst>(I);
    FunctionType *FTy = CI.getFunctionType();

    if (CI.hasOperandBundles())
      writeOperandBundles(&CI, InstID);

    Code = bitc::FUNC_CODE_INST_CALL;

    Vals.push_back(VE.getAttributeListID(CI.getAttributes()));

    unsigned Flags = getOptimizationFlags(&I);
    Vals.push_back(CI.getCallingConv() << bitc::CALL_CCONV |
                   unsigned(CI.isTailCall()) << bitc::CALL_TAIL |
                   unsigned(CI.isMustTailCall()) << bitc::CALL_MUSTTAIL |
                   1 << bitc::CALL_EXPLICIT_TYPE |
                   unsigned(CI.isNoTailCall()) << bitc::CALL_NOTAIL |
                   unsigned(Flags != 0) << bitc::CALL_FMF);
    if (Flags != 0)
      Vals.push_back(Flags);

    Vals.push_back(VE.getTypeID(FTy));
    pushValueAndType(CI.getCalledValue(), InstID, Vals); // Callee

    // Emit value #'s for the fixed parameters.
    for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) {
      // Check for labels (can happen with asm labels).
if (FTy->getParamType(i)->isLabelTy()) Vals.push_back(VE.getValueID(CI.getArgOperand(i))); else pushValue(CI.getArgOperand(i), InstID, Vals); // fixed param. } // Emit type/value pairs for varargs params. if (FTy->isVarArg()) { for (unsigned i = FTy->getNumParams(), e = CI.getNumArgOperands(); i != e; ++i) pushValueAndType(CI.getArgOperand(i), InstID, Vals); // varargs } break; } case Instruction::VAArg: Code = bitc::FUNC_CODE_INST_VAARG; Vals.push_back(VE.getTypeID(I.getOperand(0)->getType())); // valistty pushValue(I.getOperand(0), InstID, Vals); // valist. Vals.push_back(VE.getTypeID(I.getType())); // restype. break; } Stream.EmitRecord(Code, Vals, AbbrevToUse); Vals.clear(); } /// Write a GlobalValue VST to the module. The purpose of this data structure is /// to allow clients to efficiently find the function body. void ModuleBitcodeWriter::writeGlobalValueSymbolTable( DenseMap &FunctionToBitcodeIndex) { // Get the offset of the VST we are writing, and backpatch it into // the VST forward declaration record. uint64_t VSTOffset = Stream.GetCurrentBitNo(); // The BitcodeStartBit was the stream offset of the identification block. VSTOffset -= bitcodeStartBit(); assert((VSTOffset & 31) == 0 && "VST block not 32-bit aligned"); // Note that we add 1 here because the offset is relative to one word // before the start of the identification block, which was historically // always the start of the regular bitcode header. Stream.BackpatchWord(VSTOffsetPlaceholder, VSTOffset / 32 + 1); Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4); auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset unsigned FnEntryAbbrev = Stream.EmitAbbrev(std::move(Abbv)); for (const Function &F : M) { uint64_t Record[2]; if (F.isDeclaration()) continue; Record[0] = VE.getValueID(&F); // Save the word offset of the function (from the start of the // actual bitcode written to the stream). uint64_t BitcodeIndex = FunctionToBitcodeIndex[&F] - bitcodeStartBit(); assert((BitcodeIndex & 31) == 0 && "function block not 32-bit aligned"); // Note that we add 1 here because the offset is relative to one word // before the start of the identification block, which was historically // always the start of the regular bitcode header. Record[1] = BitcodeIndex / 32 + 1; Stream.EmitRecord(bitc::VST_CODE_FNENTRY, Record, FnEntryAbbrev); } Stream.ExitBlock(); } /// Emit names for arguments, instructions and basic blocks in a function. void ModuleBitcodeWriter::writeFunctionLevelValueSymbolTable( const ValueSymbolTable &VST) { if (VST.empty()) return; Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4); // FIXME: Set up the abbrev, we know how many values there are! // FIXME: We know if the type names can use 7-bit ascii. SmallVector NameVals; for (const ValueName &Name : VST) { // Figure out the encoding to use for the name. 
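// For example, under getStringEncoding above: a basic block named "entry"
// is all char6 characters, so it takes the 6-bit-per-character
// VST_BBENTRY_6_ABBREV; a C++ mangled name like "_ZN4llvm3fooEv" is also
// char6; "my$tmp" falls back to the 7-bit form ('$' is not char6 but fits
// in 7 bits); and any byte with the high bit set forces the 8-bit form.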
StringEncoding Bits = getStringEncoding(Name.getKey()); unsigned AbbrevToUse = VST_ENTRY_8_ABBREV; NameVals.push_back(VE.getValueID(Name.getValue())); // VST_CODE_ENTRY: [valueid, namechar x N] // VST_CODE_BBENTRY: [bbid, namechar x N] unsigned Code; if (isa(Name.getValue())) { Code = bitc::VST_CODE_BBENTRY; if (Bits == SE_Char6) AbbrevToUse = VST_BBENTRY_6_ABBREV; } else { Code = bitc::VST_CODE_ENTRY; if (Bits == SE_Char6) AbbrevToUse = VST_ENTRY_6_ABBREV; else if (Bits == SE_Fixed7) AbbrevToUse = VST_ENTRY_7_ABBREV; } for (const auto P : Name.getKey()) NameVals.push_back((unsigned char)P); // Emit the finished record. Stream.EmitRecord(Code, NameVals, AbbrevToUse); NameVals.clear(); } Stream.ExitBlock(); } void ModuleBitcodeWriter::writeUseList(UseListOrder &&Order) { assert(Order.Shuffle.size() >= 2 && "Shuffle too small"); unsigned Code; if (isa(Order.V)) Code = bitc::USELIST_CODE_BB; else Code = bitc::USELIST_CODE_DEFAULT; SmallVector Record(Order.Shuffle.begin(), Order.Shuffle.end()); Record.push_back(VE.getValueID(Order.V)); Stream.EmitRecord(Code, Record); } void ModuleBitcodeWriter::writeUseListBlock(const Function *F) { assert(VE.shouldPreserveUseListOrder() && "Expected to be preserving use-list order"); auto hasMore = [&]() { return !VE.UseListOrders.empty() && VE.UseListOrders.back().F == F; }; if (!hasMore()) // Nothing to do. return; Stream.EnterSubblock(bitc::USELIST_BLOCK_ID, 3); while (hasMore()) { writeUseList(std::move(VE.UseListOrders.back())); VE.UseListOrders.pop_back(); } Stream.ExitBlock(); } /// Emit a function body to the module stream. void ModuleBitcodeWriter::writeFunction( const Function &F, DenseMap &FunctionToBitcodeIndex) { // Save the bitcode index of the start of this function block for recording // in the VST. FunctionToBitcodeIndex[&F] = Stream.GetCurrentBitNo(); Stream.EnterSubblock(bitc::FUNCTION_BLOCK_ID, 4); VE.incorporateFunction(F); SmallVector Vals; // Emit the number of basic blocks, so the reader can create them ahead of // time. Vals.push_back(VE.getBasicBlocks().size()); Stream.EmitRecord(bitc::FUNC_CODE_DECLAREBLOCKS, Vals); Vals.clear(); // If there are function-local constants, emit them now. unsigned CstStart, CstEnd; VE.getFunctionConstantRange(CstStart, CstEnd); writeConstants(CstStart, CstEnd, false); // If there is function-local metadata, emit it now. writeFunctionMetadata(F); // Keep a running idea of what the instruction ID is. unsigned InstID = CstEnd; bool NeedsMetadataAttachment = F.hasMetadata(); DILocation *LastDL = nullptr; // Finally, emit all the instructions, in order. for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { writeInstruction(*I, InstID, Vals); if (!I->getType()->isVoidTy()) ++InstID; // If the instruction has metadata, write a metadata attachment later. NeedsMetadataAttachment |= I->hasMetadataOtherThanDebugLoc(); // If the instruction has a debug location, emit it. DILocation *DL = I->getDebugLoc(); if (!DL) continue; if (DL == LastDL) { // Just repeat the same debug loc as last time. Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC_AGAIN, Vals); continue; } Vals.push_back(DL->getLine()); Vals.push_back(DL->getColumn()); Vals.push_back(VE.getMetadataOrNullID(DL->getScope())); Vals.push_back(VE.getMetadataOrNullID(DL->getInlinedAt())); Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC, Vals); Vals.clear(); LastDL = DL; } // Emit names for all the instructions etc. 
if (auto *Symtab = F.getValueSymbolTable()) writeFunctionLevelValueSymbolTable(*Symtab); if (NeedsMetadataAttachment) writeFunctionMetadataAttachment(F); if (VE.shouldPreserveUseListOrder()) writeUseListBlock(&F); VE.purgeFunction(); Stream.ExitBlock(); } // Emit blockinfo, which defines the standard abbreviations etc. void ModuleBitcodeWriter::writeBlockInfo() { // We only want to emit block info records for blocks that have multiple // instances: CONSTANTS_BLOCK, FUNCTION_BLOCK and VALUE_SYMTAB_BLOCK. // Other blocks can define their abbrevs inline. Stream.EnterBlockInfoBlock(); { // 8-bit fixed-width VST_CODE_ENTRY/VST_CODE_BBENTRY strings. auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID, Abbv) != VST_ENTRY_8_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } { // 7-bit fixed width VST_CODE_ENTRY strings. auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID, Abbv) != VST_ENTRY_7_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } { // 6-bit char6 VST_CODE_ENTRY strings. auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID, Abbv) != VST_ENTRY_6_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } { // 6-bit char6 VST_CODE_BBENTRY strings. auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_BBENTRY)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID, Abbv) != VST_BBENTRY_6_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } { // SETTYPE abbrev for CONSTANTS_BLOCK. auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_SETTYPE)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, VE.computeBitsRequiredForTypeIndicies())); if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID, Abbv) != CONSTANTS_SETTYPE_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } { // INTEGER abbrev for CONSTANTS_BLOCK. auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_INTEGER)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID, Abbv) != CONSTANTS_INTEGER_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } { // CE_CAST abbrev for CONSTANTS_BLOCK. auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CE_CAST)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // cast opc Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // typeid VE.computeBitsRequiredForTypeIndicies())); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID, Abbv) != CONSTANTS_CE_CAST_Abbrev) llvm_unreachable("Unexpected abbrev ordering!"); } { // NULL abbrev for CONSTANTS_BLOCK. 
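// CST_CODE_NULL carries no operands, so this abbrev consists of the literal
// code alone; an abbreviated null constant costs just the abbrev id itself.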
auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_NULL)); if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID, Abbv) != CONSTANTS_NULL_Abbrev) llvm_unreachable("Unexpected abbrev ordering!"); } // FIXME: This should only use space for first class types! { // INST_LOAD abbrev for FUNCTION_BLOCK. auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_LOAD)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Ptr Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // dest ty VE.computeBitsRequiredForTypeIndicies())); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // Align Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // volatile if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) != FUNCTION_INST_LOAD_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } { // INST_BINOP abbrev for FUNCTION_BLOCK. auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_BINOP)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LHS Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // RHS Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) != FUNCTION_INST_BINOP_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } { // INST_BINOP_FLAGS abbrev for FUNCTION_BLOCK. auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_BINOP)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LHS Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // RHS Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); // flags + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); // flags if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) != FUNCTION_INST_BINOP_FLAGS_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } { // INST_CAST abbrev for FUNCTION_BLOCK. auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_CAST)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // OpVal Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // dest ty VE.computeBitsRequiredForTypeIndicies())); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) != FUNCTION_INST_CAST_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } { // INST_RET abbrev for FUNCTION_BLOCK. auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_RET)); if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) != FUNCTION_INST_RET_VOID_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } { // INST_RET abbrev for FUNCTION_BLOCK. auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_RET)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // ValID if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) != FUNCTION_INST_RET_VAL_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } { // INST_UNREACHABLE abbrev for FUNCTION_BLOCK. 
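// As with CST_CODE_NULL above, `unreachable` has no operands. Since
// FUNCTION_BLOCK is entered with a 4-bit abbrev id width, an abbreviated
// unreachable should occupy only those 4 bits in the stream (a
// back-of-the-envelope estimate, not something the writer relies on).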
auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_UNREACHABLE)); if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) != FUNCTION_INST_UNREACHABLE_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } { auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_GEP)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // dest ty Log2_32_Ceil(VE.getTypes().size() + 1))); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) != FUNCTION_INST_GEP_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } Stream.ExitBlock(); } /// Write the module path strings, currently only used when generating /// a combined index file. void IndexBitcodeWriter::writeModStrings() { Stream.EnterSubblock(bitc::MODULE_STRTAB_BLOCK_ID, 3); // TODO: See which abbrev sizes we actually need to emit // 8-bit fixed-width MST_ENTRY strings. auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_ENTRY)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); unsigned Abbrev8Bit = Stream.EmitAbbrev(std::move(Abbv)); // 7-bit fixed width MST_ENTRY strings. Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_ENTRY)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); unsigned Abbrev7Bit = Stream.EmitAbbrev(std::move(Abbv)); // 6-bit char6 MST_ENTRY strings. Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_ENTRY)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); unsigned Abbrev6Bit = Stream.EmitAbbrev(std::move(Abbv)); // Module Hash, 160 bits SHA1. Optionally, emitted after each MST_CODE_ENTRY. Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_HASH)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); unsigned AbbrevHash = Stream.EmitAbbrev(std::move(Abbv)); SmallVector Vals; forEachModule( [&](const StringMapEntry> &MPSE) { StringRef Key = MPSE.getKey(); const auto &Value = MPSE.getValue(); StringEncoding Bits = getStringEncoding(Key); unsigned AbbrevToUse = Abbrev8Bit; if (Bits == SE_Char6) AbbrevToUse = Abbrev6Bit; else if (Bits == SE_Fixed7) AbbrevToUse = Abbrev7Bit; Vals.push_back(Value.first); Vals.append(Key.begin(), Key.end()); // Emit the finished record. Stream.EmitRecord(bitc::MST_CODE_ENTRY, Vals, AbbrevToUse); // Emit an optional hash for the module now const auto &Hash = Value.second; if (llvm::any_of(Hash, [](uint32_t H) { return H; })) { Vals.assign(Hash.begin(), Hash.end()); // Emit the hash record. Stream.EmitRecord(bitc::MST_CODE_HASH, Vals, AbbrevHash); } Vals.clear(); }); Stream.ExitBlock(); } /// Write the function type metadata related records that need to appear before /// a function summary entry (whether per-module or combined). 
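/// For example, a function whose body contains
///   %r = call i1 @llvm.type.test(i8* %p, metadata !"_ZTS1A")
/// would have its summary preceded by an FS_TYPE_TESTS record carrying the
/// GUID of "_ZTS1A" (an illustrative type identifier).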
static void writeFunctionTypeMetadataRecords(BitstreamWriter &Stream, FunctionSummary *FS) { if (!FS->type_tests().empty()) Stream.EmitRecord(bitc::FS_TYPE_TESTS, FS->type_tests()); SmallVector Record; auto WriteVFuncIdVec = [&](uint64_t Ty, ArrayRef<FunctionSummary::VFuncId> VFs) { if (VFs.empty()) return; Record.clear(); for (auto &VF : VFs) { Record.push_back(VF.GUID); Record.push_back(VF.Offset); } Stream.EmitRecord(Ty, Record); }; WriteVFuncIdVec(bitc::FS_TYPE_TEST_ASSUME_VCALLS, FS->type_test_assume_vcalls()); WriteVFuncIdVec(bitc::FS_TYPE_CHECKED_LOAD_VCALLS, FS->type_checked_load_vcalls()); auto WriteConstVCallVec = [&](uint64_t Ty, ArrayRef<FunctionSummary::ConstVCall> VCs) { for (auto &VC : VCs) { Record.clear(); Record.push_back(VC.VFunc.GUID); Record.push_back(VC.VFunc.Offset); Record.insert(Record.end(), VC.Args.begin(), VC.Args.end()); Stream.EmitRecord(Ty, Record); } }; WriteConstVCallVec(bitc::FS_TYPE_TEST_ASSUME_CONST_VCALL, FS->type_test_assume_const_vcalls()); WriteConstVCallVec(bitc::FS_TYPE_CHECKED_LOAD_CONST_VCALL, FS->type_checked_load_const_vcalls()); } // Helper to emit a single function summary record. void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord( SmallVector &NameVals, GlobalValueSummary *Summary, unsigned ValueID, unsigned FSCallsAbbrev, unsigned FSCallsProfileAbbrev, const Function &F) { NameVals.push_back(ValueID); FunctionSummary *FS = cast<FunctionSummary>(Summary); writeFunctionTypeMetadataRecords(Stream, FS); NameVals.push_back(getEncodedGVSummaryFlags(FS->flags())); NameVals.push_back(FS->instCount()); NameVals.push_back(getEncodedFFlags(FS->fflags())); NameVals.push_back(FS->refs().size()); for (auto &RI : FS->refs()) NameVals.push_back(VE.getValueID(RI.getValue())); bool HasProfileData = F.hasProfileData(); for (auto &ECI : FS->calls()) { NameVals.push_back(getValueId(ECI.first)); if (HasProfileData) NameVals.push_back(static_cast<uint64_t>(ECI.second.Hotness)); } unsigned FSAbbrev = (HasProfileData ? FSCallsProfileAbbrev : FSCallsAbbrev); unsigned Code = (HasProfileData ? bitc::FS_PERMODULE_PROFILE : bitc::FS_PERMODULE); // Emit the finished record. Stream.EmitRecord(Code, NameVals, FSAbbrev); NameVals.clear(); } // Collect the global value references in the given variable's initializer, // and emit them in a summary record. void ModuleBitcodeWriterBase::writeModuleLevelReferences( const GlobalVariable &V, SmallVector &NameVals, unsigned FSModRefsAbbrev) { auto VI = Index->getValueInfo(GlobalValue::getGUID(V.getName())); if (!VI || VI.getSummaryList().empty()) { // Only declarations should not have a summary (a declaration might however // have a summary if the def was in module level asm). assert(V.isDeclaration()); return; } auto *Summary = VI.getSummaryList()[0].get(); NameVals.push_back(VE.getValueID(&V)); GlobalVarSummary *VS = cast<GlobalVarSummary>(Summary); NameVals.push_back(getEncodedGVSummaryFlags(VS->flags())); unsigned SizeBeforeRefs = NameVals.size(); for (auto &RI : VS->refs()) NameVals.push_back(VE.getValueID(RI.getValue())); // Sort the refs for deterministic output; the vector returned by VS->refs() has // been initialized from a DenseSet. std::sort(NameVals.begin() + SizeBeforeRefs, NameVals.end()); Stream.EmitRecord(bitc::FS_PERMODULE_GLOBALVAR_INIT_REFS, NameVals, FSModRefsAbbrev); NameVals.clear(); } // Current version for the summary. // This is bumped whenever we introduce changes in the way some records are // interpreted, like flags for instance. static const uint64_t INDEX_VERSION = 4; /// Emit the per-module summary section alongside the rest of /// the module's bitcode.
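/// A module can request the full LTO flavor with a module flag; e.g. an
/// !llvm.module.flags entry of !{i32 1, !"ThinLTO", i32 0} makes the code
/// below enter FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID instead of
/// GLOBALVAL_SUMMARY_BLOCK_ID.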
void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() { // By default we compile with ThinLTO if the module has a summary, but the // client can request full LTO with a module flag. bool IsThinLTO = true; if (auto *MD = mdconst::extract_or_null<ConstantInt>(M.getModuleFlag("ThinLTO"))) IsThinLTO = MD->getZExtValue(); Stream.EnterSubblock(IsThinLTO ? bitc::GLOBALVAL_SUMMARY_BLOCK_ID : bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID, 4); Stream.EmitRecord(bitc::FS_VERSION, ArrayRef<uint64_t>{INDEX_VERSION}); if (Index->begin() == Index->end()) { Stream.ExitBlock(); return; } for (const auto &GVI : valueIds()) { Stream.EmitRecord(bitc::FS_VALUE_GUID, ArrayRef<uint64_t>{GVI.second, GVI.first}); } // Abbrev for FS_PERMODULE. auto Abbv = std::make_shared<BitCodeAbbrev>(); Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs // numrefs x valueid, n x (valueid) Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); unsigned FSCallsAbbrev = Stream.EmitAbbrev(std::move(Abbv)); // Abbrev for FS_PERMODULE_PROFILE. Abbv = std::make_shared<BitCodeAbbrev>(); Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_PROFILE)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs // numrefs x valueid, n x (valueid, hotness) Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); unsigned FSCallsProfileAbbrev = Stream.EmitAbbrev(std::move(Abbv)); // Abbrev for FS_PERMODULE_GLOBALVAR_INIT_REFS. Abbv = std::make_shared<BitCodeAbbrev>(); Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_GLOBALVAR_INIT_REFS)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); // valueids Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); unsigned FSModRefsAbbrev = Stream.EmitAbbrev(std::move(Abbv)); // Abbrev for FS_ALIAS. Abbv = std::make_shared<BitCodeAbbrev>(); Abbv->Add(BitCodeAbbrevOp(bitc::FS_ALIAS)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid unsigned FSAliasAbbrev = Stream.EmitAbbrev(std::move(Abbv)); SmallVector NameVals; // Iterate over the list of functions instead of the Index to // ensure the ordering is stable. for (const Function &F : M) { // Summary emission does not support anonymous functions; they have to be // renamed using the anonymous function renaming pass. if (!F.hasName()) report_fatal_error("Unexpected anonymous function when writing summary"); ValueInfo VI = Index->getValueInfo(GlobalValue::getGUID(F.getName())); if (!VI || VI.getSummaryList().empty()) { // Only declarations should not have a summary (a declaration might // however have a summary if the def was in module level asm).
assert(F.isDeclaration()); continue; } auto *Summary = VI.getSummaryList()[0].get(); writePerModuleFunctionSummaryRecord(NameVals, Summary, VE.getValueID(&F), FSCallsAbbrev, FSCallsProfileAbbrev, F); } // Capture references from GlobalVariable initializers, which are outside // of a function scope. for (const GlobalVariable &G : M.globals()) writeModuleLevelReferences(G, NameVals, FSModRefsAbbrev); for (const GlobalAlias &A : M.aliases()) { auto *Aliasee = A.getBaseObject(); if (!Aliasee->hasName()) // Nameless functions don't have an entry in the summary; skip them. continue; auto AliasId = VE.getValueID(&A); auto AliaseeId = VE.getValueID(Aliasee); NameVals.push_back(AliasId); auto *Summary = Index->getGlobalValueSummary(A); AliasSummary *AS = cast<AliasSummary>(Summary); NameVals.push_back(getEncodedGVSummaryFlags(AS->flags())); NameVals.push_back(AliaseeId); Stream.EmitRecord(bitc::FS_ALIAS, NameVals, FSAliasAbbrev); NameVals.clear(); } Stream.ExitBlock(); } /// Emit the combined summary section into the combined index file. void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { Stream.EnterSubblock(bitc::GLOBALVAL_SUMMARY_BLOCK_ID, 3); Stream.EmitRecord(bitc::FS_VERSION, ArrayRef<uint64_t>{INDEX_VERSION}); for (const auto &GVI : valueIds()) { Stream.EmitRecord(bitc::FS_VALUE_GUID, ArrayRef<uint64_t>{GVI.second, GVI.first}); } // Abbrev for FS_COMBINED. auto Abbv = std::make_shared<BitCodeAbbrev>(); Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // modid Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs // numrefs x valueid, n x (valueid) Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); unsigned FSCallsAbbrev = Stream.EmitAbbrev(std::move(Abbv)); // Abbrev for FS_COMBINED_PROFILE. Abbv = std::make_shared<BitCodeAbbrev>(); Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_PROFILE)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // modid Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs // numrefs x valueid, n x (valueid, hotness) Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); unsigned FSCallsProfileAbbrev = Stream.EmitAbbrev(std::move(Abbv)); // Abbrev for FS_COMBINED_GLOBALVAR_INIT_REFS. Abbv = std::make_shared<BitCodeAbbrev>(); Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_GLOBALVAR_INIT_REFS)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // modid Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); // valueids Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); unsigned FSModRefsAbbrev = Stream.EmitAbbrev(std::move(Abbv)); // Abbrev for FS_COMBINED_ALIAS.
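// (A combined alias record lays out as [valueid, modid, flags, aliasee
// valueid]; the aliasee is referenced by value id, which is why aliases are
// emitted as a post-pass below, once every summary has been assigned an id.)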
Abbv = std::make_shared<BitCodeAbbrev>(); Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_ALIAS)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // modid Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid unsigned FSAliasAbbrev = Stream.EmitAbbrev(std::move(Abbv)); // The aliases are emitted as a post-pass, and will point to the value // id of the aliasee. Save them in a vector for post-processing. SmallVector Aliases; // Save the value id for each summary for alias emission. DenseMap SummaryToValueIdMap; SmallVector NameVals; // For local linkage, we also emit the original name separately // immediately after the record. auto MaybeEmitOriginalName = [&](GlobalValueSummary &S) { if (!GlobalValue::isLocalLinkage(S.linkage())) return; NameVals.push_back(S.getOriginalName()); Stream.EmitRecord(bitc::FS_COMBINED_ORIGINAL_NAME, NameVals); NameVals.clear(); }; forEachSummary([&](GVInfo I, bool IsAliasee) { GlobalValueSummary *S = I.second; assert(S); auto ValueId = getValueId(I.first); assert(ValueId); SummaryToValueIdMap[S] = *ValueId; // If this is invoked for an aliasee, we want to record the above // mapping, but then not emit a summary entry (if the aliasee is // to be imported, we will invoke this separately with IsAliasee=false). if (IsAliasee) return; if (auto *AS = dyn_cast<AliasSummary>(S)) { // Will process aliases as a post-pass because the reader wants all // globals to be loaded first. Aliases.push_back(AS); return; } if (auto *VS = dyn_cast<GlobalVarSummary>(S)) { NameVals.push_back(*ValueId); NameVals.push_back(Index.getModuleId(VS->modulePath())); NameVals.push_back(getEncodedGVSummaryFlags(VS->flags())); for (auto &RI : VS->refs()) { auto RefValueId = getValueId(RI.getGUID()); if (!RefValueId) continue; NameVals.push_back(*RefValueId); } // Emit the finished record. Stream.EmitRecord(bitc::FS_COMBINED_GLOBALVAR_INIT_REFS, NameVals, FSModRefsAbbrev); NameVals.clear(); MaybeEmitOriginalName(*S); return; } auto *FS = cast<FunctionSummary>(S); writeFunctionTypeMetadataRecords(Stream, FS); NameVals.push_back(*ValueId); NameVals.push_back(Index.getModuleId(FS->modulePath())); NameVals.push_back(getEncodedGVSummaryFlags(FS->flags())); NameVals.push_back(FS->instCount()); NameVals.push_back(getEncodedFFlags(FS->fflags())); // Fill in below NameVals.push_back(0); unsigned Count = 0; for (auto &RI : FS->refs()) { auto RefValueId = getValueId(RI.getGUID()); if (!RefValueId) continue; NameVals.push_back(*RefValueId); Count++; } NameVals[5] = Count; bool HasProfileData = false; for (auto &EI : FS->calls()) { HasProfileData |= EI.second.Hotness != CalleeInfo::HotnessType::Unknown; if (HasProfileData) break; } for (auto &EI : FS->calls()) { // If this GUID doesn't have a value id, it doesn't have a function // summary and we don't need to record any calls to it. GlobalValue::GUID GUID = EI.first.getGUID(); auto CallValueId = getValueId(GUID); if (!CallValueId) { // For SamplePGO, the indirect call targets for local functions will // have their original names annotated in the profile. We try to find the // corresponding PGOFuncName as the GUID. GUID = Index.getGUIDFromOriginalID(GUID); if (GUID == 0) continue; CallValueId = getValueId(GUID); if (!CallValueId) continue; // The mapping from OriginalId to GUID may return a GUID // that corresponds to a static variable. Filter it out here.
// This can happen when // 1) There is a call to a library function which does not have // a CallValueId; // 2) There is a static variable with the OriginalGUID identical // to the GUID of the library function in 1); // When this happens, the logic for SamplePGO kicks in and // the static variable in 2) will be found, which needs to be // filtered out. auto *GVSum = Index.getGlobalValueSummary(GUID, false); if (GVSum && GVSum->getSummaryKind() == GlobalValueSummary::GlobalVarKind) continue; } NameVals.push_back(*CallValueId); if (HasProfileData) NameVals.push_back(static_cast<uint64_t>(EI.second.Hotness)); } unsigned FSAbbrev = (HasProfileData ? FSCallsProfileAbbrev : FSCallsAbbrev); unsigned Code = (HasProfileData ? bitc::FS_COMBINED_PROFILE : bitc::FS_COMBINED); // Emit the finished record. Stream.EmitRecord(Code, NameVals, FSAbbrev); NameVals.clear(); MaybeEmitOriginalName(*S); }); for (auto *AS : Aliases) { auto AliasValueId = SummaryToValueIdMap[AS]; assert(AliasValueId); NameVals.push_back(AliasValueId); NameVals.push_back(Index.getModuleId(AS->modulePath())); NameVals.push_back(getEncodedGVSummaryFlags(AS->flags())); auto AliaseeValueId = SummaryToValueIdMap[&AS->getAliasee()]; assert(AliaseeValueId); NameVals.push_back(AliaseeValueId); // Emit the finished record. Stream.EmitRecord(bitc::FS_COMBINED_ALIAS, NameVals, FSAliasAbbrev); NameVals.clear(); MaybeEmitOriginalName(*AS); } if (!Index.cfiFunctionDefs().empty()) { for (auto &S : Index.cfiFunctionDefs()) { NameVals.push_back(StrtabBuilder.add(S)); NameVals.push_back(S.size()); } Stream.EmitRecord(bitc::FS_CFI_FUNCTION_DEFS, NameVals); NameVals.clear(); } if (!Index.cfiFunctionDecls().empty()) { for (auto &S : Index.cfiFunctionDecls()) { NameVals.push_back(StrtabBuilder.add(S)); NameVals.push_back(S.size()); } Stream.EmitRecord(bitc::FS_CFI_FUNCTION_DECLS, NameVals); NameVals.clear(); } Stream.ExitBlock(); } /// Create the "IDENTIFICATION_BLOCK_ID" containing a single string with the /// current llvm version, and a record for the epoch number. static void writeIdentificationBlock(BitstreamWriter &Stream) { Stream.EnterSubblock(bitc::IDENTIFICATION_BLOCK_ID, 5); // Write the "user readable" string identifying the bitcode producer auto Abbv = std::make_shared<BitCodeAbbrev>(); Abbv->Add(BitCodeAbbrevOp(bitc::IDENTIFICATION_CODE_STRING)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); auto StringAbbrev = Stream.EmitAbbrev(std::move(Abbv)); writeStringRecord(Stream, bitc::IDENTIFICATION_CODE_STRING, "LLVM" LLVM_VERSION_STRING, StringAbbrev); // Write the epoch version Abbv = std::make_shared<BitCodeAbbrev>(); Abbv->Add(BitCodeAbbrevOp(bitc::IDENTIFICATION_CODE_EPOCH)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); auto EpochAbbrev = Stream.EmitAbbrev(std::move(Abbv)); SmallVector Vals = {bitc::BITCODE_CURRENT_EPOCH}; Stream.EmitRecord(bitc::IDENTIFICATION_CODE_EPOCH, Vals, EpochAbbrev); Stream.ExitBlock(); } void ModuleBitcodeWriter::writeModuleHash(size_t BlockStartPos) { // Emit the module's hash. // MODULE_CODE_HASH: [5*i32] if (GenerateHash) { uint32_t Vals[5]; Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&(Buffer)[BlockStartPos], Buffer.size() - BlockStartPos)); StringRef Hash = Hasher.result(); for (int Pos = 0; Pos < 20; Pos += 4) { Vals[Pos / 4] = support::endian::read32be(Hash.data() + Pos); } // Emit the finished record. Stream.EmitRecord(bitc::MODULE_CODE_HASH, Vals); if (ModHash) // Save the written hash value.
std::copy(std::begin(Vals), std::end(Vals), std::begin(*ModHash)); } } void ModuleBitcodeWriter::write() { writeIdentificationBlock(Stream); Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3); size_t BlockStartPos = Buffer.size(); writeModuleVersion(); // Emit blockinfo, which defines the standard abbreviations etc. writeBlockInfo(); // Emit information about attribute groups. writeAttributeGroupTable(); // Emit information about parameter attributes. writeAttributeTable(); // Emit information describing all of the types in the module. writeTypeTable(); writeComdats(); // Emit top-level description of module, including target triple, inline asm, // descriptors for global variables, and function prototype info. writeModuleInfo(); // Emit constants. writeModuleConstants(); // Emit metadata kind names. writeModuleMetadataKinds(); // Emit metadata. writeModuleMetadata(); // Emit module-level use-lists. if (VE.shouldPreserveUseListOrder()) writeUseListBlock(nullptr); writeOperandBundleTags(); writeSyncScopeNames(); // Emit function bodies. DenseMap FunctionToBitcodeIndex; for (Module::const_iterator F = M.begin(), E = M.end(); F != E; ++F) if (!F->isDeclaration()) writeFunction(*F, FunctionToBitcodeIndex); // Need to write after the above call to WriteFunction which populates // the summary information in the index. if (Index) writePerModuleGlobalValueSummary(); writeGlobalValueSymbolTable(FunctionToBitcodeIndex); writeModuleHash(BlockStartPos); Stream.ExitBlock(); } static void writeInt32ToBuffer(uint32_t Value, SmallVectorImpl &Buffer, uint32_t &Position) { support::endian::write32le(&Buffer[Position], Value); Position += 4; } /// If generating a bc file on darwin, we have to emit a /// header and trailer to make it compatible with the system archiver. To do /// this we emit the following header, and then emit a trailer that pads the /// file out to be a multiple of 16 bytes. /// /// struct bc_header { /// uint32_t Magic; // 0x0B17C0DE /// uint32_t Version; // Version, currently always 0. /// uint32_t BitcodeOffset; // Offset to traditional bitcode file. /// uint32_t BitcodeSize; // Size of traditional bitcode file. /// uint32_t CPUType; // CPU specifier. /// ... potentially more later ... /// }; static void emitDarwinBCHeaderAndTrailer(SmallVectorImpl &Buffer, const Triple &TT) { unsigned CPUType = ~0U; // Match x86_64-*, i[3-9]86-*, powerpc-*, powerpc64-*, arm-*, thumb-*, // armv[0-9]-*, thumbv[0-9]-*, armv5te-*, or armv6t2-*. The CPUType is a magic // number from /usr/include/mach/machine.h. It is ok to reproduce the // specific constants here because they are implicitly part of the Darwin ABI. enum { DARWIN_CPU_ARCH_ABI64 = 0x01000000, DARWIN_CPU_TYPE_X86 = 7, DARWIN_CPU_TYPE_ARM = 12, DARWIN_CPU_TYPE_POWERPC = 18 }; Triple::ArchType Arch = TT.getArch(); if (Arch == Triple::x86_64) CPUType = DARWIN_CPU_TYPE_X86 | DARWIN_CPU_ARCH_ABI64; else if (Arch == Triple::x86) CPUType = DARWIN_CPU_TYPE_X86; else if (Arch == Triple::ppc) CPUType = DARWIN_CPU_TYPE_POWERPC; else if (Arch == Triple::ppc64) CPUType = DARWIN_CPU_TYPE_POWERPC | DARWIN_CPU_ARCH_ABI64; else if (Arch == Triple::arm || Arch == Triple::thumb) CPUType = DARWIN_CPU_TYPE_ARM; // Traditional Bitcode starts after header. assert(Buffer.size() >= BWH_HeaderSize && "Expected header size to be reserved"); unsigned BCOffset = BWH_HeaderSize; unsigned BCSize = Buffer.size() - BWH_HeaderSize; // Write the magic and version. 
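// Worked example (hypothetical sizes): a 1000-byte traditional bitcode
// payload yields Magic=0x0B17C0DE, Version=0, BitcodeOffset=20
// (BWH_HeaderSize), BitcodeSize=1000; the resulting 1020-byte file is then
// padded up to 1024 bytes by the loop below.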
unsigned Position = 0; writeInt32ToBuffer(0x0B17C0DE, Buffer, Position); writeInt32ToBuffer(0, Buffer, Position); // Version. writeInt32ToBuffer(BCOffset, Buffer, Position); writeInt32ToBuffer(BCSize, Buffer, Position); writeInt32ToBuffer(CPUType, Buffer, Position); // If the file is not a multiple of 16 bytes, insert dummy padding. while (Buffer.size() & 15) Buffer.push_back(0); } /// Helper to write the header common to all bitcode files. static void writeBitcodeHeader(BitstreamWriter &Stream) { // Emit the file header. Stream.Emit((unsigned)'B', 8); Stream.Emit((unsigned)'C', 8); Stream.Emit(0x0, 4); Stream.Emit(0xC, 4); Stream.Emit(0xE, 4); Stream.Emit(0xD, 4); } BitcodeWriter::BitcodeWriter(SmallVectorImpl &Buffer) : Buffer(Buffer), Stream(new BitstreamWriter(Buffer)) { writeBitcodeHeader(*Stream); } BitcodeWriter::~BitcodeWriter() { assert(WroteStrtab); } void BitcodeWriter::writeBlob(unsigned Block, unsigned Record, StringRef Blob) { Stream->EnterSubblock(Block, 3); auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(Record)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); auto AbbrevNo = Stream->EmitAbbrev(std::move(Abbv)); Stream->EmitRecordWithBlob(AbbrevNo, ArrayRef{Record}, Blob); Stream->ExitBlock(); } void BitcodeWriter::writeSymtab() { assert(!WroteStrtab && !WroteSymtab); // If any module has module-level inline asm, we will require a registered asm // parser for the target so that we can create an accurate symbol table for // the module. for (Module *M : Mods) { if (M->getModuleInlineAsm().empty()) continue; std::string Err; const Triple TT(M->getTargetTriple()); const Target *T = TargetRegistry::lookupTarget(TT.str(), Err); if (!T || !T->hasMCAsmParser()) return; } WroteSymtab = true; SmallVector Symtab; // The irsymtab::build function may be unable to create a symbol table if the // module is malformed (e.g. it contains an invalid alias). Writing a symbol // table is not required for correctness, but we still want to be able to // write malformed modules to bitcode files, so swallow the error. if (Error E = irsymtab::build(Mods, Symtab, StrtabBuilder, Alloc)) { consumeError(std::move(E)); return; } writeBlob(bitc::SYMTAB_BLOCK_ID, bitc::SYMTAB_BLOB, {Symtab.data(), Symtab.size()}); } void BitcodeWriter::writeStrtab() { assert(!WroteStrtab); std::vector Strtab; StrtabBuilder.finalizeInOrder(); Strtab.resize(StrtabBuilder.getSize()); StrtabBuilder.write((uint8_t *)Strtab.data()); writeBlob(bitc::STRTAB_BLOCK_ID, bitc::STRTAB_BLOB, {Strtab.data(), Strtab.size()}); WroteStrtab = true; } void BitcodeWriter::copyStrtab(StringRef Strtab) { writeBlob(bitc::STRTAB_BLOCK_ID, bitc::STRTAB_BLOB, Strtab); WroteStrtab = true; } void BitcodeWriter::writeModule(const Module *M, bool ShouldPreserveUseListOrder, const ModuleSummaryIndex *Index, bool GenerateHash, ModuleHash *ModHash) { assert(!WroteStrtab); // The Mods vector is used by irsymtab::build, which requires non-const // Modules in case it needs to materialize metadata. But the bitcode writer // requires that the module is materialized, so we can cast to non-const here, // after checking that it is in fact materialized. 
assert(M->isMaterialized()); Mods.push_back(const_cast<Module *>(M)); ModuleBitcodeWriter ModuleWriter(M, Buffer, StrtabBuilder, *Stream, ShouldPreserveUseListOrder, Index, GenerateHash, ModHash); ModuleWriter.write(); } void BitcodeWriter::writeIndex( const ModuleSummaryIndex *Index, const std::map *ModuleToSummariesForIndex) { IndexBitcodeWriter IndexWriter(*Stream, StrtabBuilder, *Index, ModuleToSummariesForIndex); IndexWriter.write(); } /// WriteBitcodeToFile - Write the specified module to the specified output /// stream. void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out, bool ShouldPreserveUseListOrder, const ModuleSummaryIndex *Index, bool GenerateHash, ModuleHash *ModHash) { SmallVector Buffer; Buffer.reserve(256*1024); // If this is darwin or another generic macho target, reserve space for the // header. Triple TT(M->getTargetTriple()); if (TT.isOSDarwin() || TT.isOSBinFormatMachO()) Buffer.insert(Buffer.begin(), BWH_HeaderSize, 0); BitcodeWriter Writer(Buffer); Writer.writeModule(M, ShouldPreserveUseListOrder, Index, GenerateHash, ModHash); Writer.writeSymtab(); Writer.writeStrtab(); if (TT.isOSDarwin() || TT.isOSBinFormatMachO()) emitDarwinBCHeaderAndTrailer(Buffer, TT); // Write the generated bitstream to "Out". Out.write((char*)&Buffer.front(), Buffer.size()); } void IndexBitcodeWriter::write() { Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3); writeModuleVersion(); // Write the module paths in the combined index. writeModStrings(); // Write the summary combined index records. writeCombinedGlobalValueSummary(); Stream.ExitBlock(); } // Write the specified module summary index to the given raw output stream, // where it will be written in a new bitcode block. This is used when // writing the combined index file for ThinLTO. When writing a subset of the // index for a distributed backend, provide a \p ModuleToSummariesForIndex map. void llvm::WriteIndexToFile( const ModuleSummaryIndex &Index, raw_ostream &Out, const std::map *ModuleToSummariesForIndex) { SmallVector Buffer; Buffer.reserve(256 * 1024); BitcodeWriter Writer(Buffer); Writer.writeIndex(&Index, ModuleToSummariesForIndex); Writer.writeStrtab(); Out.write((char *)&Buffer.front(), Buffer.size()); } namespace { /// Class to manage the bitcode writing for a thin link bitcode file. class ThinLinkBitcodeWriter : public ModuleBitcodeWriterBase { /// ModHash is for use in ThinLTO incremental build, generated while writing /// the module bitcode file. const ModuleHash *ModHash; public: ThinLinkBitcodeWriter(const Module *M, StringTableBuilder &StrtabBuilder, BitstreamWriter &Stream, const ModuleSummaryIndex &Index, const ModuleHash &ModHash) : ModuleBitcodeWriterBase(M, StrtabBuilder, Stream, /*ShouldPreserveUseListOrder=*/false, &Index), ModHash(&ModHash) {} void write(); private: void writeSimplifiedModuleInfo(); }; } // end anonymous namespace // This function writes a simplified module info for the thin link bitcode file. // It only contains the source file name along with the name (the offset and // size in strtab) and linkage for global values. For the global value info // entry, in order to keep linkage at offset 5, there are three zeros used // as padding. void ThinLinkBitcodeWriter::writeSimplifiedModuleInfo() { SmallVector Vals; // Emit the module's source file name.
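// (For instance, a module built from "a.c" emits MODULE_CODE_SOURCE_FILENAME
// ['a', '.', 'c'] using the densest of the three encodings selected below.)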
{ StringEncoding Bits = getStringEncoding(M.getSourceFileName()); BitCodeAbbrevOp AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8); if (Bits == SE_Char6) AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Char6); else if (Bits == SE_Fixed7) AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7); // MODULE_CODE_SOURCE_FILENAME: [namechar x N] auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_SOURCE_FILENAME)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(AbbrevOpToUse); unsigned FilenameAbbrev = Stream.EmitAbbrev(std::move(Abbv)); for (const auto P : M.getSourceFileName()) Vals.push_back((unsigned char)P); Stream.EmitRecord(bitc::MODULE_CODE_SOURCE_FILENAME, Vals, FilenameAbbrev); Vals.clear(); } // Emit the global variable information. for (const GlobalVariable &GV : M.globals()) { // GLOBALVAR: [strtab offset, strtab size, 0, 0, 0, linkage] Vals.push_back(StrtabBuilder.add(GV.getName())); Vals.push_back(GV.getName().size()); Vals.push_back(0); Vals.push_back(0); Vals.push_back(0); Vals.push_back(getEncodedLinkage(GV)); Stream.EmitRecord(bitc::MODULE_CODE_GLOBALVAR, Vals); Vals.clear(); } // Emit the function proto information. for (const Function &F : M) { // FUNCTION: [strtab offset, strtab size, 0, 0, 0, linkage] Vals.push_back(StrtabBuilder.add(F.getName())); Vals.push_back(F.getName().size()); Vals.push_back(0); Vals.push_back(0); Vals.push_back(0); Vals.push_back(getEncodedLinkage(F)); Stream.EmitRecord(bitc::MODULE_CODE_FUNCTION, Vals); Vals.clear(); } // Emit the alias information. for (const GlobalAlias &A : M.aliases()) { // ALIAS: [strtab offset, strtab size, 0, 0, 0, linkage] Vals.push_back(StrtabBuilder.add(A.getName())); Vals.push_back(A.getName().size()); Vals.push_back(0); Vals.push_back(0); Vals.push_back(0); Vals.push_back(getEncodedLinkage(A)); Stream.EmitRecord(bitc::MODULE_CODE_ALIAS, Vals); Vals.clear(); } // Emit the ifunc information. for (const GlobalIFunc &I : M.ifuncs()) { // IFUNC: [strtab offset, strtab size, 0, 0, 0, linkage] Vals.push_back(StrtabBuilder.add(I.getName())); Vals.push_back(I.getName().size()); Vals.push_back(0); Vals.push_back(0); Vals.push_back(0); Vals.push_back(getEncodedLinkage(I)); Stream.EmitRecord(bitc::MODULE_CODE_IFUNC, Vals); Vals.clear(); } } void ThinLinkBitcodeWriter::write() { Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3); writeModuleVersion(); writeSimplifiedModuleInfo(); writePerModuleGlobalValueSummary(); // Write module hash. Stream.EmitRecord(bitc::MODULE_CODE_HASH, ArrayRef(*ModHash)); Stream.ExitBlock(); } void BitcodeWriter::writeThinLinkBitcode(const Module *M, const ModuleSummaryIndex &Index, const ModuleHash &ModHash) { assert(!WroteStrtab); // The Mods vector is used by irsymtab::build, which requires non-const // Modules in case it needs to materialize metadata. But the bitcode writer // requires that the module is materialized, so we can cast to non-const here, // after checking that it is in fact materialized. assert(M->isMaterialized()); Mods.push_back(const_cast(M)); ThinLinkBitcodeWriter ThinLinkWriter(M, StrtabBuilder, *Stream, Index, ModHash); ThinLinkWriter.write(); } // Write the specified thin link bitcode file to the given raw output stream, // where it will be written in a new bitcode block. This is used when // writing the per-module index file for ThinLTO. 
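// A sketched call site (illustrative, not part of this change):
//   WriteThinLinkBitcodeToFile(M, Out, Index, ModHash);
// where ModHash was previously filled in by a WriteBitcodeToFile(...,
// /*GenerateHash=*/true, &ModHash) call for the same module.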
void llvm::WriteThinLinkBitcodeToFile(const Module *M, raw_ostream &Out, const ModuleSummaryIndex &Index, const ModuleHash &ModHash) { SmallVector Buffer; Buffer.reserve(256 * 1024); BitcodeWriter Writer(Buffer); Writer.writeThinLinkBitcode(M, Index, ModHash); Writer.writeSymtab(); Writer.writeStrtab(); Out.write((char *)&Buffer.front(), Buffer.size()); } Index: head/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp =================================================================== --- head/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp (revision 329982) +++ head/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp (revision 329983) @@ -1,1572 +1,1633 @@ //===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// /// \file /// This file implements the targeting of the InstructionSelector class for /// AArch64. /// \todo This should be generated by TableGen. //===----------------------------------------------------------------------===// #include "AArch64InstrInfo.h" #include "AArch64MachineFunctionInfo.h" #include "AArch64RegisterBankInfo.h" #include "AArch64RegisterInfo.h" #include "AArch64Subtarget.h" #include "AArch64TargetMachine.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #define DEBUG_TYPE "aarch64-isel" using namespace llvm; namespace { #define GET_GLOBALISEL_PREDICATE_BITSET #include "AArch64GenGlobalISel.inc" #undef GET_GLOBALISEL_PREDICATE_BITSET class AArch64InstructionSelector : public InstructionSelector { public: AArch64InstructionSelector(const AArch64TargetMachine &TM, const AArch64Subtarget &STI, const AArch64RegisterBankInfo &RBI); bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override; static const char *getName() { return DEBUG_TYPE; } private: /// tblgen-erated 'select' implementation, used as the initial selector for /// the patterns that don't require complex C++. 
bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const; bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const; bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const; bool selectCompareBranch(MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const; ComplexRendererFns selectArithImmed(MachineOperand &Root) const; ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root, unsigned Size) const; ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const { return selectAddrModeUnscaled(Root, 1); } ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const { return selectAddrModeUnscaled(Root, 2); } ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const { return selectAddrModeUnscaled(Root, 4); } ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const { return selectAddrModeUnscaled(Root, 8); } ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const { return selectAddrModeUnscaled(Root, 16); } ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root, unsigned Size) const; template ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const { return selectAddrModeIndexed(Root, Width / 8); } const AArch64TargetMachine &TM; const AArch64Subtarget &STI; const AArch64InstrInfo &TII; const AArch64RegisterInfo &TRI; const AArch64RegisterBankInfo &RBI; #define GET_GLOBALISEL_PREDICATES_DECL #include "AArch64GenGlobalISel.inc" #undef GET_GLOBALISEL_PREDICATES_DECL // We declare the temporaries used by selectImpl() in the class to minimize the // cost of constructing placeholder values. #define GET_GLOBALISEL_TEMPORARIES_DECL #include "AArch64GenGlobalISel.inc" #undef GET_GLOBALISEL_TEMPORARIES_DECL }; } // end anonymous namespace #define GET_GLOBALISEL_IMPL #include "AArch64GenGlobalISel.inc" #undef GET_GLOBALISEL_IMPL AArch64InstructionSelector::AArch64InstructionSelector( const AArch64TargetMachine &TM, const AArch64Subtarget &STI, const AArch64RegisterBankInfo &RBI) : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), #define GET_GLOBALISEL_PREDICATES_INIT #include "AArch64GenGlobalISel.inc" #undef GET_GLOBALISEL_PREDICATES_INIT #define GET_GLOBALISEL_TEMPORARIES_INIT #include "AArch64GenGlobalISel.inc" #undef GET_GLOBALISEL_TEMPORARIES_INIT { } // FIXME: This should be target-independent, inferred from the types declared // for each class in the bank. static const TargetRegisterClass * getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB, - const RegisterBankInfo &RBI) { + const RegisterBankInfo &RBI, + bool GetAllRegSet = false) { if (RB.getID() == AArch64::GPRRegBankID) { if (Ty.getSizeInBits() <= 32) - return &AArch64::GPR32RegClass; + return GetAllRegSet ? &AArch64::GPR32allRegClass + : &AArch64::GPR32RegClass; if (Ty.getSizeInBits() == 64) - return &AArch64::GPR64RegClass; + return GetAllRegSet ? 
&AArch64::GPR64allRegClass + : &AArch64::GPR64RegClass; return nullptr; } if (RB.getID() == AArch64::FPRRegBankID) { + if (Ty.getSizeInBits() <= 16) + return &AArch64::FPR16RegClass; if (Ty.getSizeInBits() == 32) return &AArch64::FPR32RegClass; if (Ty.getSizeInBits() == 64) return &AArch64::FPR64RegClass; if (Ty.getSizeInBits() == 128) return &AArch64::FPR128RegClass; return nullptr; } return nullptr; } /// Check whether \p I is a currently unsupported binary operation: /// - it has an unsized type /// - an operand is not a vreg /// - not all operands are on the same bank /// These are checks that should someday live in the verifier, but right now, /// these are mostly limitations of the aarch64 selector. static bool unsupportedBinOp(const MachineInstr &I, const AArch64RegisterBankInfo &RBI, const MachineRegisterInfo &MRI, const AArch64RegisterInfo &TRI) { LLT Ty = MRI.getType(I.getOperand(0).getReg()); if (!Ty.isValid()) { DEBUG(dbgs() << "Generic binop register should be typed\n"); return true; } const RegisterBank *PrevOpBank = nullptr; for (auto &MO : I.operands()) { // FIXME: Support non-register operands. if (!MO.isReg()) { DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n"); return true; } // FIXME: Can generic operations have physical register operands? If // so, this will need to be taught about that, and we'll need to get the // bank out of the minimal class for the register. // Either way, this needs to be documented (and possibly verified). if (!TargetRegisterInfo::isVirtualRegister(MO.getReg())) { DEBUG(dbgs() << "Generic inst has physical register operand\n"); return true; } const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI); if (!OpBank) { DEBUG(dbgs() << "Generic register has no bank or class\n"); return true; } if (PrevOpBank && OpBank != PrevOpBank) { DEBUG(dbgs() << "Generic inst operands have different banks\n"); return true; } PrevOpBank = OpBank; } return false; } /// Select the AArch64 opcode for the basic binary operation \p GenericOpc /// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID /// and of size \p OpSize. /// \returns \p GenericOpc if the combination is unsupported.
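/// For example, per the table below, (G_SHL, GPRRegBankID, 32) selects
/// AArch64::LSLVWr and (G_FADD, FPRRegBankID, 64) selects AArch64::FADDDrr.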
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize) { switch (RegBankID) { case AArch64::GPRRegBankID: if (OpSize == 32) { switch (GenericOpc) { case TargetOpcode::G_SHL: return AArch64::LSLVWr; case TargetOpcode::G_LSHR: return AArch64::LSRVWr; case TargetOpcode::G_ASHR: return AArch64::ASRVWr; default: return GenericOpc; } } else if (OpSize == 64) { switch (GenericOpc) { case TargetOpcode::G_GEP: return AArch64::ADDXrr; case TargetOpcode::G_SHL: return AArch64::LSLVXr; case TargetOpcode::G_LSHR: return AArch64::LSRVXr; case TargetOpcode::G_ASHR: return AArch64::ASRVXr; default: return GenericOpc; } } break; case AArch64::FPRRegBankID: switch (OpSize) { case 32: switch (GenericOpc) { case TargetOpcode::G_FADD: return AArch64::FADDSrr; case TargetOpcode::G_FSUB: return AArch64::FSUBSrr; case TargetOpcode::G_FMUL: return AArch64::FMULSrr; case TargetOpcode::G_FDIV: return AArch64::FDIVSrr; default: return GenericOpc; } case 64: switch (GenericOpc) { case TargetOpcode::G_FADD: return AArch64::FADDDrr; case TargetOpcode::G_FSUB: return AArch64::FSUBDrr; case TargetOpcode::G_FMUL: return AArch64::FMULDrr; case TargetOpcode::G_FDIV: return AArch64::FDIVDrr; case TargetOpcode::G_OR: return AArch64::ORRv8i8; default: return GenericOpc; } } break; } return GenericOpc; } /// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc, /// appropriate for the (value) register bank \p RegBankID and of memory access /// size \p OpSize. This returns the variant with the base+unsigned-immediate /// addressing mode (e.g., LDRXui). /// \returns \p GenericOpc if the combination is unsupported. static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize) { const bool isStore = GenericOpc == TargetOpcode::G_STORE; switch (RegBankID) { case AArch64::GPRRegBankID: switch (OpSize) { case 8: return isStore ? AArch64::STRBBui : AArch64::LDRBBui; case 16: return isStore ? AArch64::STRHHui : AArch64::LDRHHui; case 32: return isStore ? AArch64::STRWui : AArch64::LDRWui; case 64: return isStore ? AArch64::STRXui : AArch64::LDRXui; } break; case AArch64::FPRRegBankID: switch (OpSize) { case 8: return isStore ? AArch64::STRBui : AArch64::LDRBui; case 16: return isStore ? AArch64::STRHui : AArch64::LDRHui; case 32: return isStore ? AArch64::STRSui : AArch64::LDRSui; case 64: return isStore ? AArch64::STRDui : AArch64::LDRDui; } break; } return GenericOpc; } +static bool selectFP16CopyFromGPR32(MachineInstr &I, const TargetInstrInfo &TII, + MachineRegisterInfo &MRI, unsigned SrcReg) { + // Copies from gpr32 to fpr16 need to use a sub-register copy. 
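+ // In MIR terms the sequence built here is (vreg names illustrative):
+ //   %tmp:fpr32 = COPY %src:gpr32
+ //   %dst:fpr16 = COPY %tmp.hsub
+ // i.e. a full-width cross-bank copy followed by an hsub subregister copy.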
+ unsigned CopyReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass); + BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::COPY)) + .addDef(CopyReg) + .addUse(SrcReg); + unsigned SubRegCopy = MRI.createVirtualRegister(&AArch64::FPR16RegClass); + BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY)) + .addDef(SubRegCopy) + .addUse(CopyReg, 0, AArch64::hsub); + + MachineOperand &RegOp = I.getOperand(1); + RegOp.setReg(SubRegCopy); + return true; +} + static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI) { unsigned DstReg = I.getOperand(0).getReg(); + unsigned SrcReg = I.getOperand(1).getReg(); + if (TargetRegisterInfo::isPhysicalRegister(DstReg)) { + if (TRI.getRegClass(AArch64::FPR16RegClassID)->contains(DstReg) && + !TargetRegisterInfo::isPhysicalRegister(SrcReg)) { + const RegisterBank &RegBank = *RBI.getRegBank(SrcReg, MRI, TRI); + const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank( + MRI.getType(SrcReg), RegBank, RBI, /* GetAllRegSet */ true); + if (SrcRC == &AArch64::GPR32allRegClass) + return selectFP16CopyFromGPR32(I, TII, MRI, SrcReg); + } assert(I.isCopy() && "Generic operators do not allow physical registers"); return true; } const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI); const unsigned DstSize = MRI.getType(DstReg).getSizeInBits(); - unsigned SrcReg = I.getOperand(1).getReg(); + (void)DstSize; const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI); (void)SrcSize; assert((!TargetRegisterInfo::isPhysicalRegister(SrcReg) || I.isCopy()) && "No phys reg on generic operators"); assert( (DstSize == SrcSize || // Copies are a mean to setup initial types, the number of // bits may not exactly match. (TargetRegisterInfo::isPhysicalRegister(SrcReg) && DstSize <= RBI.getSizeInBits(SrcReg, MRI, TRI)) || // Copies are a mean to copy bits around, as long as we are // on the same register class, that's fine. Otherwise, that // means we need some SUBREG_TO_REG or AND & co. (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && "Copy with different width?!"); assert((DstSize <= 64 || RegBank.getID() == AArch64::FPRRegBankID) && "GPRs cannot get more than 64-bit width values"); - const TargetRegisterClass *RC = nullptr; - if (RegBank.getID() == AArch64::FPRRegBankID) { - if (DstSize <= 16) - RC = &AArch64::FPR16RegClass; - else if (DstSize <= 32) - RC = &AArch64::FPR32RegClass; - else if (DstSize <= 64) - RC = &AArch64::FPR64RegClass; - else if (DstSize <= 128) - RC = &AArch64::FPR128RegClass; - else { - DEBUG(dbgs() << "Unexpected bitcast size " << DstSize << '\n'); - return false; + const TargetRegisterClass *RC = getRegClassForTypeOnBank( + MRI.getType(DstReg), RegBank, RBI, /* GetAllRegSet */ true); + if (!RC) { + DEBUG(dbgs() << "Unexpected bitcast size " << DstSize << '\n'); + return false; + } + + if (!TargetRegisterInfo::isPhysicalRegister(SrcReg)) { + const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(SrcReg); + const TargetRegisterClass *SrcRC = + RegClassOrBank.dyn_cast(); + const RegisterBank *RB = nullptr; + if (!SrcRC) { + RB = RegClassOrBank.get(); + SrcRC = getRegClassForTypeOnBank(MRI.getType(SrcReg), *RB, RBI, true); } - } else { - assert(RegBank.getID() == AArch64::GPRRegBankID && - "Bitcast for the flags?"); - RC = - DstSize <= 32 ? &AArch64::GPR32allRegClass : &AArch64::GPR64allRegClass; + // Copies from fpr16 to gpr32 need to use SUBREG_TO_REG. 
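+ // Roughly (vreg names illustrative):
+ //   %tmp:fpr32 = SUBREG_TO_REG 0, %src:fpr16, hsub
+ // after which the original COPY is rewritten to read the widened fpr32
+ // value %tmp instead of %src.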
+ if (RC == &AArch64::GPR32allRegClass && SrcRC == &AArch64::FPR16RegClass) { + unsigned PromoteReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass); + BuildMI(*I.getParent(), I, I.getDebugLoc(), + TII.get(AArch64::SUBREG_TO_REG)) + .addDef(PromoteReg) + .addImm(0) + .addUse(SrcReg) + .addImm(AArch64::hsub); + MachineOperand &RegOp = I.getOperand(1); + RegOp.setReg(PromoteReg); + } else if (RC == &AArch64::FPR16RegClass && + SrcRC == &AArch64::GPR32allRegClass) { + selectFP16CopyFromGPR32(I, TII, MRI, SrcReg); + } } // No need to constrain SrcReg. It will get constrained when // we hit another of its use or its defs. // Copies do not have constraints. if (!RBI.constrainGenericRegister(DstReg, *RC, MRI)) { DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode()) << " operand\n"); return false; } I.setDesc(TII.get(AArch64::COPY)); return true; } static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) { if (!DstTy.isScalar() || !SrcTy.isScalar()) return GenericOpc; const unsigned DstSize = DstTy.getSizeInBits(); const unsigned SrcSize = SrcTy.getSizeInBits(); switch (DstSize) { case 32: switch (SrcSize) { case 32: switch (GenericOpc) { case TargetOpcode::G_SITOFP: return AArch64::SCVTFUWSri; case TargetOpcode::G_UITOFP: return AArch64::UCVTFUWSri; case TargetOpcode::G_FPTOSI: return AArch64::FCVTZSUWSr; case TargetOpcode::G_FPTOUI: return AArch64::FCVTZUUWSr; default: return GenericOpc; } case 64: switch (GenericOpc) { case TargetOpcode::G_SITOFP: return AArch64::SCVTFUXSri; case TargetOpcode::G_UITOFP: return AArch64::UCVTFUXSri; case TargetOpcode::G_FPTOSI: return AArch64::FCVTZSUWDr; case TargetOpcode::G_FPTOUI: return AArch64::FCVTZUUWDr; default: return GenericOpc; } default: return GenericOpc; } case 64: switch (SrcSize) { case 32: switch (GenericOpc) { case TargetOpcode::G_SITOFP: return AArch64::SCVTFUWDri; case TargetOpcode::G_UITOFP: return AArch64::UCVTFUWDri; case TargetOpcode::G_FPTOSI: return AArch64::FCVTZSUXSr; case TargetOpcode::G_FPTOUI: return AArch64::FCVTZUUXSr; default: return GenericOpc; } case 64: switch (GenericOpc) { case TargetOpcode::G_SITOFP: return AArch64::SCVTFUXDri; case TargetOpcode::G_UITOFP: return AArch64::UCVTFUXDri; case TargetOpcode::G_FPTOSI: return AArch64::FCVTZSUXDr; case TargetOpcode::G_FPTOUI: return AArch64::FCVTZUUXDr; default: return GenericOpc; } default: return GenericOpc; } default: return GenericOpc; }; return GenericOpc; } static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) { switch (P) { default: llvm_unreachable("Unknown condition code!"); case CmpInst::ICMP_NE: return AArch64CC::NE; case CmpInst::ICMP_EQ: return AArch64CC::EQ; case CmpInst::ICMP_SGT: return AArch64CC::GT; case CmpInst::ICMP_SGE: return AArch64CC::GE; case CmpInst::ICMP_SLT: return AArch64CC::LT; case CmpInst::ICMP_SLE: return AArch64CC::LE; case CmpInst::ICMP_UGT: return AArch64CC::HI; case CmpInst::ICMP_UGE: return AArch64CC::HS; case CmpInst::ICMP_ULT: return AArch64CC::LO; case CmpInst::ICMP_ULE: return AArch64CC::LS; } } static void changeFCMPPredToAArch64CC(CmpInst::Predicate P, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2) { CondCode2 = AArch64CC::AL; switch (P) { default: llvm_unreachable("Unknown FP condition!"); case CmpInst::FCMP_OEQ: CondCode = AArch64CC::EQ; break; case CmpInst::FCMP_OGT: CondCode = AArch64CC::GT; break; case CmpInst::FCMP_OGE: CondCode = AArch64CC::GE; break; case CmpInst::FCMP_OLT: CondCode = AArch64CC::MI; break; case CmpInst::FCMP_OLE: CondCode = AArch64CC::LS; 
break; case CmpInst::FCMP_ONE: CondCode = AArch64CC::MI; CondCode2 = AArch64CC::GT; break; case CmpInst::FCMP_ORD: CondCode = AArch64CC::VC; break; case CmpInst::FCMP_UNO: CondCode = AArch64CC::VS; break; case CmpInst::FCMP_UEQ: CondCode = AArch64CC::EQ; CondCode2 = AArch64CC::VS; break; case CmpInst::FCMP_UGT: CondCode = AArch64CC::HI; break; case CmpInst::FCMP_UGE: CondCode = AArch64CC::PL; break; case CmpInst::FCMP_ULT: CondCode = AArch64CC::LT; break; case CmpInst::FCMP_ULE: CondCode = AArch64CC::LE; break; case CmpInst::FCMP_UNE: CondCode = AArch64CC::NE; break; } } bool AArch64InstructionSelector::selectCompareBranch( MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const { const unsigned CondReg = I.getOperand(0).getReg(); MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); MachineInstr *CCMI = MRI.getVRegDef(CondReg); if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg()); if (CCMI->getOpcode() != TargetOpcode::G_ICMP) return false; unsigned LHS = CCMI->getOperand(2).getReg(); unsigned RHS = CCMI->getOperand(3).getReg(); if (!getConstantVRegVal(RHS, MRI)) std::swap(RHS, LHS); const auto RHSImm = getConstantVRegVal(RHS, MRI); if (!RHSImm || *RHSImm != 0) return false; const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI); if (RB.getID() != AArch64::GPRRegBankID) return false; const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate(); if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ) return false; const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits(); unsigned CBOpc = 0; if (CmpWidth <= 32) CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW); else if (CmpWidth == 64) CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX); else return false; auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc)) .addUse(LHS) .addMBB(DestMBB); constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI); I.eraseFromParent(); return true; } bool AArch64InstructionSelector::selectVaStartAAPCS( MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const { return false; } bool AArch64InstructionSelector::selectVaStartDarwin( MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const { AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); unsigned ListReg = I.getOperand(0).getReg(); unsigned ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri)) .addDef(ArgsAddrReg) .addFrameIndex(FuncInfo->getVarArgsStackIndex()) .addImm(0) .addImm(0); constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui)) .addUse(ArgsAddrReg) .addUse(ListReg) .addImm(0) .addMemOperand(*I.memoperands_begin()); constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); I.eraseFromParent(); return true; } bool AArch64InstructionSelector::select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const { assert(I.getParent() && "Instruction should be in a basic block!"); assert(I.getParent()->getParent() && "Instruction should be in a function!"); MachineBasicBlock &MBB = *I.getParent(); MachineFunction &MF = *MBB.getParent(); MachineRegisterInfo &MRI = MF.getRegInfo(); unsigned Opcode = I.getOpcode(); // G_PHI requires same handling as PHI if (!isPreISelGenericOpcode(Opcode) || Opcode == TargetOpcode::G_PHI) { // Certain non-generic instructions also need some special handling.
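// Illustrative sketch (editorial, not from the change itself): for
//   %c(s1) = G_ICMP intpred(ne), %x(s64), 0 ; G_BRCOND %c, %bb.dest
// selectCompareBranch above folds the compare and the branch into a single
//   CBNZX %x, %bb.dest
// instead of materializing %c and testing it with TBNZW.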
if (Opcode == TargetOpcode::LOAD_STACK_GUARD) return constrainSelectedInstRegOperands(I, TII, TRI, RBI); if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) { const unsigned DefReg = I.getOperand(0).getReg(); const LLT DefTy = MRI.getType(DefReg); const TargetRegisterClass *DefRC = nullptr; if (TargetRegisterInfo::isPhysicalRegister(DefReg)) { DefRC = TRI.getRegClass(DefReg); } else { const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(DefReg); DefRC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>(); if (!DefRC) { if (!DefTy.isValid()) { DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n"); return false; } const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>(); DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI); if (!DefRC) { DEBUG(dbgs() << "PHI operand has unexpected size/bank\n"); return false; } } } I.setDesc(TII.get(TargetOpcode::PHI)); return RBI.constrainGenericRegister(DefReg, *DefRC, MRI); } if (I.isCopy()) return selectCopy(I, TII, MRI, TRI, RBI); return true; } if (I.getNumOperands() != I.getNumExplicitOperands()) { DEBUG(dbgs() << "Generic instruction has unexpected implicit operands\n"); return false; } if (selectImpl(I, CoverageInfo)) return true; LLT Ty = I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{}; switch (Opcode) { case TargetOpcode::G_BRCOND: { if (Ty.getSizeInBits() > 32) { // We shouldn't need this on AArch64, but it would be implemented as an // EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if the // bit being tested is < 32. DEBUG(dbgs() << "G_BRCOND has type: " << Ty << ", expected at most 32-bits"); return false; } const unsigned CondReg = I.getOperand(0).getReg(); MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); if (selectCompareBranch(I, MF, MRI)) return true; auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW)) .addUse(CondReg) .addImm(/*bit offset=*/0) .addMBB(DestMBB); I.eraseFromParent(); return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI); } case TargetOpcode::G_BRINDIRECT: { I.setDesc(TII.get(AArch64::BR)); return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } case TargetOpcode::G_FCONSTANT: case TargetOpcode::G_CONSTANT: { const bool isFP = Opcode == TargetOpcode::G_FCONSTANT; const LLT s32 = LLT::scalar(32); const LLT s64 = LLT::scalar(64); const LLT p0 = LLT::pointer(0, 64); const unsigned DefReg = I.getOperand(0).getReg(); const LLT DefTy = MRI.getType(DefReg); const unsigned DefSize = DefTy.getSizeInBits(); const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); // FIXME: Redundant check, but even less readable when factored out. if (isFP) { if (Ty != s32 && Ty != s64) { DEBUG(dbgs() << "Unable to materialize FP " << Ty << " constant, expected: " << s32 << " or " << s64 << '\n'); return false; } if (RB.getID() != AArch64::FPRRegBankID) { DEBUG(dbgs() << "Unable to materialize FP " << Ty << " constant on bank: " << RB << ", expected: FPR\n"); return false; } // The case when we have 0.0 is covered by tablegen. Reject it here so we // can be sure tablegen works correctly and isn't rescued by this code. if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0)) return false; } else { // s32 and s64 are covered by tablegen.
if (Ty != p0) { DEBUG(dbgs() << "Unable to materialize integer " << Ty << " constant, expected: " << s32 << ", " << s64 << ", or " << p0 << '\n'); return false; } if (RB.getID() != AArch64::GPRRegBankID) { DEBUG(dbgs() << "Unable to materialize integer " << Ty << " constant on bank: " << RB << ", expected: GPR\n"); return false; } } const unsigned MovOpc = DefSize == 32 ? AArch64::MOVi32imm : AArch64::MOVi64imm; I.setDesc(TII.get(MovOpc)); if (isFP) { const TargetRegisterClass &GPRRC = DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass; const TargetRegisterClass &FPRRC = DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass; const unsigned DefGPRReg = MRI.createVirtualRegister(&GPRRC); MachineOperand &RegOp = I.getOperand(0); RegOp.setReg(DefGPRReg); BuildMI(MBB, std::next(I.getIterator()), I.getDebugLoc(), TII.get(AArch64::COPY)) .addDef(DefReg) .addUse(DefGPRReg); if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) { DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n"); return false; } MachineOperand &ImmOp = I.getOperand(1); // FIXME: Is going through int64_t always correct? ImmOp.ChangeToImmediate( ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue()); } else if (I.getOperand(1).isCImm()) { uint64_t Val = I.getOperand(1).getCImm()->getZExtValue(); I.getOperand(1).ChangeToImmediate(Val); } else if (I.getOperand(1).isImm()) { uint64_t Val = I.getOperand(1).getImm(); I.getOperand(1).ChangeToImmediate(Val); } constrainSelectedInstRegOperands(I, TII, TRI, RBI); return true; } case TargetOpcode::G_EXTRACT: { LLT SrcTy = MRI.getType(I.getOperand(1).getReg()); + LLT DstTy = MRI.getType(I.getOperand(0).getReg()); + unsigned SrcSize = SrcTy.getSizeInBits(); // Larger extracts are vectors, same-size extracts should be something else // by now (either split up or simplified to a COPY). if (SrcTy.getSizeInBits() > 64 || Ty.getSizeInBits() > 32) return false; - I.setDesc(TII.get(AArch64::UBFMXri)); + I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri)); MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() + Ty.getSizeInBits() - 1); + if (SrcSize < 64) { + assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 && + "unexpected G_EXTRACT types"); + return constrainSelectedInstRegOperands(I, TII, TRI, RBI); + } + unsigned DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64)); BuildMI(MBB, std::next(I.getIterator()), I.getDebugLoc(), TII.get(AArch64::COPY)) .addDef(I.getOperand(0).getReg()) .addUse(DstReg, 0, AArch64::sub_32); RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR32RegClass, MRI); I.getOperand(0).setReg(DstReg); return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } case TargetOpcode::G_INSERT: { LLT SrcTy = MRI.getType(I.getOperand(2).getReg()); + LLT DstTy = MRI.getType(I.getOperand(0).getReg()); + unsigned DstSize = DstTy.getSizeInBits(); + (void)DstSize; // Larger inserts are vectors, same-size ones should be something else by // now (split up or turned into COPYs). if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32) return false; - I.setDesc(TII.get(AArch64::BFMXri)); + I.setDesc(TII.get(DstSize == 64 ? 
AArch64::BFMXri : AArch64::BFMWri)); unsigned LSB = I.getOperand(3).getImm(); unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits(); - I.getOperand(3).setImm((64 - LSB) % 64); + I.getOperand(3).setImm((DstSize - LSB) % DstSize); MachineInstrBuilder(MF, I).addImm(Width - 1); + + if (DstSize < 64) { + assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 && + "unexpected G_INSERT types"); + return constrainSelectedInstRegOperands(I, TII, TRI, RBI); + } unsigned SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64)); BuildMI(MBB, I.getIterator(), I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG)) .addDef(SrcReg) .addImm(0) .addUse(I.getOperand(2).getReg()) .addImm(AArch64::sub_32); RBI.constrainGenericRegister(I.getOperand(2).getReg(), AArch64::GPR32RegClass, MRI); I.getOperand(2).setReg(SrcReg); return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } case TargetOpcode::G_FRAME_INDEX: { // allocas and G_FRAME_INDEX are only supported in addrspace(0). if (Ty != LLT::pointer(0, 64)) { DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty << ", expected: " << LLT::pointer(0, 64) << '\n'); return false; } I.setDesc(TII.get(AArch64::ADDXri)); // MOs for a #0 shifted immediate. I.addOperand(MachineOperand::CreateImm(0)); I.addOperand(MachineOperand::CreateImm(0)); return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } case TargetOpcode::G_GLOBAL_VALUE: { auto GV = I.getOperand(1).getGlobal(); if (GV->isThreadLocal()) { // FIXME: we don't support TLS yet. return false; } unsigned char OpFlags = STI.ClassifyGlobalReference(GV, TM); if (OpFlags & AArch64II::MO_GOT) { I.setDesc(TII.get(AArch64::LOADgot)); I.getOperand(1).setTargetFlags(OpFlags); } else if (TM.getCodeModel() == CodeModel::Large) { // Materialize the global using movz/movk instructions. unsigned MovZDstReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); auto InsertPt = std::next(I.getIterator()); auto MovZ = BuildMI(MBB, InsertPt, I.getDebugLoc(), TII.get(AArch64::MOVZXi)) .addDef(MovZDstReg); MovZ->addOperand(MF, I.getOperand(1)); MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 | AArch64II::MO_NC); MovZ->addOperand(MF, MachineOperand::CreateImm(0)); constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI); auto BuildMovK = [&](unsigned SrcReg, unsigned char Flags, unsigned Offset, unsigned ForceDstReg) { unsigned DstReg = ForceDstReg ? 
ForceDstReg : MRI.createVirtualRegister(&AArch64::GPR64RegClass); auto MovI = BuildMI(MBB, InsertPt, MovZ->getDebugLoc(), TII.get(AArch64::MOVKXi)) .addDef(DstReg) .addReg(SrcReg); MovI->addOperand(MF, MachineOperand::CreateGA( GV, MovZ->getOperand(1).getOffset(), Flags)); MovI->addOperand(MF, MachineOperand::CreateImm(Offset)); constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI); return DstReg; }; unsigned DstReg = BuildMovK(MovZ->getOperand(0).getReg(), AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0); DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0); BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg()); I.eraseFromParent(); return true; } else { I.setDesc(TII.get(AArch64::MOVaddr)); I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE); MachineInstrBuilder MIB(MF, I); MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(), OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); } return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } case TargetOpcode::G_LOAD: case TargetOpcode::G_STORE: { LLT MemTy = Ty; LLT PtrTy = MRI.getType(I.getOperand(1).getReg()); if (PtrTy != LLT::pointer(0, 64)) { DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy << ", expected: " << LLT::pointer(0, 64) << '\n'); return false; } auto &MemOp = **I.memoperands_begin(); if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) { DEBUG(dbgs() << "Atomic load/store not supported yet\n"); return false; } // FIXME: PR36018: Volatile loads in some cases are incorrectly selected by // folding with an extend. Until we have a G_SEXTLOAD solution bail out if // we hit one. if (Opcode == TargetOpcode::G_LOAD && MemOp.isVolatile()) return false; const unsigned PtrReg = I.getOperand(1).getReg(); #ifndef NDEBUG const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI); // Sanity-check the pointer register. assert(PtrRB.getID() == AArch64::GPRRegBankID && "Load/Store pointer operand isn't a GPR"); assert(MRI.getType(PtrReg).isPointer() && "Load/Store pointer operand isn't a pointer"); #endif const unsigned ValReg = I.getOperand(0).getReg(); const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI); const unsigned NewOpc = selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemTy.getSizeInBits()); if (NewOpc == I.getOpcode()) return false; I.setDesc(TII.get(NewOpc)); uint64_t Offset = 0; auto *PtrMI = MRI.getVRegDef(PtrReg); // Try to fold a GEP into our unsigned immediate addressing mode. if (PtrMI->getOpcode() == TargetOpcode::G_GEP) { if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) { int64_t Imm = *COff; const unsigned Size = MemTy.getSizeInBits() / 8; const unsigned Scale = Log2_32(Size); if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) { unsigned Ptr2Reg = PtrMI->getOperand(1).getReg(); I.getOperand(1).setReg(Ptr2Reg); PtrMI = MRI.getVRegDef(Ptr2Reg); Offset = Imm / Size; } } } // If we haven't folded anything into our addressing mode yet, try to fold // a frame index into the base+offset. if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX) I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex()); I.addOperand(MachineOperand::CreateImm(Offset)); // If we're storing a 0, use WZR/XZR. 
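// Worked example of the GEP fold above (editorial): for a 4-byte G_LOAD,
// Size = 4 and Scale = 2, so a G_GEP constant offset qualifies only if it is
// a non-negative multiple of 4 below 0x1000 << 2 = 16384; a byte offset of
// 64 is then encoded as the scaled immediate 64 / 4 = 16.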
if (auto CVal = getConstantVRegVal(ValReg, MRI)) { if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) { if (I.getOpcode() == AArch64::STRWui) I.getOperand(0).setReg(AArch64::WZR); else if (I.getOpcode() == AArch64::STRXui) I.getOperand(0).setReg(AArch64::XZR); } } return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } case TargetOpcode::G_SMULH: case TargetOpcode::G_UMULH: { // Reject the various things we don't support yet. if (unsupportedBinOp(I, RBI, MRI, TRI)) return false; const unsigned DefReg = I.getOperand(0).getReg(); const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); if (RB.getID() != AArch64::GPRRegBankID) { DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n"); return false; } if (Ty != LLT::scalar(64)) { DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty << ", expected: " << LLT::scalar(64) << '\n'); return false; } unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr : AArch64::UMULHrr; I.setDesc(TII.get(NewOpc)); // Now that we selected an opcode, we need to constrain the register // operands to use appropriate classes. return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } case TargetOpcode::G_FADD: case TargetOpcode::G_FSUB: case TargetOpcode::G_FMUL: case TargetOpcode::G_FDIV: case TargetOpcode::G_OR: case TargetOpcode::G_SHL: case TargetOpcode::G_LSHR: case TargetOpcode::G_ASHR: case TargetOpcode::G_GEP: { // Reject the various things we don't support yet. if (unsupportedBinOp(I, RBI, MRI, TRI)) return false; const unsigned OpSize = Ty.getSizeInBits(); const unsigned DefReg = I.getOperand(0).getReg(); const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize); if (NewOpc == I.getOpcode()) return false; I.setDesc(TII.get(NewOpc)); // FIXME: Should the type be always reset in setDesc? // Now that we selected an opcode, we need to constrain the register // operands to use appropriate classes. 
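// e.g. (editorial note): selectBinaryOp above maps the generic opcode plus
// register bank and size to a concrete instruction, such as G_OR on the GPR
// bank at 64 bits becoming ORRXrr; when no mapping exists it returns the
// generic opcode unchanged and we bail out.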
return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } case TargetOpcode::G_PTR_MASK: { uint64_t Align = I.getOperand(2).getImm(); if (Align >= 64 || Align == 0) return false; uint64_t Mask = ~((1ULL << Align) - 1); I.setDesc(TII.get(AArch64::ANDXri)); I.getOperand(2).setImm(AArch64_AM::encodeLogicalImmediate(Mask, 64)); return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } case TargetOpcode::G_PTRTOINT: case TargetOpcode::G_TRUNC: { const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); const LLT SrcTy = MRI.getType(I.getOperand(1).getReg()); const unsigned DstReg = I.getOperand(0).getReg(); const unsigned SrcReg = I.getOperand(1).getReg(); const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); if (DstRB.getID() != SrcRB.getID()) { DEBUG(dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n"); return false; } if (DstRB.getID() == AArch64::GPRRegBankID) { const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB, RBI); if (!DstRC) return false; const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB, RBI); if (!SrcRC) return false; if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n"); return false; } if (DstRC == SrcRC) { // Nothing to be done } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) && SrcTy == LLT::scalar(64)) { llvm_unreachable("TableGen can import this case"); return false; } else if (DstRC == &AArch64::GPR32RegClass && SrcRC == &AArch64::GPR64RegClass) { I.getOperand(1).setSubReg(AArch64::sub_32); } else { DEBUG(dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n"); return false; } I.setDesc(TII.get(TargetOpcode::COPY)); return true; } else if (DstRB.getID() == AArch64::FPRRegBankID) { if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) { I.setDesc(TII.get(AArch64::XTNv4i16)); constrainSelectedInstRegOperands(I, TII, TRI, RBI); return true; } } return false; } case TargetOpcode::G_ANYEXT: { const unsigned DstReg = I.getOperand(0).getReg(); const unsigned SrcReg = I.getOperand(1).getReg(); const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI); if (RBDst.getID() != AArch64::GPRRegBankID) { DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst << ", expected: GPR\n"); return false; } const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI); if (RBSrc.getID() != AArch64::GPRRegBankID) { DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc << ", expected: GPR\n"); return false; } const unsigned DstSize = MRI.getType(DstReg).getSizeInBits(); if (DstSize == 0) { DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n"); return false; } if (DstSize != 64 && DstSize > 32) { DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize << ", expected: 32 or 64\n"); return false; } // At this point G_ANYEXT is just like a plain COPY, but we need // to explicitly form the 64-bit value if any. 
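// Sketch (editorial, not from the change): for %dst(s64) = G_ANYEXT %src(s32)
// the block below first builds
//   %ext:gpr64all = SUBREG_TO_REG 0, %src, %subreg.sub_32
// and then lets selectCopy turn the G_ANYEXT itself into a plain COPY of
// %ext; the high 32 bits are allowed to remain undefined.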
if (DstSize > 32) { unsigned ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass); BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG)) .addDef(ExtSrc) .addImm(0) .addUse(SrcReg) .addImm(AArch64::sub_32); I.getOperand(1).setReg(ExtSrc); } return selectCopy(I, TII, MRI, TRI, RBI); } case TargetOpcode::G_ZEXT: case TargetOpcode::G_SEXT: { unsigned Opcode = I.getOpcode(); const LLT DstTy = MRI.getType(I.getOperand(0).getReg()), SrcTy = MRI.getType(I.getOperand(1).getReg()); const bool isSigned = Opcode == TargetOpcode::G_SEXT; const unsigned DefReg = I.getOperand(0).getReg(); const unsigned SrcReg = I.getOperand(1).getReg(); const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); if (RB.getID() != AArch64::GPRRegBankID) { DEBUG(dbgs() << TII.getName(I.getOpcode()) << " on bank: " << RB << ", expected: GPR\n"); return false; } MachineInstr *ExtI; if (DstTy == LLT::scalar(64)) { // FIXME: Can we avoid manually doing this? if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, MRI)) { DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode) << " operand\n"); return false; } const unsigned SrcXReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG)) .addDef(SrcXReg) .addImm(0) .addUse(SrcReg) .addImm(AArch64::sub_32); const unsigned NewOpc = isSigned ? AArch64::SBFMXri : AArch64::UBFMXri; ExtI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc)) .addDef(DefReg) .addUse(SrcXReg) .addImm(0) .addImm(SrcTy.getSizeInBits() - 1); } else if (DstTy.isScalar() && DstTy.getSizeInBits() <= 32) { const unsigned NewOpc = isSigned ? AArch64::SBFMWri : AArch64::UBFMWri; ExtI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc)) .addDef(DefReg) .addUse(SrcReg) .addImm(0) .addImm(SrcTy.getSizeInBits() - 1); } else { return false; } constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); I.eraseFromParent(); return true; } case TargetOpcode::G_SITOFP: case TargetOpcode::G_UITOFP: case TargetOpcode::G_FPTOSI: case TargetOpcode::G_FPTOUI: { const LLT DstTy = MRI.getType(I.getOperand(0).getReg()), SrcTy = MRI.getType(I.getOperand(1).getReg()); const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy); if (NewOpc == Opcode) return false; I.setDesc(TII.get(NewOpc)); constrainSelectedInstRegOperands(I, TII, TRI, RBI); return true; } case TargetOpcode::G_INTTOPTR: // The importer is currently unable to import pointer types since they // didn't exist in SelectionDAG. return selectCopy(I, TII, MRI, TRI, RBI); case TargetOpcode::G_BITCAST: // Imported SelectionDAG rules can handle every bitcast except those that // bitcast from a type to the same type. Ideally, these shouldn't occur // but we might not run an optimizer that deletes them. 
if (MRI.getType(I.getOperand(0).getReg()) == MRI.getType(I.getOperand(1).getReg())) return selectCopy(I, TII, MRI, TRI, RBI); return false; case TargetOpcode::G_SELECT: { if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) { DEBUG(dbgs() << "G_SELECT cond has type: " << Ty << ", expected: " << LLT::scalar(1) << '\n'); return false; } const unsigned CondReg = I.getOperand(1).getReg(); const unsigned TReg = I.getOperand(2).getReg(); const unsigned FReg = I.getOperand(3).getReg(); unsigned CSelOpc = 0; if (Ty == LLT::scalar(32)) { CSelOpc = AArch64::CSELWr; } else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) { CSelOpc = AArch64::CSELXr; } else { return false; } MachineInstr &TstMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri)) .addDef(AArch64::WZR) .addUse(CondReg) .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); MachineInstr &CSelMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CSelOpc)) .addDef(I.getOperand(0).getReg()) .addUse(TReg) .addUse(FReg) .addImm(AArch64CC::NE); constrainSelectedInstRegOperands(TstMI, TII, TRI, RBI); constrainSelectedInstRegOperands(CSelMI, TII, TRI, RBI); I.eraseFromParent(); return true; } case TargetOpcode::G_ICMP: { if (Ty != LLT::scalar(32)) { DEBUG(dbgs() << "G_ICMP result has type: " << Ty << ", expected: " << LLT::scalar(32) << '\n'); return false; } unsigned CmpOpc = 0; unsigned ZReg = 0; LLT CmpTy = MRI.getType(I.getOperand(2).getReg()); if (CmpTy == LLT::scalar(32)) { CmpOpc = AArch64::SUBSWrr; ZReg = AArch64::WZR; } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) { CmpOpc = AArch64::SUBSXrr; ZReg = AArch64::XZR; } else { return false; } // CSINC increments the result by one when the condition code is false. // Therefore, we have to invert the predicate to get an increment by 1 when // the predicate is true. 
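// Worked example (editorial): for %c = G_ICMP intpred(slt), %a(s32), %b(s32)
// the inverted predicate is GE, so the selection below is
//   SUBSWrr %wzr, %a, %b        ; NZCV := %a - %b
//   CSINCWr %c, %wzr, %wzr, ge  ; %c = ge ? 0 : 0 + 1, i.e. (%a < %b)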
const AArch64CC::CondCode invCC = changeICMPPredToAArch64CC(CmpInst::getInversePredicate( (CmpInst::Predicate)I.getOperand(1).getPredicate())); MachineInstr &CmpMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc)) .addDef(ZReg) .addUse(I.getOperand(2).getReg()) .addUse(I.getOperand(3).getReg()); MachineInstr &CSetMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr)) .addDef(I.getOperand(0).getReg()) .addUse(AArch64::WZR) .addUse(AArch64::WZR) .addImm(invCC); constrainSelectedInstRegOperands(CmpMI, TII, TRI, RBI); constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI); I.eraseFromParent(); return true; } case TargetOpcode::G_FCMP: { if (Ty != LLT::scalar(32)) { DEBUG(dbgs() << "G_FCMP result has type: " << Ty << ", expected: " << LLT::scalar(32) << '\n'); return false; } unsigned CmpOpc = 0; LLT CmpTy = MRI.getType(I.getOperand(2).getReg()); if (CmpTy == LLT::scalar(32)) { CmpOpc = AArch64::FCMPSrr; } else if (CmpTy == LLT::scalar(64)) { CmpOpc = AArch64::FCMPDrr; } else { return false; } // FIXME: regbank AArch64CC::CondCode CC1, CC2; changeFCMPPredToAArch64CC( (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2); MachineInstr &CmpMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc)) .addUse(I.getOperand(2).getReg()) .addUse(I.getOperand(3).getReg()); const unsigned DefReg = I.getOperand(0).getReg(); unsigned Def1Reg = DefReg; if (CC2 != AArch64CC::AL) Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); MachineInstr &CSetMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr)) .addDef(Def1Reg) .addUse(AArch64::WZR) .addUse(AArch64::WZR) .addImm(getInvertedCondCode(CC1)); if (CC2 != AArch64CC::AL) { unsigned Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); MachineInstr &CSet2MI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr)) .addDef(Def2Reg) .addUse(AArch64::WZR) .addUse(AArch64::WZR) .addImm(getInvertedCondCode(CC2)); MachineInstr &OrMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr)) .addDef(DefReg) .addUse(Def1Reg) .addUse(Def2Reg); constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI); constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI); } constrainSelectedInstRegOperands(CmpMI, TII, TRI, RBI); constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI); I.eraseFromParent(); return true; } case TargetOpcode::G_VASTART: return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI) : selectVaStartAAPCS(I, MF, MRI); case TargetOpcode::G_IMPLICIT_DEF: I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF)); return true; } return false; } /// SelectArithImmed - Select an immediate value that can be represented as /// a 12-bit value shifted left by either 0 or 12. If so, return true with /// Val set to the 12-bit value and Shift set to the shifter operand. InstructionSelector::ComplexRendererFns AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const { MachineInstr &MI = *Root.getParent(); MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); MachineRegisterInfo &MRI = MF.getRegInfo(); // This function is called from the addsub_shifted_imm ComplexPattern, // which lists [imm] as the list of opcode it's interested in, however // we still need to check whether the operand is actually an immediate // here because the ComplexPattern opcode list is only used in // root-level opcode matching. 
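// e.g. (editorial illustration of the matching below): 0x555 matches with
// ShiftAmt = 0; 0x555000 matches as Immed = 0x555 with ShiftAmt = 12;
// 0x555001 fits neither form and falls through to return None.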
uint64_t Immed; if (Root.isImm()) Immed = Root.getImm(); else if (Root.isCImm()) Immed = Root.getCImm()->getZExtValue(); else if (Root.isReg()) { MachineInstr *Def = MRI.getVRegDef(Root.getReg()); if (Def->getOpcode() != TargetOpcode::G_CONSTANT) return None; MachineOperand &Op1 = Def->getOperand(1); if (!Op1.isCImm() || Op1.getCImm()->getBitWidth() > 64) return None; Immed = Op1.getCImm()->getZExtValue(); } else return None; unsigned ShiftAmt; if (Immed >> 12 == 0) { ShiftAmt = 0; } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) { ShiftAmt = 12; Immed = Immed >> 12; } else return None; unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt); return {{ [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); }, [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); }, }}; } /// Select a "register plus unscaled signed 9-bit immediate" address. This /// should only match when there is an offset that is not valid for a scaled /// immediate addressing mode. The "Size" argument is the size in bytes of the /// memory reference, which is needed here to know what is valid for a scaled /// immediate. InstructionSelector::ComplexRendererFns AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root, unsigned Size) const { MachineRegisterInfo &MRI = Root.getParent()->getParent()->getParent()->getRegInfo(); if (!Root.isReg()) return None; if (!isBaseWithConstantOffset(Root, MRI)) return None; MachineInstr *RootDef = MRI.getVRegDef(Root.getReg()); if (!RootDef) return None; MachineOperand &OffImm = RootDef->getOperand(2); if (!OffImm.isReg()) return None; MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg()); if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT) return None; int64_t RHSC; MachineOperand &RHSOp1 = RHS->getOperand(1); if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64) return None; RHSC = RHSOp1.getCImm()->getSExtValue(); // If the offset is valid as a scaled immediate, don't match here. if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size))) return None; if (RHSC >= -256 && RHSC < 256) { MachineOperand &Base = RootDef->getOperand(1); return {{ [=](MachineInstrBuilder &MIB) { MIB.add(Base); }, [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); }, }}; } return None; } /// Select a "register plus scaled unsigned 12-bit immediate" address. The /// "Size" argument is the size in bytes of the memory reference, which /// determines the scale. 
InstructionSelector::ComplexRendererFns AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root, unsigned Size) const { MachineRegisterInfo &MRI = Root.getParent()->getParent()->getParent()->getRegInfo(); if (!Root.isReg()) return None; MachineInstr *RootDef = MRI.getVRegDef(Root.getReg()); if (!RootDef) return None; if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) { return {{ [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); }, [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, }}; } if (isBaseWithConstantOffset(Root, MRI)) { MachineOperand &LHS = RootDef->getOperand(1); MachineOperand &RHS = RootDef->getOperand(2); MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg()); MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg()); if (LHSDef && RHSDef) { int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue(); unsigned Scale = Log2_32(Size); if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) { if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) return {{ [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); }, [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); }, }}; return {{ [=](MachineInstrBuilder &MIB) { MIB.add(LHS); }, [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); }, }}; } } } // Before falling back to our general case, check if the unscaled // instructions can handle this. If so, that's preferable. if (selectAddrModeUnscaled(Root, Size).hasValue()) return None; return {{ [=](MachineInstrBuilder &MIB) { MIB.add(Root); }, [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, }}; } namespace llvm { InstructionSelector * createAArch64InstructionSelector(const AArch64TargetMachine &TM, AArch64Subtarget &Subtarget, AArch64RegisterBankInfo &RBI) { return new AArch64InstructionSelector(TM, Subtarget, RBI); } } Index: head/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- head/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (revision 329982) +++ head/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (revision 329983) @@ -1,4828 +1,4834 @@ //===- SIInstrInfo.cpp - SI Instruction Information ----------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // /// \file /// \brief SI Implementation of TargetInstrInfo. 
// //===----------------------------------------------------------------------===// #include "SIInstrInfo.h" #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "GCNHazardRecognizer.h" #include "SIDefines.h" #include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/LLVMContext.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetMachine.h" #include <cassert> #include <cstdint> #include <iterator> #include <utility> using namespace llvm; // Must be at least 4 to be able to branch over minimum unconditional branch // code. This is only for making it possible to write reasonably small tests for // long branches. static cl::opt<unsigned> BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), cl::desc("Restrict range of branch instructions (DEBUG)")); SIInstrInfo::SIInstrInfo(const SISubtarget &ST) : AMDGPUInstrInfo(ST), RI(ST), ST(ST) {} //===----------------------------------------------------------------------===// // TargetInstrInfo callbacks //===----------------------------------------------------------------------===// static unsigned getNumOperandsNoGlue(SDNode *Node) { unsigned N = Node->getNumOperands(); while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue) --N; return N; } static SDValue findChainOperand(SDNode *Load) { SDValue LastOp = Load->getOperand(getNumOperandsNoGlue(Load) - 1); assert(LastOp.getValueType() == MVT::Other && "Chain missing from load node"); return LastOp; } /// \brief Returns true if both nodes have the same value for the given /// operand \p Op, or if both nodes do not have this operand. static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) { unsigned Opc0 = N0->getMachineOpcode(); unsigned Opc1 = N1->getMachineOpcode(); int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName); int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName); if (Op0Idx == -1 && Op1Idx == -1) return true; if ((Op0Idx == -1 && Op1Idx != -1) || (Op1Idx == -1 && Op0Idx != -1)) return false; // getNamedOperandIdx returns the index for the MachineInstr's operands, // which includes the result as the first operand. We are indexing into the // MachineSDNode's operands, so we need to skip the result operand to get // the real index.
--Op0Idx; --Op1Idx; return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx); } bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, AliasAnalysis *AA) const { // TODO: The generic check fails for VALU instructions that should be // rematerializable due to implicit reads of exec. We really want all of the // generic logic for this except for this. switch (MI.getOpcode()) { case AMDGPU::V_MOV_B32_e32: case AMDGPU::V_MOV_B32_e64: case AMDGPU::V_MOV_B64_PSEUDO: return true; default: return false; } } bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const { if (!Load0->isMachineOpcode() || !Load1->isMachineOpcode()) return false; unsigned Opc0 = Load0->getMachineOpcode(); unsigned Opc1 = Load1->getMachineOpcode(); // Make sure both are actually loads. if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad()) return false; if (isDS(Opc0) && isDS(Opc1)) { // FIXME: Handle this case: if (getNumOperandsNoGlue(Load0) != getNumOperandsNoGlue(Load1)) return false; // Check base reg. if (Load0->getOperand(1) != Load1->getOperand(1)) return false; // Check chain. if (findChainOperand(Load0) != findChainOperand(Load1)) return false; // Skip read2 / write2 variants for simplicity. // TODO: We should report true if the used offsets are adjacent (excluded // st64 versions). if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::data1) != -1 || AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::data1) != -1) return false; Offset0 = cast<ConstantSDNode>(Load0->getOperand(2))->getZExtValue(); Offset1 = cast<ConstantSDNode>(Load1->getOperand(2))->getZExtValue(); return true; } if (isSMRD(Opc0) && isSMRD(Opc1)) { // Skip time and cache invalidation instructions. if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::sbase) == -1 || AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::sbase) == -1) return false; assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1)); // Check base reg. if (Load0->getOperand(0) != Load1->getOperand(0)) return false; const ConstantSDNode *Load0Offset = dyn_cast<ConstantSDNode>(Load0->getOperand(1)); const ConstantSDNode *Load1Offset = dyn_cast<ConstantSDNode>(Load1->getOperand(1)); if (!Load0Offset || !Load1Offset) return false; // Check chain. if (findChainOperand(Load0) != findChainOperand(Load1)) return false; Offset0 = Load0Offset->getZExtValue(); Offset1 = Load1Offset->getZExtValue(); return true; } // MUBUF and MTBUF can access the same addresses. if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1))) { // MUBUF and MTBUF have vaddr at different indices. if (!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::soffset) || findChainOperand(Load0) != findChainOperand(Load1) || !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::vaddr) || !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::srsrc)) return false; int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset); int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset); if (OffIdx0 == -1 || OffIdx1 == -1) return false; // getNamedOperandIdx returns the index for MachineInstrs. Since they // include the output in the operand list, but SDNodes don't, we need to // subtract the index by one. --OffIdx0; --OffIdx1; SDValue Off0 = Load0->getOperand(OffIdx0); SDValue Off1 = Load1->getOperand(OffIdx1); // The offset might be a FrameIndexSDNode.
if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1)) return false; Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue(); Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue(); return true; } return false; } static bool isStride64(unsigned Opc) { switch (Opc) { case AMDGPU::DS_READ2ST64_B32: case AMDGPU::DS_READ2ST64_B64: case AMDGPU::DS_WRITE2ST64_B32: case AMDGPU::DS_WRITE2ST64_B64: return true; default: return false; } } bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, const TargetRegisterInfo *TRI) const { unsigned Opc = LdSt.getOpcode(); if (isDS(LdSt)) { const MachineOperand *OffsetImm = getNamedOperand(LdSt, AMDGPU::OpName::offset); if (OffsetImm) { // Normal, single offset LDS instruction. const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::addr); BaseReg = AddrReg->getReg(); Offset = OffsetImm->getImm(); return true; } // The 2 offset instructions use offset0 and offset1 instead. We can treat // these as a load with a single offset if the 2 offsets are consecutive. We // will use this for some partially aligned loads. const MachineOperand *Offset0Imm = getNamedOperand(LdSt, AMDGPU::OpName::offset0); const MachineOperand *Offset1Imm = getNamedOperand(LdSt, AMDGPU::OpName::offset1); uint8_t Offset0 = Offset0Imm->getImm(); uint8_t Offset1 = Offset1Imm->getImm(); if (Offset1 > Offset0 && Offset1 - Offset0 == 1) { // Each of these offsets is in element sized units, so we need to convert // to bytes of the individual reads. unsigned EltSize; if (LdSt.mayLoad()) EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, 0)) / 16; else { assert(LdSt.mayStore()); int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0); EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, Data0Idx)) / 8; } if (isStride64(Opc)) EltSize *= 64; const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::addr); BaseReg = AddrReg->getReg(); Offset = EltSize * Offset0; return true; } return false; } if (isMUBUF(LdSt) || isMTBUF(LdSt)) { const MachineOperand *SOffset = getNamedOperand(LdSt, AMDGPU::OpName::soffset); if (SOffset && SOffset->isReg()) return false; const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr); if (!AddrReg) return false; const MachineOperand *OffsetImm = getNamedOperand(LdSt, AMDGPU::OpName::offset); BaseReg = AddrReg->getReg(); Offset = OffsetImm->getImm(); if (SOffset) // soffset can be an inline immediate. Offset += SOffset->getImm(); return true; } if (isSMRD(LdSt)) { const MachineOperand *OffsetImm = getNamedOperand(LdSt, AMDGPU::OpName::offset); if (!OffsetImm) return false; const MachineOperand *SBaseReg = getNamedOperand(LdSt, AMDGPU::OpName::sbase); BaseReg = SBaseReg->getReg(); Offset = OffsetImm->getImm(); return true; } if (isFLAT(LdSt)) { const MachineOperand *VAddr = getNamedOperand(LdSt, AMDGPU::OpName::vaddr); if (VAddr) { // Can't analyze 2 offsets. if (getNamedOperand(LdSt, AMDGPU::OpName::saddr)) return false; BaseReg = VAddr->getReg(); } else { // scratch instructions have either vaddr or saddr.
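// Worked example for the DS two-offset case above (editorial): a
// DS_READ2_B32 with offset0 = 4 and offset1 = 5 reads two consecutive
// 4-byte elements, so it is reported as a single access with
// Offset = EltSize * offset0 = 16 bytes from the base register.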
BaseReg = getNamedOperand(LdSt, AMDGPU::OpName::saddr)->getReg(); } Offset = getNamedOperand(LdSt, AMDGPU::OpName::offset)->getImm(); return true; } return false; } static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, unsigned BaseReg1, const MachineInstr &MI2, unsigned BaseReg2) { if (BaseReg1 == BaseReg2) return true; if (!MI1.hasOneMemOperand() || !MI2.hasOneMemOperand()) return false; auto MO1 = *MI1.memoperands_begin(); auto MO2 = *MI2.memoperands_begin(); if (MO1->getAddrSpace() != MO2->getAddrSpace()) return false; auto Base1 = MO1->getValue(); auto Base2 = MO2->getValue(); if (!Base1 || !Base2) return false; const MachineFunction &MF = *MI1.getParent()->getParent(); const DataLayout &DL = MF.getFunction().getParent()->getDataLayout(); Base1 = GetUnderlyingObject(Base1, DL); Base2 = GetUnderlyingObject(Base2, DL); if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2)) return false; return Base1 == Base2; } bool SIInstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt, unsigned BaseReg1, MachineInstr &SecondLdSt, unsigned BaseReg2, unsigned NumLoads) const { if (!memOpsHaveSameBasePtr(FirstLdSt, BaseReg1, SecondLdSt, BaseReg2)) return false; const MachineOperand *FirstDst = nullptr; const MachineOperand *SecondDst = nullptr; if ((isMUBUF(FirstLdSt) && isMUBUF(SecondLdSt)) || (isMTBUF(FirstLdSt) && isMTBUF(SecondLdSt)) || (isFLAT(FirstLdSt) && isFLAT(SecondLdSt))) { const unsigned MaxGlobalLoadCluster = 6; if (NumLoads > MaxGlobalLoadCluster) return false; FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdata); if (!FirstDst) FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdst); SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdata); if (!SecondDst) SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdst); } else if (isSMRD(FirstLdSt) && isSMRD(SecondLdSt)) { FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::sdst); SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::sdst); } else if (isDS(FirstLdSt) && isDS(SecondLdSt)) { FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdst); SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdst); } if (!FirstDst || !SecondDst) return false; // Try to limit clustering based on the total number of bytes loaded // rather than the number of instructions. This is done to help reduce // register pressure. The method used is somewhat inexact, though, // because it assumes that all loads in the cluster will load the // same number of bytes as FirstLdSt. // The unit of this value is bytes. // FIXME: This needs finer tuning.
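// e.g. (editorial): four VGPR_32 loads total 4 * 4 = 16 bytes and stay
// within the 16-byte threshold defined below, while two VReg_128 loads
// total 2 * 16 = 32 bytes and are not clustered.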
unsigned LoadClusterThreshold = 16; const MachineRegisterInfo &MRI = FirstLdSt.getParent()->getParent()->getRegInfo(); const TargetRegisterClass *DstRC = MRI.getRegClass(FirstDst->getReg()); return (NumLoads * (RI.getRegSizeInBits(*DstRC) / 8)) <= LoadClusterThreshold; } static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) { MachineFunction *MF = MBB.getParent(); DiagnosticInfoUnsupported IllegalCopy(MF->getFunction(), "illegal SGPR to VGPR copy", DL, DS_Error); LLVMContext &C = MF->getFunction().getContext(); C.diagnose(IllegalCopy); BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_ILLEGAL_COPY), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); } void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { const TargetRegisterClass *RC = RI.getPhysRegClass(DestReg); if (RC == &AMDGPU::VGPR_32RegClass) { assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) || AMDGPU::SReg_32RegClass.contains(SrcReg)); BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); return; } if (RC == &AMDGPU::SReg_32_XM0RegClass || RC == &AMDGPU::SReg_32RegClass) { if (SrcReg == AMDGPU::SCC) { BuildMI(MBB, MI, DL, get(AMDGPU::S_CSELECT_B32), DestReg) .addImm(-1) .addImm(0); return; } if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) { reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc); return; } BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); return; } if (RC == &AMDGPU::SReg_64RegClass) { if (DestReg == AMDGPU::VCC) { if (AMDGPU::SReg_64RegClass.contains(SrcReg)) { BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC) .addReg(SrcReg, getKillRegState(KillSrc)); } else { // FIXME: Hack until VReg_1 removed. 
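// i.e. (editorial note): copying a VGPR-held boolean %v into VCC is emitted
// below as V_CMP_NE_U32_e32 0, %v, which writes VCC implicitly and
// re-materializes the condition as a 64-bit lane mask rather than moving
// the register directly.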
assert(AMDGPU::VGPR_32RegClass.contains(SrcReg)); BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32)) .addImm(0) .addReg(SrcReg, getKillRegState(KillSrc)); } return; } if (!AMDGPU::SReg_64RegClass.contains(SrcReg)) { reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc); return; } BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); return; } if (DestReg == AMDGPU::SCC) { assert(AMDGPU::SReg_32RegClass.contains(SrcReg)); BuildMI(MBB, MI, DL, get(AMDGPU::S_CMP_LG_U32)) .addReg(SrcReg, getKillRegState(KillSrc)) .addImm(0); return; } unsigned EltSize = 4; unsigned Opcode = AMDGPU::V_MOV_B32_e32; if (RI.isSGPRClass(RC)) { if (RI.getRegSizeInBits(*RC) > 32) { Opcode = AMDGPU::S_MOV_B64; EltSize = 8; } else { Opcode = AMDGPU::S_MOV_B32; EltSize = 4; } if (!RI.isSGPRClass(RI.getPhysRegClass(SrcReg))) { reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc); return; } } ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RC, EltSize); bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg); for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) { unsigned SubIdx; if (Forward) SubIdx = SubIndices[Idx]; else SubIdx = SubIndices[SubIndices.size() - Idx - 1]; MachineInstrBuilder Builder = BuildMI(MBB, MI, DL, get(Opcode), RI.getSubReg(DestReg, SubIdx)); Builder.addReg(RI.getSubReg(SrcReg, SubIdx)); if (Idx == 0) Builder.addReg(DestReg, RegState::Define | RegState::Implicit); bool UseKill = KillSrc && Idx == SubIndices.size() - 1; Builder.addReg(SrcReg, getKillRegState(UseKill) | RegState::Implicit); } } int SIInstrInfo::commuteOpcode(unsigned Opcode) const { int NewOpc; // Try to map original to commuted opcode NewOpc = AMDGPU::getCommuteRev(Opcode); if (NewOpc != -1) // Check if the commuted (REV) opcode exists on the target. return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1; // Try to map commuted to original opcode NewOpc = AMDGPU::getCommuteOrig(Opcode); if (NewOpc != -1) // Check if the original (non-REV) opcode exists on the target. return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1; return Opcode; } void SIInstrInfo::materializeImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned DestReg, int64_t Value) const { MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); const TargetRegisterClass *RegClass = MRI.getRegClass(DestReg); if (RegClass == &AMDGPU::SReg_32RegClass || RegClass == &AMDGPU::SGPR_32RegClass || RegClass == &AMDGPU::SReg_32_XM0RegClass || RegClass == &AMDGPU::SReg_32_XM0_XEXECRegClass) { BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg) .addImm(Value); return; } if (RegClass == &AMDGPU::SReg_64RegClass || RegClass == &AMDGPU::SGPR_64RegClass || RegClass == &AMDGPU::SReg_64_XEXECRegClass) { BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg) .addImm(Value); return; } if (RegClass == &AMDGPU::VGPR_32RegClass) { BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg) .addImm(Value); return; } if (RegClass == &AMDGPU::VReg_64RegClass) { BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO), DestReg) .addImm(Value); return; } unsigned EltSize = 4; unsigned Opcode = AMDGPU::V_MOV_B32_e32; if (RI.isSGPRClass(RegClass)) { if (RI.getRegSizeInBits(*RegClass) > 32) { Opcode = AMDGPU::S_MOV_B64; EltSize = 8; } else { Opcode = AMDGPU::S_MOV_B32; EltSize = 4; } } ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RegClass, EltSize); for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) { int64_t IdxValue = Idx == 0 ?
Value : 0; MachineInstrBuilder Builder = BuildMI(MBB, MI, DL, get(Opcode), RI.getSubReg(DestReg, Idx)); Builder.addImm(IdxValue); } } const TargetRegisterClass * SIInstrInfo::getPreferredSelectRegClass(unsigned Size) const { return &AMDGPU::VGPR_32RegClass; } void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, unsigned DstReg, ArrayRef<MachineOperand> Cond, unsigned TrueReg, unsigned FalseReg) const { MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass && "Not a VGPR32 reg"); if (Cond.size() == 1) { unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg) .add(Cond[0]); BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) .addReg(FalseReg) .addReg(TrueReg) .addReg(SReg); } else if (Cond.size() == 2) { assert(Cond[0].isImm() && "Cond[0] is not an immediate"); switch (Cond[0].getImm()) { case SIInstrInfo::SCC_TRUE: { unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg) .addImm(-1) .addImm(0); BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) .addReg(FalseReg) .addReg(TrueReg) .addReg(SReg); break; } case SIInstrInfo::SCC_FALSE: { unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg) .addImm(0) .addImm(-1); BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) .addReg(FalseReg) .addReg(TrueReg) .addReg(SReg); break; } case SIInstrInfo::VCCNZ: { MachineOperand RegOp = Cond[1]; RegOp.setImplicit(false); unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg) .add(RegOp); BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) .addReg(FalseReg) .addReg(TrueReg) .addReg(SReg); break; } case SIInstrInfo::VCCZ: { MachineOperand RegOp = Cond[1]; RegOp.setImplicit(false); unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg) .add(RegOp); BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) .addReg(TrueReg) .addReg(FalseReg) .addReg(SReg); break; } case SIInstrInfo::EXECNZ: { unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2) .addImm(0); BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg) .addImm(-1) .addImm(0); BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) .addReg(FalseReg) .addReg(TrueReg) .addReg(SReg); break; } case SIInstrInfo::EXECZ: { unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2) .addImm(0); BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg) .addImm(0) .addImm(-1); BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) .addReg(FalseReg) .addReg(TrueReg) .addReg(SReg); llvm_unreachable("Unhandled branch predicate EXECZ"); break; } default: llvm_unreachable("invalid branch predicate"); } } else { llvm_unreachable("Can only handle Cond size 1 or 2"); } } unsigned SIInstrInfo::insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, unsigned SrcReg, int Value) const { MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); unsigned Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_EQ_I32_e64), Reg) .addImm(Value) .addReg(SrcReg); return Reg; } unsigned SIInstrInfo::insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, unsigned SrcReg, int Value) const { MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_NE_I32_e64), Reg) .addImm(Value) .addReg(SrcReg); return Reg; } unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const { if (RI.getRegSizeInBits(*DstRC) == 32) { return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32; } else if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC)) { return AMDGPU::S_MOV_B64; } else if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC)) { return AMDGPU::V_MOV_B64_PSEUDO; } return AMDGPU::COPY; } static unsigned getSGPRSpillSaveOpcode(unsigned Size) { switch (Size) { case 4: return AMDGPU::SI_SPILL_S32_SAVE; case 8: return AMDGPU::SI_SPILL_S64_SAVE; case 16: return AMDGPU::SI_SPILL_S128_SAVE; case 32: return AMDGPU::SI_SPILL_S256_SAVE; case 64: return AMDGPU::SI_SPILL_S512_SAVE; default: llvm_unreachable("unknown register size"); } } static unsigned getVGPRSpillSaveOpcode(unsigned Size) { switch (Size) { case 4: return AMDGPU::SI_SPILL_V32_SAVE; case 8: return AMDGPU::SI_SPILL_V64_SAVE; case 12: return AMDGPU::SI_SPILL_V96_SAVE; case 16: return AMDGPU::SI_SPILL_V128_SAVE; case 32: return AMDGPU::SI_SPILL_V256_SAVE; case 64: return AMDGPU::SI_SPILL_V512_SAVE; default: llvm_unreachable("unknown register size"); } } void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { MachineFunction *MF = MBB.getParent(); SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); MachineFrameInfo &FrameInfo = MF->getFrameInfo(); DebugLoc DL = MBB.findDebugLoc(MI); assert(SrcReg != MFI->getStackPtrOffsetReg() && SrcReg != MFI->getFrameOffsetReg() && SrcReg != MFI->getScratchWaveOffsetReg()); unsigned Size = FrameInfo.getObjectSize(FrameIndex); unsigned Align = FrameInfo.getObjectAlignment(FrameIndex); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(*MF, FrameIndex); MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, Size, Align); unsigned SpillSize = TRI->getSpillSize(*RC); if (RI.isSGPRClass(RC)) { MFI->setHasSpilledSGPRs(); // We are only allowed to create one new instruction when spilling // registers, so we need to use pseudo instruction for spilling SGPRs. const MCInstrDesc &OpDesc = get(getSGPRSpillSaveOpcode(SpillSize)); // The SGPR spill/restore instructions only work on numbered SGPRs, so we need // to make sure we are using the correct register class. if (TargetRegisterInfo::isVirtualRegister(SrcReg) && SpillSize == 4) { MachineRegisterInfo &MRI = MF->getRegInfo(); MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass); } MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc) .addReg(SrcReg, getKillRegState(isKill)) // data .addFrameIndex(FrameIndex) // addr .addMemOperand(MMO) .addReg(MFI->getScratchRSrcReg(), RegState::Implicit) .addReg(MFI->getFrameOffsetReg(), RegState::Implicit); // Add the scratch resource registers as implicit uses because we may end up // needing them, and need to ensure that the reserved registers are // correctly handled.
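// e.g. (editorial): spilling a 16-byte SReg_128 picks SI_SPILL_S128_SAVE
// from getSGPRSpillSaveOpcode above; the pseudo is expanded later, which is
// why the scratch rsrc and frame offset registers appear only as implicit
// uses here.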
FrameInfo.setStackID(FrameIndex, 1); if (ST.hasScalarStores()) { // m0 is used for offset to scalar stores if used to spill. Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine | RegState::Dead); } return; } if (!ST.isVGPRSpillingEnabled(MF->getFunction())) { LLVMContext &Ctx = MF->getFunction().getContext(); Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to" " spill register"); BuildMI(MBB, MI, DL, get(AMDGPU::KILL)) .addReg(SrcReg); return; } assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected"); unsigned Opcode = getVGPRSpillSaveOpcode(SpillSize); MFI->setHasSpilledVGPRs(); BuildMI(MBB, MI, DL, get(Opcode)) .addReg(SrcReg, getKillRegState(isKill)) // data .addFrameIndex(FrameIndex) // addr .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc .addReg(MFI->getFrameOffsetReg()) // scratch_offset .addImm(0) // offset .addMemOperand(MMO); } static unsigned getSGPRSpillRestoreOpcode(unsigned Size) { switch (Size) { case 4: return AMDGPU::SI_SPILL_S32_RESTORE; case 8: return AMDGPU::SI_SPILL_S64_RESTORE; case 16: return AMDGPU::SI_SPILL_S128_RESTORE; case 32: return AMDGPU::SI_SPILL_S256_RESTORE; case 64: return AMDGPU::SI_SPILL_S512_RESTORE; default: llvm_unreachable("unknown register size"); } } static unsigned getVGPRSpillRestoreOpcode(unsigned Size) { switch (Size) { case 4: return AMDGPU::SI_SPILL_V32_RESTORE; case 8: return AMDGPU::SI_SPILL_V64_RESTORE; case 12: return AMDGPU::SI_SPILL_V96_RESTORE; case 16: return AMDGPU::SI_SPILL_V128_RESTORE; case 32: return AMDGPU::SI_SPILL_V256_RESTORE; case 64: return AMDGPU::SI_SPILL_V512_RESTORE; default: llvm_unreachable("unknown register size"); } } void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { MachineFunction *MF = MBB.getParent(); const SIMachineFunctionInfo *MFI = MF->getInfo(); MachineFrameInfo &FrameInfo = MF->getFrameInfo(); DebugLoc DL = MBB.findDebugLoc(MI); unsigned Align = FrameInfo.getObjectAlignment(FrameIndex); unsigned Size = FrameInfo.getObjectSize(FrameIndex); unsigned SpillSize = TRI->getSpillSize(*RC); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(*MF, FrameIndex); MachineMemOperand *MMO = MF->getMachineMemOperand( PtrInfo, MachineMemOperand::MOLoad, Size, Align); if (RI.isSGPRClass(RC)) { // FIXME: Maybe this should not include a memoperand because it will be // lowered to non-memory instructions. const MCInstrDesc &OpDesc = get(getSGPRSpillRestoreOpcode(SpillSize)); if (TargetRegisterInfo::isVirtualRegister(DestReg) && SpillSize == 4) { MachineRegisterInfo &MRI = MF->getRegInfo(); MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0RegClass); } FrameInfo.setStackID(FrameIndex, 1); MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc, DestReg) .addFrameIndex(FrameIndex) // addr .addMemOperand(MMO) .addReg(MFI->getScratchRSrcReg(), RegState::Implicit) .addReg(MFI->getFrameOffsetReg(), RegState::Implicit); if (ST.hasScalarStores()) { // m0 is used for offset to scalar stores if used to spill. 
Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine | RegState::Dead); } return; } if (!ST.isVGPRSpillingEnabled(MF->getFunction())) { LLVMContext &Ctx = MF->getFunction().getContext(); Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to" " restore register"); BuildMI(MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), DestReg); return; } assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected"); unsigned Opcode = getVGPRSpillRestoreOpcode(SpillSize); BuildMI(MBB, MI, DL, get(Opcode), DestReg) .addFrameIndex(FrameIndex) // vaddr .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc .addReg(MFI->getFrameOffsetReg()) // scratch_offset .addImm(0) // offset .addMemOperand(MMO); } /// \param Offset Offset in bytes of the FrameIndex being spilled unsigned SIInstrInfo::calculateLDSSpillAddress( MachineBasicBlock &MBB, MachineInstr &MI, RegScavenger *RS, unsigned TmpReg, unsigned FrameOffset, unsigned Size) const { MachineFunction *MF = MBB.getParent(); SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); const SISubtarget &ST = MF->getSubtarget<SISubtarget>(); DebugLoc DL = MBB.findDebugLoc(MI); unsigned WorkGroupSize = MFI->getMaxFlatWorkGroupSize(); unsigned WavefrontSize = ST.getWavefrontSize(); unsigned TIDReg = MFI->getTIDReg(); if (!MFI->hasCalculatedTID()) { MachineBasicBlock &Entry = MBB.getParent()->front(); MachineBasicBlock::iterator Insert = Entry.front(); DebugLoc DL = Insert->getDebugLoc(); TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass, *MF); if (TIDReg == AMDGPU::NoRegister) return TIDReg; if (!AMDGPU::isShader(MF->getFunction().getCallingConv()) && WorkGroupSize > WavefrontSize) { unsigned TIDIGXReg = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_X); unsigned TIDIGYReg = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Y); unsigned TIDIGZReg = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Z); unsigned InputPtrReg = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR); for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) { if (!Entry.isLiveIn(Reg)) Entry.addLiveIn(Reg); } RS->enterBasicBlock(Entry); // FIXME: Can we scavenge an SReg_64 and access the subregs?
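// Taken together with the step-by-step comments below, the sequence computes
// (a sketch of the arithmetic only, using the names from those comments):
//   TID = (NGROUPS.X * NGROUPS.Y) * TIDIG.X + NGROUPS.Z * TIDIG.Y + TIDIG.Z
// and the final V_LSHLREV_B32 scales TID by 4 to turn a lane index into a
// byte offset.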
unsigned STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0); unsigned STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0); BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0) .addReg(InputPtrReg) .addImm(SI::KernelInputOffsets::NGROUPS_Z); BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp1) .addReg(InputPtrReg) .addImm(SI::KernelInputOffsets::NGROUPS_Y); // NGROUPS.X * NGROUPS.Y BuildMI(Entry, Insert, DL, get(AMDGPU::S_MUL_I32), STmp1) .addReg(STmp1) .addReg(STmp0); // (NGROUPS.X * NGROUPS.Y) * TIDIG.X BuildMI(Entry, Insert, DL, get(AMDGPU::V_MUL_U32_U24_e32), TIDReg) .addReg(STmp1) .addReg(TIDIGXReg); // NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X) BuildMI(Entry, Insert, DL, get(AMDGPU::V_MAD_U32_U24), TIDReg) .addReg(STmp0) .addReg(TIDIGYReg) .addReg(TIDReg); // (NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)) + TIDIG.Z getAddNoCarry(Entry, Insert, DL, TIDReg) .addReg(TIDReg) .addReg(TIDIGZReg); } else { // Get the wave id BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_LO_U32_B32_e64), TIDReg) .addImm(-1) .addImm(0); BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e64), TIDReg) .addImm(-1) .addReg(TIDReg); } BuildMI(Entry, Insert, DL, get(AMDGPU::V_LSHLREV_B32_e32), TIDReg) .addImm(2) .addReg(TIDReg); MFI->setTIDReg(TIDReg); } // Add FrameIndex to LDS offset unsigned LDSOffset = MFI->getLDSSize() + (FrameOffset * WorkGroupSize); getAddNoCarry(MBB, MI, DL, TmpReg) .addImm(LDSOffset) .addReg(TIDReg); return TmpReg; } void SIInstrInfo::insertWaitStates(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, int Count) const { DebugLoc DL = MBB.findDebugLoc(MI); while (Count > 0) { int Arg; if (Count >= 8) Arg = 7; else Arg = Count - 1; Count -= 8; BuildMI(MBB, MI, DL, get(AMDGPU::S_NOP)) .addImm(Arg); } } void SIInstrInfo::insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const { insertWaitStates(MBB, MI, 1); } void SIInstrInfo::insertReturn(MachineBasicBlock &MBB) const { auto MF = MBB.getParent(); SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>(); assert(Info->isEntryFunction()); if (MBB.succ_empty()) { bool HasNoTerminator = MBB.getFirstTerminator() == MBB.end(); if (HasNoTerminator) BuildMI(MBB, MBB.end(), DebugLoc(), get(Info->returnsVoid() ? AMDGPU::S_ENDPGM : AMDGPU::SI_RETURN_TO_EPILOG)); } } unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) const { switch (MI.getOpcode()) { default: return 1; // FIXME: Do wait states equal cycles? case AMDGPU::S_NOP: return MI.getOperand(0).getImm() + 1; } } bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = MBB.findDebugLoc(MI); switch (MI.getOpcode()) { default: return AMDGPUInstrInfo::expandPostRAPseudo(MI); case AMDGPU::S_MOV_B64_term: // This is only a terminator to get the correct spill code placement during // register allocation. MI.setDesc(get(AMDGPU::S_MOV_B64)); break; case AMDGPU::S_XOR_B64_term: // This is only a terminator to get the correct spill code placement during // register allocation. MI.setDesc(get(AMDGPU::S_XOR_B64)); break; case AMDGPU::S_ANDN2_B64_term: // This is only a terminator to get the correct spill code placement during // register allocation.
MI.setDesc(get(AMDGPU::S_ANDN2_B64)); break; case AMDGPU::V_MOV_B64_PSEUDO: { unsigned Dst = MI.getOperand(0).getReg(); unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0); unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1); const MachineOperand &SrcOp = MI.getOperand(1); // FIXME: Will this work for 64-bit floating point immediates? assert(!SrcOp.isFPImm()); if (SrcOp.isImm()) { APInt Imm(64, SrcOp.getImm()); BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo) .addImm(Imm.getLoBits(32).getZExtValue()) .addReg(Dst, RegState::Implicit | RegState::Define); BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi) .addImm(Imm.getHiBits(32).getZExtValue()) .addReg(Dst, RegState::Implicit | RegState::Define); } else { assert(SrcOp.isReg()); BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo) .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub0)) .addReg(Dst, RegState::Implicit | RegState::Define); BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi) .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1)) .addReg(Dst, RegState::Implicit | RegState::Define); } MI.eraseFromParent(); break; } case AMDGPU::V_SET_INACTIVE_B32: { BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC) .addReg(AMDGPU::EXEC); BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), MI.getOperand(0).getReg()) .add(MI.getOperand(2)); BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC) .addReg(AMDGPU::EXEC); MI.eraseFromParent(); break; } case AMDGPU::V_SET_INACTIVE_B64: { BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC) .addReg(AMDGPU::EXEC); MachineInstr *Copy = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO), MI.getOperand(0).getReg()) .add(MI.getOperand(2)); expandPostRAPseudo(*Copy); BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC) .addReg(AMDGPU::EXEC); MI.eraseFromParent(); break; } case AMDGPU::V_MOVRELD_B32_V1: case AMDGPU::V_MOVRELD_B32_V2: case AMDGPU::V_MOVRELD_B32_V4: case AMDGPU::V_MOVRELD_B32_V8: case AMDGPU::V_MOVRELD_B32_V16: { const MCInstrDesc &MovRelDesc = get(AMDGPU::V_MOVRELD_B32_e32); unsigned VecReg = MI.getOperand(0).getReg(); bool IsUndef = MI.getOperand(1).isUndef(); unsigned SubReg = AMDGPU::sub0 + MI.getOperand(3).getImm(); assert(VecReg == MI.getOperand(1).getReg()); MachineInstr *MovRel = BuildMI(MBB, MI, DL, MovRelDesc) .addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef) .add(MI.getOperand(2)) .addReg(VecReg, RegState::ImplicitDefine) .addReg(VecReg, RegState::Implicit | (IsUndef ? RegState::Undef : 0)); const int ImpDefIdx = MovRelDesc.getNumOperands() + MovRelDesc.getNumImplicitUses(); const int ImpUseIdx = ImpDefIdx + 1; MovRel->tieOperands(ImpDefIdx, ImpUseIdx); MI.eraseFromParent(); break; } case AMDGPU::SI_PC_ADD_REL_OFFSET: { MachineFunction &MF = *MBB.getParent(); unsigned Reg = MI.getOperand(0).getReg(); unsigned RegLo = RI.getSubReg(Reg, AMDGPU::sub0); unsigned RegHi = RI.getSubReg(Reg, AMDGPU::sub1); // Create a bundle so these instructions won't be re-ordered by the // post-RA scheduler. MIBundleBuilder Bundler(MBB, MI); Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_GETPC_B64), Reg)); // Add 32-bit offset from this instruction to the start of the // constant data. 
Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo) .addReg(RegLo) .add(MI.getOperand(1))); MachineInstrBuilder MIB = BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi) .addReg(RegHi); if (MI.getOperand(2).getTargetFlags() == SIInstrInfo::MO_NONE) MIB.addImm(0); else MIB.add(MI.getOperand(2)); Bundler.append(MIB); finalizeBundle(MBB, Bundler.begin()); MI.eraseFromParent(); break; } case AMDGPU::EXIT_WWM: { // This only gets its own opcode so that SIFixWWMLiveness can tell when WWM // is exited. MI.setDesc(get(AMDGPU::S_MOV_B64)); break; } } return true; } bool SIInstrInfo::swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, unsigned Src0OpName, MachineOperand &Src1, unsigned Src1OpName) const { MachineOperand *Src0Mods = getNamedOperand(MI, Src0OpName); if (!Src0Mods) return false; MachineOperand *Src1Mods = getNamedOperand(MI, Src1OpName); assert(Src1Mods && "All commutable instructions have both src0 and src1 modifiers"); int Src0ModsVal = Src0Mods->getImm(); int Src1ModsVal = Src1Mods->getImm(); Src1Mods->setImm(Src0ModsVal); Src0Mods->setImm(Src1ModsVal); return true; } static MachineInstr *swapRegAndNonRegOperand(MachineInstr &MI, MachineOperand &RegOp, MachineOperand &NonRegOp) { unsigned Reg = RegOp.getReg(); unsigned SubReg = RegOp.getSubReg(); bool IsKill = RegOp.isKill(); bool IsDead = RegOp.isDead(); bool IsUndef = RegOp.isUndef(); bool IsDebug = RegOp.isDebug(); if (NonRegOp.isImm()) RegOp.ChangeToImmediate(NonRegOp.getImm()); else if (NonRegOp.isFI()) RegOp.ChangeToFrameIndex(NonRegOp.getIndex()); else return nullptr; NonRegOp.ChangeToRegister(Reg, false, false, IsKill, IsDead, IsUndef, IsDebug); NonRegOp.setSubReg(SubReg); return &MI; } MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned Src0Idx, unsigned Src1Idx) const { assert(!NewMI && "this should never be used"); unsigned Opc = MI.getOpcode(); int CommutedOpcode = commuteOpcode(Opc); if (CommutedOpcode == -1) return nullptr; assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) == static_cast(Src0Idx) && AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) == static_cast(Src1Idx) && "inconsistency with findCommutedOpIndices"); MachineOperand &Src0 = MI.getOperand(Src0Idx); MachineOperand &Src1 = MI.getOperand(Src1Idx); MachineInstr *CommutedMI = nullptr; if (Src0.isReg() && Src1.isReg()) { if (isOperandLegal(MI, Src1Idx, &Src0)) { // Be sure to copy the source modifiers to the right place. CommutedMI = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, Src0Idx, Src1Idx); } } else if (Src0.isReg() && !Src1.isReg()) { // src0 should always be able to support any operand type, so no need to // check operand legality. CommutedMI = swapRegAndNonRegOperand(MI, Src0, Src1); } else if (!Src0.isReg() && Src1.isReg()) { if (isOperandLegal(MI, Src1Idx, &Src0)) CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0); } else { // FIXME: Found two non registers to commute. This does happen. return nullptr; } if (CommutedMI) { swapSourceModifiers(MI, Src0, AMDGPU::OpName::src0_modifiers, Src1, AMDGPU::OpName::src1_modifiers); CommutedMI->setDesc(get(CommutedOpcode)); } return CommutedMI; } // This needs to be implemented because the source modifiers may be inserted // between the true commutable operands, and the base // TargetInstrInfo::commuteInstruction uses it. 
bool SIInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const { if (!MI.isCommutable()) return false; unsigned Opc = MI.getOpcode(); int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); if (Src0Idx == -1) return false; int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); if (Src1Idx == -1) return false; return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx); } bool SIInstrInfo::isBranchOffsetInRange(unsigned BranchOp, int64_t BrOffset) const { // BranchRelaxation should never have to check s_setpc_b64 because its dest // block is unanalyzable. assert(BranchOp != AMDGPU::S_SETPC_B64); // Convert to dwords. BrOffset /= 4; // The branch instructions do PC += signext(SIMM16 * 4) + 4, so the offset is // from the next instruction. BrOffset -= 1; return isIntN(BranchOffsetBits, BrOffset); } MachineBasicBlock *SIInstrInfo::getBranchDestBlock( const MachineInstr &MI) const { if (MI.getOpcode() == AMDGPU::S_SETPC_B64) { // This would be a difficult analysis to perform, but can always be legal so // there's no need to analyze it. return nullptr; } return MI.getOperand(0).getMBB(); } unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &DestBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const { assert(RS && "RegScavenger required for long branching"); assert(MBB.empty() && "new block should be inserted for expanding unconditional branch"); assert(MBB.pred_size() == 1); MachineFunction *MF = MBB.getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); // FIXME: Virtual register workaround for RegScavenger not working with empty // blocks. unsigned PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); auto I = MBB.end(); // We need to compute the offset relative to the instruction immediately after // s_getpc_b64. Insert pc arithmetic code before last terminator. MachineInstr *GetPC = BuildMI(MBB, I, DL, get(AMDGPU::S_GETPC_B64), PCReg); // TODO: Handle > 32-bit block address. if (BrOffset >= 0) { BuildMI(MBB, I, DL, get(AMDGPU::S_ADD_U32)) .addReg(PCReg, RegState::Define, AMDGPU::sub0) .addReg(PCReg, 0, AMDGPU::sub0) .addMBB(&DestBB, AMDGPU::TF_LONG_BRANCH_FORWARD); BuildMI(MBB, I, DL, get(AMDGPU::S_ADDC_U32)) .addReg(PCReg, RegState::Define, AMDGPU::sub1) .addReg(PCReg, 0, AMDGPU::sub1) .addImm(0); } else { // Backwards branch. BuildMI(MBB, I, DL, get(AMDGPU::S_SUB_U32)) .addReg(PCReg, RegState::Define, AMDGPU::sub0) .addReg(PCReg, 0, AMDGPU::sub0) .addMBB(&DestBB, AMDGPU::TF_LONG_BRANCH_BACKWARD); BuildMI(MBB, I, DL, get(AMDGPU::S_SUBB_U32)) .addReg(PCReg, RegState::Define, AMDGPU::sub1) .addReg(PCReg, 0, AMDGPU::sub1) .addImm(0); } // Insert the indirect branch after the other terminator. BuildMI(&MBB, DL, get(AMDGPU::S_SETPC_B64)) .addReg(PCReg); // FIXME: If spilling is necessary, this will fail because this scavenger has // no emergency stack slots. It is non-trivial to spill in this situation, // because the restore code needs to be specially placed after the // jump. BranchRelaxation then needs to be made aware of the newly inserted // block. // // If a spill is needed for the pc register pair, we need to insert a spill // restore block right before the destination block, and insert a short branch // into the old destination block's fallthrough predecessor. 
// e.g.: // // s_cbranch_scc0 skip_long_branch: // // long_branch_bb: // spill s[8:9] // s_getpc_b64 s[8:9] // s_add_u32 s8, s8, restore_bb // s_addc_u32 s9, s9, 0 // s_setpc_b64 s[8:9] // // skip_long_branch: // foo; // // ..... // // dest_bb_fallthrough_predecessor: // bar; // s_branch dest_bb // // restore_bb: // restore s[8:9] // fallthrough dest_bb /// // dest_bb: // buzz; RS->enterBasicBlockEnd(MBB); unsigned Scav = RS->scavengeRegister(&AMDGPU::SReg_64RegClass, MachineBasicBlock::iterator(GetPC), 0); MRI.replaceRegWith(PCReg, Scav); MRI.clearVirtRegs(); RS->setRegUsed(Scav); return 4 + 8 + 4 + 4; } unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) { switch (Cond) { case SIInstrInfo::SCC_TRUE: return AMDGPU::S_CBRANCH_SCC1; case SIInstrInfo::SCC_FALSE: return AMDGPU::S_CBRANCH_SCC0; case SIInstrInfo::VCCNZ: return AMDGPU::S_CBRANCH_VCCNZ; case SIInstrInfo::VCCZ: return AMDGPU::S_CBRANCH_VCCZ; case SIInstrInfo::EXECNZ: return AMDGPU::S_CBRANCH_EXECNZ; case SIInstrInfo::EXECZ: return AMDGPU::S_CBRANCH_EXECZ; default: llvm_unreachable("invalid branch predicate"); } } SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) { switch (Opcode) { case AMDGPU::S_CBRANCH_SCC0: return SCC_FALSE; case AMDGPU::S_CBRANCH_SCC1: return SCC_TRUE; case AMDGPU::S_CBRANCH_VCCNZ: return VCCNZ; case AMDGPU::S_CBRANCH_VCCZ: return VCCZ; case AMDGPU::S_CBRANCH_EXECNZ: return EXECNZ; case AMDGPU::S_CBRANCH_EXECZ: return EXECZ; default: return INVALID_BR; } } bool SIInstrInfo::analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl &Cond, bool AllowModify) const { if (I->getOpcode() == AMDGPU::S_BRANCH) { // Unconditional Branch TBB = I->getOperand(0).getMBB(); return false; } MachineBasicBlock *CondBB = nullptr; if (I->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) { CondBB = I->getOperand(1).getMBB(); Cond.push_back(I->getOperand(0)); } else { BranchPredicate Pred = getBranchPredicate(I->getOpcode()); if (Pred == INVALID_BR) return true; CondBB = I->getOperand(0).getMBB(); Cond.push_back(MachineOperand::CreateImm(Pred)); Cond.push_back(I->getOperand(1)); // Save the branch register. } ++I; if (I == MBB.end()) { // Conditional branch followed by fall-through. TBB = CondBB; return false; } if (I->getOpcode() == AMDGPU::S_BRANCH) { TBB = CondBB; FBB = I->getOperand(0).getMBB(); return false; } return true; } bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl &Cond, bool AllowModify) const { MachineBasicBlock::iterator I = MBB.getFirstTerminator(); if (I == MBB.end()) return false; if (I->getOpcode() != AMDGPU::SI_MASK_BRANCH) return analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify); ++I; // TODO: Should be able to treat as fallthrough? if (I == MBB.end()) return true; if (analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify)) return true; MachineBasicBlock *MaskBrDest = I->getOperand(0).getMBB(); // Specifically handle the case where the conditional branch is to the same // destination as the mask branch. e.g. // // si_mask_branch BB8 // s_cbranch_execz BB8 // s_cbranch BB9 // // This is required to understand divergent loops which may need the branches // to be relaxed. 
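// Put differently: the pair is treated as analyzable only when the
// conditional branch tests the exec mask and targets the same block as the
// si_mask_branch, i.e. TBB == MaskBrDest and the predicate is EXECZ or
// EXECNZ; every other combination is rejected below.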
if (TBB != MaskBrDest || Cond.empty()) return true; auto Pred = Cond[0].getImm(); return (Pred != EXECZ && Pred != EXECNZ); } unsigned SIInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved) const { MachineBasicBlock::iterator I = MBB.getFirstTerminator(); unsigned Count = 0; unsigned RemovedSize = 0; while (I != MBB.end()) { MachineBasicBlock::iterator Next = std::next(I); if (I->getOpcode() == AMDGPU::SI_MASK_BRANCH) { I = Next; continue; } RemovedSize += getInstSizeInBytes(*I); I->eraseFromParent(); ++Count; I = Next; } if (BytesRemoved) *BytesRemoved = RemovedSize; return Count; } // Copy the flags onto the implicit condition register operand. static void preserveCondRegFlags(MachineOperand &CondReg, const MachineOperand &OrigCond) { CondReg.setIsUndef(OrigCond.isUndef()); CondReg.setIsKill(OrigCond.isKill()); } unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const { if (!FBB && Cond.empty()) { BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH)) .addMBB(TBB); if (BytesAdded) *BytesAdded = 4; return 1; } if (Cond.size() == 1 && Cond[0].isReg()) { BuildMI(&MBB, DL, get(AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO)) .add(Cond[0]) .addMBB(TBB); return 1; } assert(TBB && Cond[0].isImm()); unsigned Opcode = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm())); if (!FBB) { MachineInstr *CondBr = BuildMI(&MBB, DL, get(Opcode)) .addMBB(TBB); // Copy the flags onto the implicit condition register operand. preserveCondRegFlags(CondBr->getOperand(1), Cond[1]); if (BytesAdded) *BytesAdded = 4; return 1; } assert(TBB && FBB); MachineInstr *CondBr = BuildMI(&MBB, DL, get(Opcode)) .addMBB(TBB); BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH)) .addMBB(FBB); MachineOperand &CondReg = CondBr->getOperand(1); CondReg.setIsUndef(Cond[1].isUndef()); CondReg.setIsKill(Cond[1].isKill()); if (BytesAdded) *BytesAdded = 8; return 2; } bool SIInstrInfo::reverseBranchCondition( SmallVectorImpl<MachineOperand> &Cond) const { if (Cond.size() != 2) { return true; } if (Cond[0].isImm()) { Cond[0].setImm(-Cond[0].getImm()); return false; } return true; } bool SIInstrInfo::canInsertSelect(const MachineBasicBlock &MBB, ArrayRef<MachineOperand> Cond, unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const { switch (Cond[0].getImm()) { case VCCNZ: case VCCZ: { const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); const TargetRegisterClass *RC = MRI.getRegClass(TrueReg); assert(MRI.getRegClass(FalseReg) == RC); int NumInsts = AMDGPU::getRegBitWidth(RC->getID()) / 32; CondCycles = TrueCycles = FalseCycles = NumInsts; // ??? // Limit to equal cost for branch vs. N v_cndmask_b32s. return !RI.isSGPRClass(RC) && NumInsts <= 6; } case SCC_TRUE: case SCC_FALSE: { // FIXME: We could insert for VGPRs if we could replace the original compare // with a vector one. const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); const TargetRegisterClass *RC = MRI.getRegClass(TrueReg); assert(MRI.getRegClass(FalseReg) == RC); int NumInsts = AMDGPU::getRegBitWidth(RC->getID()) / 32; // Multiples of 8 can do s_cselect_b64 if (NumInsts % 2 == 0) NumInsts /= 2; CondCycles = TrueCycles = FalseCycles = NumInsts; // ???
return RI.isSGPRClass(RC); } default: return false; } } void SIInstrInfo::insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, unsigned DstReg, ArrayRef<MachineOperand> Cond, unsigned TrueReg, unsigned FalseReg) const { BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm()); if (Pred == VCCZ || Pred == SCC_FALSE) { Pred = static_cast<BranchPredicate>(-Pred); std::swap(TrueReg, FalseReg); } MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg); unsigned DstSize = RI.getRegSizeInBits(*DstRC); if (DstSize == 32) { unsigned SelOp = Pred == SCC_TRUE ? AMDGPU::S_CSELECT_B32 : AMDGPU::V_CNDMASK_B32_e32; // Instruction's operands are backwards from what is expected. MachineInstr *Select = BuildMI(MBB, I, DL, get(SelOp), DstReg) .addReg(FalseReg) .addReg(TrueReg); preserveCondRegFlags(Select->getOperand(3), Cond[1]); return; } if (DstSize == 64 && Pred == SCC_TRUE) { MachineInstr *Select = BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), DstReg) .addReg(FalseReg) .addReg(TrueReg); preserveCondRegFlags(Select->getOperand(3), Cond[1]); return; } static const int16_t Sub0_15[] = { AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11, AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, }; static const int16_t Sub0_15_64[] = { AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, AMDGPU::sub4_sub5, AMDGPU::sub6_sub7, AMDGPU::sub8_sub9, AMDGPU::sub10_sub11, AMDGPU::sub12_sub13, AMDGPU::sub14_sub15, }; unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32; const TargetRegisterClass *EltRC = &AMDGPU::VGPR_32RegClass; const int16_t *SubIndices = Sub0_15; int NElts = DstSize / 32; // 64-bit select is only available for SALU. if (Pred == SCC_TRUE) { SelOp = AMDGPU::S_CSELECT_B64; EltRC = &AMDGPU::SGPR_64RegClass; SubIndices = Sub0_15_64; assert(NElts % 2 == 0); NElts /= 2; } MachineInstrBuilder MIB = BuildMI( MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg); I = MIB->getIterator(); SmallVector<unsigned, 8> Regs; for (int Idx = 0; Idx != NElts; ++Idx) { unsigned DstElt = MRI.createVirtualRegister(EltRC); Regs.push_back(DstElt); unsigned SubIdx = SubIndices[Idx]; MachineInstr *Select = BuildMI(MBB, I, DL, get(SelOp), DstElt) .addReg(FalseReg, 0, SubIdx) .addReg(TrueReg, 0, SubIdx); preserveCondRegFlags(Select->getOperand(3), Cond[1]); MIB.addReg(DstElt) .addImm(SubIdx); } } bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) const { switch (MI.getOpcode()) { case AMDGPU::V_MOV_B32_e32: case AMDGPU::V_MOV_B32_e64: case AMDGPU::V_MOV_B64_PSEUDO: { // If there are additional implicit register operands, this may be used for // register indexing so the source register operand isn't simply copied.
unsigned NumOps = MI.getDesc().getNumOperands() + MI.getDesc().getNumImplicitUses(); return MI.getNumOperands() == NumOps; } case AMDGPU::S_MOV_B32: case AMDGPU::S_MOV_B64: case AMDGPU::COPY: return true; default: return false; } } unsigned SIInstrInfo::getAddressSpaceForPseudoSourceKind( PseudoSourceValue::PSVKind Kind) const { switch (Kind) { case PseudoSourceValue::Stack: case PseudoSourceValue::FixedStack: return AMDGPUASI.PRIVATE_ADDRESS; case PseudoSourceValue::ConstantPool: case PseudoSourceValue::GOT: case PseudoSourceValue::JumpTable: case PseudoSourceValue::GlobalValueCallEntry: case PseudoSourceValue::ExternalSymbolCallEntry: case PseudoSourceValue::TargetCustom: return AMDGPUASI.CONSTANT_ADDRESS; } return AMDGPUASI.FLAT_ADDRESS; } static void removeModOperands(MachineInstr &MI) { unsigned Opc = MI.getOpcode(); int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers); int Src2ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers); MI.RemoveOperand(Src2ModIdx); MI.RemoveOperand(Src1ModIdx); MI.RemoveOperand(Src0ModIdx); } bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg, MachineRegisterInfo *MRI) const { if (!MRI->hasOneNonDBGUse(Reg)) return false; switch (DefMI.getOpcode()) { default: return false; case AMDGPU::S_MOV_B64: // TODO: We could fold 64-bit immediates, but this gets complicated // when there are sub-registers. return false; case AMDGPU::V_MOV_B32_e32: case AMDGPU::S_MOV_B32: break; } const MachineOperand *ImmOp = getNamedOperand(DefMI, AMDGPU::OpName::src0); assert(ImmOp); // FIXME: We could handle FrameIndex values here. if (!ImmOp->isImm()) return false; unsigned Opc = UseMI.getOpcode(); if (Opc == AMDGPU::COPY) { bool isVGPRCopy = RI.isVGPR(*MRI, UseMI.getOperand(0).getReg()); unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32; UseMI.setDesc(get(NewOpc)); UseMI.getOperand(1).ChangeToImmediate(ImmOp->getImm()); UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent()); return true; } if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAD_F16 || Opc == AMDGPU::V_MAC_F16_e64) { // Don't fold if we are using source or output modifiers. The new VOP2 // instructions don't have them. if (hasAnyModifiersSet(UseMI)) return false; // If this is a free constant, there's no reason to do this. // TODO: We could fold this here instead of letting SIFoldOperands do it // later. MachineOperand *Src0 = getNamedOperand(UseMI, AMDGPU::OpName::src0); // Any src operand can be used for the legality check. if (isInlineConstant(UseMI, *Src0, *ImmOp)) return false; bool IsF32 = Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64; MachineOperand *Src1 = getNamedOperand(UseMI, AMDGPU::OpName::src1); MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2); // Multiplied part is the constant: Use v_madmk_{f16, f32}. // We should only expect these to be on src0 due to canonicalizations. if (Src0->isReg() && Src0->getReg() == Reg) { if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))) return false; if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg()))) return false; // We need to swap operands 0 and 1 since madmk constant is at operand 1. const int64_t Imm = ImmOp->getImm(); // FIXME: This would be a lot easier if we could return a new instruction // instead of having to modify in place.
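// A sketch of the rewrite performed in this block (illustrative virtual
// registers, f32 case):
//   %imm = V_MOV_B32_e32 <literal>    ; DefMI
//   %dst = V_MAD_F32 %imm, %b, %c     ; UseMI, constant in src0
// becomes
//   %dst = V_MADMK_F32 %b, <literal>, %c
// i.e. src0 takes over src1's register and the literal moves into the madmk
// immediate slot.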
// Remove these first since they are at the end. UseMI.RemoveOperand( AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod)); UseMI.RemoveOperand( AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp)); unsigned Src1Reg = Src1->getReg(); unsigned Src1SubReg = Src1->getSubReg(); Src0->setReg(Src1Reg); Src0->setSubReg(Src1SubReg); Src0->setIsKill(Src1->isKill()); if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64) UseMI.untieRegOperand( AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); Src1->ChangeToImmediate(Imm); removeModOperands(UseMI); UseMI.setDesc(get(IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16)); bool DeleteDef = MRI->hasOneNonDBGUse(Reg); if (DeleteDef) DefMI.eraseFromParent(); return true; } // Added part is the constant: Use v_madak_{f16, f32}. if (Src2->isReg() && Src2->getReg() == Reg) { // Not allowed to use constant bus for another operand. // We can however allow an inline immediate as src0. if (!Src0->isImm() && (Src0->isReg() && RI.isSGPRClass(MRI->getRegClass(Src0->getReg())))) return false; if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))) return false; const int64_t Imm = ImmOp->getImm(); // FIXME: This would be a lot easier if we could return a new instruction // instead of having to modify in place. // Remove these first since they are at the end. UseMI.RemoveOperand( AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod)); UseMI.RemoveOperand( AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp)); if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64) UseMI.untieRegOperand( AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); // ChangingToImmediate adds Src2 back to the instruction. Src2->ChangeToImmediate(Imm); // These come before src2. removeModOperands(UseMI); UseMI.setDesc(get(IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16)); bool DeleteDef = MRI->hasOneNonDBGUse(Reg); if (DeleteDef) DefMI.eraseFromParent(); return true; } } return false; } static bool offsetsDoNotOverlap(int WidthA, int OffsetA, int WidthB, int OffsetB) { int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB; int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA; int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB; return LowOffset + LowWidth <= HighOffset; } bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr &MIa, MachineInstr &MIb) const { unsigned BaseReg0, BaseReg1; int64_t Offset0, Offset1; if (getMemOpBaseRegImmOfs(MIa, BaseReg0, Offset0, &RI) && getMemOpBaseRegImmOfs(MIb, BaseReg1, Offset1, &RI)) { if (!MIa.hasOneMemOperand() || !MIb.hasOneMemOperand()) { // FIXME: Handle ds_read2 / ds_write2. return false; } unsigned Width0 = (*MIa.memoperands_begin())->getSize(); unsigned Width1 = (*MIb.memoperands_begin())->getSize(); if (BaseReg0 == BaseReg1 && offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1)) { return true; } } return false; } bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const { assert((MIa.mayLoad() || MIa.mayStore()) && "MIa must load from or modify a memory location"); assert((MIb.mayLoad() || MIb.mayStore()) && "MIb must load from or modify a memory location"); if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects()) return false; // XXX - Can we relax this between address spaces? 
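// A worked example for the offset-based disjointness check used below: two
// 4-byte accesses off the same base at offsets 0 and 4 are disjoint
// (0 + 4 <= 4), while 4-byte accesses at offsets 0 and 2 are not (0 + 4 > 2).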
if (MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef()) return false; if (AA && MIa.hasOneMemOperand() && MIb.hasOneMemOperand()) { const MachineMemOperand *MMOa = *MIa.memoperands_begin(); const MachineMemOperand *MMOb = *MIb.memoperands_begin(); if (MMOa->getValue() && MMOb->getValue()) { MemoryLocation LocA(MMOa->getValue(), MMOa->getSize(), MMOa->getAAInfo()); MemoryLocation LocB(MMOb->getValue(), MMOb->getSize(), MMOb->getAAInfo()); if (!AA->alias(LocA, LocB)) return true; } } // TODO: Should we check the address space from the MachineMemOperand? That // would allow us to distinguish objects we know don't alias based on the // underlying address space, even if it was lowered to a different one, // e.g. private accesses lowered to use MUBUF instructions on a scratch // buffer. if (isDS(MIa)) { if (isDS(MIb)) return checkInstOffsetsDoNotOverlap(MIa, MIb); return !isFLAT(MIb) || isSegmentSpecificFLAT(MIb); } if (isMUBUF(MIa) || isMTBUF(MIa)) { if (isMUBUF(MIb) || isMTBUF(MIb)) return checkInstOffsetsDoNotOverlap(MIa, MIb); return !isFLAT(MIb) && !isSMRD(MIb); } if (isSMRD(MIa)) { if (isSMRD(MIb)) return checkInstOffsetsDoNotOverlap(MIa, MIb); return !isFLAT(MIb) && !isMUBUF(MIa) && !isMTBUF(MIa); } if (isFLAT(MIa)) { if (isFLAT(MIb)) return checkInstOffsetsDoNotOverlap(MIa, MIb); return false; } return false; } static int64_t getFoldableImm(const MachineOperand* MO) { if (!MO->isReg()) return false; const MachineFunction *MF = MO->getParent()->getParent()->getParent(); const MachineRegisterInfo &MRI = MF->getRegInfo(); auto Def = MRI.getUniqueVRegDef(MO->getReg()); if (Def && Def->getOpcode() == AMDGPU::V_MOV_B32_e32 && Def->getOperand(1).isImm()) return Def->getOperand(1).getImm(); return AMDGPU::NoRegister; } MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB, MachineInstr &MI, LiveVariables *LV) const { bool IsF16 = false; switch (MI.getOpcode()) { default: return nullptr; case AMDGPU::V_MAC_F16_e64: IsF16 = true; LLVM_FALLTHROUGH; case AMDGPU::V_MAC_F32_e64: break; case AMDGPU::V_MAC_F16_e32: IsF16 = true; LLVM_FALLTHROUGH; case AMDGPU::V_MAC_F32_e32: { int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0); const MachineOperand *Src0 = &MI.getOperand(Src0Idx); if (!Src0->isReg() && !Src0->isImm()) return nullptr; if (Src0->isImm() && !isInlineConstant(MI, Src0Idx, *Src0)) return nullptr; break; } } const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst); const MachineOperand *Src0 = getNamedOperand(MI, AMDGPU::OpName::src0); const MachineOperand *Src0Mods = getNamedOperand(MI, AMDGPU::OpName::src0_modifiers); const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1); const MachineOperand *Src1Mods = getNamedOperand(MI, AMDGPU::OpName::src1_modifiers); const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2); const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp); const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod); if (!Src0Mods && !Src1Mods && !Clamp && !Omod && // If we have an SGPR input, we will violate the constant bus restriction. (!Src0->isReg() || !RI.isSGPRReg(MBB->getParent()->getRegInfo(), Src0->getReg()))) { if (auto Imm = getFoldableImm(Src2)) { return BuildMI(*MBB, MI, MI.getDebugLoc(), get(IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32)) .add(*Dst) .add(*Src0) .add(*Src1) .addImm(Imm); } if (auto Imm = getFoldableImm(Src1)) { return BuildMI(*MBB, MI, MI.getDebugLoc(), get(IsF16 ? 
AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32)) .add(*Dst) .add(*Src0) .addImm(Imm) .add(*Src2); } if (auto Imm = getFoldableImm(Src0)) { if (isOperandLegal(MI, AMDGPU::getNamedOperandIdx(AMDGPU::V_MADMK_F32, AMDGPU::OpName::src0), Src1)) return BuildMI(*MBB, MI, MI.getDebugLoc(), get(IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32)) .add(*Dst) .add(*Src1) .addImm(Imm) .add(*Src2); } } return BuildMI(*MBB, MI, MI.getDebugLoc(), get(IsF16 ? AMDGPU::V_MAD_F16 : AMDGPU::V_MAD_F32)) .add(*Dst) .addImm(Src0Mods ? Src0Mods->getImm() : 0) .add(*Src0) .addImm(Src1Mods ? Src1Mods->getImm() : 0) .add(*Src1) .addImm(0) // Src mods .add(*Src2) .addImm(Clamp ? Clamp->getImm() : 0) .addImm(Omod ? Omod->getImm() : 0); } // It's not generally safe to move VALU instructions across these since it will // start using the register as a base index rather than directly. // XXX - Why isn't hasSideEffects sufficient for these? static bool changesVGPRIndexingMode(const MachineInstr &MI) { switch (MI.getOpcode()) { case AMDGPU::S_SET_GPR_IDX_ON: case AMDGPU::S_SET_GPR_IDX_MODE: case AMDGPU::S_SET_GPR_IDX_OFF: return true; default: return false; } } bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const { // XXX - Do we want the SP check in the base implementation? // Target-independent instructions do not have an implicit-use of EXEC, even // when they operate on VGPRs. Treating EXEC modifications as scheduling // boundaries prevents incorrect movements of such instructions. return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF) || MI.modifiesRegister(AMDGPU::EXEC, &RI) || MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 || MI.getOpcode() == AMDGPU::S_SETREG_B32 || changesVGPRIndexingMode(MI); } bool SIInstrInfo::isInlineConstant(const APInt &Imm) const { switch (Imm.getBitWidth()) { case 32: return AMDGPU::isInlinableLiteral32(Imm.getSExtValue(), ST.hasInv2PiInlineImm()); case 64: return AMDGPU::isInlinableLiteral64(Imm.getSExtValue(), ST.hasInv2PiInlineImm()); case 16: return ST.has16BitInsts() && AMDGPU::isInlinableLiteral16(Imm.getSExtValue(), ST.hasInv2PiInlineImm()); default: llvm_unreachable("invalid bitwidth"); } } bool SIInstrInfo::isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const { if (!MO.isImm() || OperandType < AMDGPU::OPERAND_SRC_FIRST || OperandType > AMDGPU::OPERAND_SRC_LAST) return false; // MachineOperand provides no way to tell the true operand size, since it only // records a 64-bit value. We need to know the size to determine if a 32-bit // floating point immediate bit pattern is legal for an integer immediate. It // would be for any 32-bit integer operand, but would not be for a 64-bit one. 
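// For example, the 32-bit pattern 0x3f800000 (1.0f) is an inline constant for
// a 32-bit operand, but the same value seen as a 64-bit immediate is not,
// since 64-bit 1.0 is the different pattern 0x3ff0000000000000.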
int64_t Imm = MO.getImm(); switch (OperandType) { case AMDGPU::OPERAND_REG_IMM_INT32: case AMDGPU::OPERAND_REG_IMM_FP32: case AMDGPU::OPERAND_REG_INLINE_C_INT32: case AMDGPU::OPERAND_REG_INLINE_C_FP32: { int32_t Trunc = static_cast<int32_t>(Imm); return Trunc == Imm && AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm()); } case AMDGPU::OPERAND_REG_IMM_INT64: case AMDGPU::OPERAND_REG_IMM_FP64: case AMDGPU::OPERAND_REG_INLINE_C_INT64: case AMDGPU::OPERAND_REG_INLINE_C_FP64: return AMDGPU::isInlinableLiteral64(MO.getImm(), ST.hasInv2PiInlineImm()); case AMDGPU::OPERAND_REG_IMM_INT16: case AMDGPU::OPERAND_REG_IMM_FP16: case AMDGPU::OPERAND_REG_INLINE_C_INT16: case AMDGPU::OPERAND_REG_INLINE_C_FP16: { if (isInt<16>(Imm) || isUInt<16>(Imm)) { // A few special case instructions have 16-bit operands on subtargets // where 16-bit instructions are not legal. // TODO: Do the 32-bit immediates work? We shouldn't really need to handle // constants in these cases int16_t Trunc = static_cast<int16_t>(Imm); return ST.has16BitInsts() && AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm()); } return false; } case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: { uint32_t Trunc = static_cast<uint32_t>(Imm); return AMDGPU::isInlinableLiteralV216(Trunc, ST.hasInv2PiInlineImm()); } default: llvm_unreachable("invalid bitwidth"); } } bool SIInstrInfo::isLiteralConstantLike(const MachineOperand &MO, const MCOperandInfo &OpInfo) const { switch (MO.getType()) { case MachineOperand::MO_Register: return false; case MachineOperand::MO_Immediate: return !isInlineConstant(MO, OpInfo); case MachineOperand::MO_FrameIndex: case MachineOperand::MO_MachineBasicBlock: case MachineOperand::MO_ExternalSymbol: case MachineOperand::MO_GlobalAddress: case MachineOperand::MO_MCSymbol: return true; default: llvm_unreachable("unexpected operand type"); } } static bool compareMachineOp(const MachineOperand &Op0, const MachineOperand &Op1) { if (Op0.getType() != Op1.getType()) return false; switch (Op0.getType()) { case MachineOperand::MO_Register: return Op0.getReg() == Op1.getReg(); case MachineOperand::MO_Immediate: return Op0.getImm() == Op1.getImm(); default: llvm_unreachable("Didn't expect to be comparing these operand types"); } } bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo, const MachineOperand &MO) const { const MCOperandInfo &OpInfo = get(MI.getOpcode()).OpInfo[OpNo]; assert(MO.isImm() || MO.isTargetIndex() || MO.isFI()); if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE) return true; if (OpInfo.RegClass < 0) return false; if (MO.isImm() && isInlineConstant(MO, OpInfo)) return RI.opCanUseInlineConstant(OpInfo.OperandType); return RI.opCanUseLiteralConstant(OpInfo.OperandType); } bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const { int Op32 = AMDGPU::getVOPe32(Opcode); if (Op32 == -1) return false; return pseudoToMCOpcode(Op32) != -1; } bool SIInstrInfo::hasModifiers(unsigned Opcode) const { // The src0_modifier operand is present on all instructions // that have modifiers.
return AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0_modifiers) != -1; } bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI, unsigned OpName) const { const MachineOperand *Mods = getNamedOperand(MI, OpName); return Mods && Mods->getImm(); } bool SIInstrInfo::hasAnyModifiersSet(const MachineInstr &MI) const { return hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) || hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) || hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers) || hasModifiersSet(MI, AMDGPU::OpName::clamp) || hasModifiersSet(MI, AMDGPU::OpName::omod); } bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const { // Literal constants use the constant bus. //if (isLiteralConstantLike(MO, OpInfo)) // return true; if (MO.isImm()) return !isInlineConstant(MO, OpInfo); if (!MO.isReg()) return true; // Misc other operands like FrameIndex if (!MO.isUse()) return false; if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) return RI.isSGPRClass(MRI.getRegClass(MO.getReg())); // FLAT_SCR is just an SGPR pair. if (!MO.isImplicit() && (MO.getReg() == AMDGPU::FLAT_SCR)) return true; // EXEC register uses the constant bus. if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC) return true; // SGPRs use the constant bus return (MO.getReg() == AMDGPU::VCC || MO.getReg() == AMDGPU::M0 || (!MO.isImplicit() && (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) || AMDGPU::SGPR_64RegClass.contains(MO.getReg())))); } static unsigned findImplicitSGPRRead(const MachineInstr &MI) { for (const MachineOperand &MO : MI.implicit_operands()) { // We only care about reads. if (MO.isDef()) continue; switch (MO.getReg()) { case AMDGPU::VCC: case AMDGPU::M0: case AMDGPU::FLAT_SCR: return MO.getReg(); default: break; } } return AMDGPU::NoRegister; } static bool shouldReadExec(const MachineInstr &MI) { if (SIInstrInfo::isVALU(MI)) { switch (MI.getOpcode()) { case AMDGPU::V_READLANE_B32: case AMDGPU::V_READLANE_B32_si: case AMDGPU::V_READLANE_B32_vi: case AMDGPU::V_WRITELANE_B32: case AMDGPU::V_WRITELANE_B32_si: case AMDGPU::V_WRITELANE_B32_vi: return false; } return true; } if (SIInstrInfo::isGenericOpcode(MI.getOpcode()) || SIInstrInfo::isSALU(MI) || SIInstrInfo::isSMRD(MI)) return false; return true; } static bool isSubRegOf(const SIRegisterInfo &TRI, const MachineOperand &SuperVec, const MachineOperand &SubReg) { if (TargetRegisterInfo::isPhysicalRegister(SubReg.getReg())) return TRI.isSubRegister(SuperVec.getReg(), SubReg.getReg()); return SubReg.getSubReg() != AMDGPU::NoSubRegister && SubReg.getReg() == SuperVec.getReg(); } bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const { uint16_t Opcode = MI.getOpcode(); if (SIInstrInfo::isGenericOpcode(MI.getOpcode())) return true; const MachineFunction *MF = MI.getParent()->getParent(); const MachineRegisterInfo &MRI = MF->getRegInfo(); int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); // Make sure the number of operands is correct. const MCInstrDesc &Desc = get(Opcode); if (!Desc.isVariadic() && Desc.getNumOperands() != MI.getNumExplicitOperands()) { ErrInfo = "Instruction has wrong number of operands."; return false; } if (MI.isInlineAsm()) { // Verify register classes for inlineasm constraints. 
for (unsigned I = InlineAsm::MIOp_FirstOperand, E = MI.getNumOperands(); I != E; ++I) { const TargetRegisterClass *RC = MI.getRegClassConstraint(I, this, &RI); if (!RC) continue; const MachineOperand &Op = MI.getOperand(I); if (!Op.isReg()) continue; unsigned Reg = Op.getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg) && !RC->contains(Reg)) { ErrInfo = "inlineasm operand has incorrect register class."; return false; } } return true; } // Make sure the register classes are correct. for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) { if (MI.getOperand(i).isFPImm()) { ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast " "all fp values to integers."; return false; } int RegClass = Desc.OpInfo[i].RegClass; switch (Desc.OpInfo[i].OperandType) { case MCOI::OPERAND_REGISTER: if (MI.getOperand(i).isImm()) { ErrInfo = "Illegal immediate value for operand."; return false; } break; case AMDGPU::OPERAND_REG_IMM_INT32: case AMDGPU::OPERAND_REG_IMM_FP32: break; case AMDGPU::OPERAND_REG_INLINE_C_INT32: case AMDGPU::OPERAND_REG_INLINE_C_FP32: case AMDGPU::OPERAND_REG_INLINE_C_INT64: case AMDGPU::OPERAND_REG_INLINE_C_FP64: case AMDGPU::OPERAND_REG_INLINE_C_INT16: case AMDGPU::OPERAND_REG_INLINE_C_FP16: { const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() && (!MO.isImm() || !isInlineConstant(MI, i))) { ErrInfo = "Illegal immediate value for operand."; return false; } break; } case MCOI::OPERAND_IMMEDIATE: case AMDGPU::OPERAND_KIMM32: // Check if this operand is an immediate. // FrameIndex operands will be replaced by immediates, so they are // allowed. if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) { ErrInfo = "Expected immediate, but got non-immediate"; return false; } LLVM_FALLTHROUGH; default: continue; } if (!MI.getOperand(i).isReg()) continue; if (RegClass != -1) { unsigned Reg = MI.getOperand(i).getReg(); if (Reg == AMDGPU::NoRegister || TargetRegisterInfo::isVirtualRegister(Reg)) continue; const TargetRegisterClass *RC = RI.getRegClass(RegClass); if (!RC->contains(Reg)) { ErrInfo = "Operand has incorrect register class."; return false; } } } // Verify SDWA if (isSDWA(MI)) { if (!ST.hasSDWA()) { ErrInfo = "SDWA is not supported on this target"; return false; } int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); const int OpIndices[] = { DstIdx, Src0Idx, Src1Idx, Src2Idx }; for (int OpIdx : OpIndices) { if (OpIdx == -1) continue; const MachineOperand &MO = MI.getOperand(OpIdx); if (!ST.hasSDWAScalar()) { // Only VGPRs on VI if (!MO.isReg() || !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.getReg()))) { ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI"; return false; } } else { // No immediates on GFX9 if (!MO.isReg()) { ErrInfo = "Only reg allowed as operands in SDWA instructions on GFX9"; return false; } } } if (!ST.hasSDWAOmod()) { // No omod allowed on VI const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod); if (OMod != nullptr && (!OMod->isImm() || OMod->getImm() != 0)) { ErrInfo = "OMod not allowed in SDWA instructions on VI"; return false; } } uint16_t BasicOpcode = AMDGPU::getBasicFromSDWAOp(Opcode); if (isVOPC(BasicOpcode)) { if (!ST.hasSDWASdst() && DstIdx != -1) { // Only vcc allowed as dst on VI for VOPC const MachineOperand &Dst = MI.getOperand(DstIdx); if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) { ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI"; return false; } } else if (!ST.hasSDWAOutModsVOPC()) { // No clamp allowed on GFX9 for VOPC const MachineOperand *Clamp
= getNamedOperand(MI, AMDGPU::OpName::clamp); if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) { ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI"; return false; } // No omod allowed on GFX9 for VOPC const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod); if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) { ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI"; return false; } } } const MachineOperand *DstUnused = getNamedOperand(MI, AMDGPU::OpName::dst_unused); if (DstUnused && DstUnused->isImm() && DstUnused->getImm() == AMDGPU::SDWA::UNUSED_PRESERVE) { const MachineOperand &Dst = MI.getOperand(DstIdx); if (!Dst.isReg() || !Dst.isTied()) { ErrInfo = "Dst register should have tied register"; return false; } const MachineOperand &TiedMO = MI.getOperand(MI.findTiedOperandIdx(DstIdx)); if (!TiedMO.isReg() || !TiedMO.isImplicit() || !TiedMO.isUse()) { ErrInfo = "Dst register should be tied to implicit use of preserved register"; return false; } else if (TargetRegisterInfo::isPhysicalRegister(TiedMO.getReg()) && Dst.getReg() != TiedMO.getReg()) { ErrInfo = "Dst register should use same physical register as preserved"; return false; } } } // Verify VOP* if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isVOPC(MI) || isSDWA(MI)) { // Only look at the true operands. Only a real operand can use the constant // bus, and we don't want to check pseudo-operands like the source modifier // flags. const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; unsigned ConstantBusCount = 0; if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) ++ConstantBusCount; unsigned SGPRUsed = findImplicitSGPRRead(MI); if (SGPRUsed != AMDGPU::NoRegister) ++ConstantBusCount; for (int OpIdx : OpIndices) { if (OpIdx == -1) break; const MachineOperand &MO = MI.getOperand(OpIdx); if (usesConstantBus(MRI, MO, MI.getDesc().OpInfo[OpIdx])) { if (MO.isReg()) { if (MO.getReg() != SGPRUsed) ++ConstantBusCount; SGPRUsed = MO.getReg(); } else { ++ConstantBusCount; } } } if (ConstantBusCount > 1) { ErrInfo = "VOP* instruction uses the constant bus more than once"; return false; } } // Verify misc. restrictions on specific instructions. if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 || Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) { const MachineOperand &Src0 = MI.getOperand(Src0Idx); const MachineOperand &Src1 = MI.getOperand(Src1Idx); const MachineOperand &Src2 = MI.getOperand(Src2Idx); if (Src0.isReg() && Src1.isReg() && Src2.isReg()) { if (!compareMachineOp(Src0, Src1) && !compareMachineOp(Src0, Src2)) { ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2"; return false; } } } if (isSOPK(MI)) { int64_t Imm = getNamedOperand(MI, AMDGPU::OpName::simm16)->getImm(); if (sopkIsZext(MI)) { if (!isUInt<16>(Imm)) { ErrInfo = "invalid immediate for SOPK instruction"; return false; } } else { if (!isInt<16>(Imm)) { ErrInfo = "invalid immediate for SOPK instruction"; return false; } } } if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 || Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 || Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 || Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) { const bool IsDst = Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 || Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64; const unsigned StaticNumOps = Desc.getNumOperands() + Desc.getNumImplicitUses(); const unsigned NumImplicitOps = IsDst ? 2 : 1; // Allow additional implicit operands. 
This allows a fixup done by the post // RA scheduler where the main implicit operand is killed and implicit-defs // are added for sub-registers that remain live after this instruction. if (MI.getNumOperands() < StaticNumOps + NumImplicitOps) { ErrInfo = "missing implicit register operands"; return false; } const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst); if (IsDst) { if (!Dst->isUse()) { ErrInfo = "v_movreld_b32 vdst should be a use operand"; return false; } unsigned UseOpIdx; if (!MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) || UseOpIdx != StaticNumOps + 1) { ErrInfo = "movrel implicit operands should be tied"; return false; } } const MachineOperand &Src0 = MI.getOperand(Src0Idx); const MachineOperand &ImpUse = MI.getOperand(StaticNumOps + NumImplicitOps - 1); if (!ImpUse.isReg() || !ImpUse.isUse() || !isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) { ErrInfo = "src0 should be subreg of implicit vector use"; return false; } } // Make sure we aren't losing exec uses in the td files. This mostly requires // being careful when using let Uses to try to add other use registers. if (shouldReadExec(MI)) { if (!MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) { ErrInfo = "VALU instruction does not implicitly read exec mask"; return false; } } if (isSMRD(MI)) { if (MI.mayStore()) { // The register offset form of scalar stores may only use m0 as the // soffset register. const MachineOperand *Soff = getNamedOperand(MI, AMDGPU::OpName::soff); if (Soff && Soff->getReg() != AMDGPU::M0) { ErrInfo = "scalar stores must use m0 as offset register"; return false; } } } if (isFLAT(MI) && !MF->getSubtarget().hasFlatInstOffsets()) { const MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset); if (Offset->getImm() != 0) { ErrInfo = "subtarget does not support offsets in flat instructions"; return false; } } return true; } unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const { switch (MI.getOpcode()) { default: return AMDGPU::INSTRUCTION_LIST_END; case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE; case AMDGPU::COPY: return AMDGPU::COPY; case AMDGPU::PHI: return AMDGPU::PHI; case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG; case AMDGPU::WQM: return AMDGPU::WQM; case AMDGPU::WWM: return AMDGPU::WWM; case AMDGPU::S_MOV_B32: return MI.getOperand(1).isReg() ? AMDGPU::COPY : AMDGPU::V_MOV_B32_e32; case AMDGPU::S_ADD_I32: return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_I32_e32; case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32; case AMDGPU::S_SUB_I32: return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32; // FIXME: These are not consistently handled, and selected when the carry is // used. 
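// e.g. S_ADD_U32 below produces its carry in SCC, while the VALU replacement
// V_ADD_I32_e32 defines VCC instead, so whatever moves these to the VALU must
// also rewrite the carry consumers (a rough gloss on the FIXME above).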
case AMDGPU::S_ADD_U32: return AMDGPU::V_ADD_I32_e32; case AMDGPU::S_SUB_U32: return AMDGPU::V_SUB_I32_e32; case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32; case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32; case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64; case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64; case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64; case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e64; case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e64; case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e64; case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e64; case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32; case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64; case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32; case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64; case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32; case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64; case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32; case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32; case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32; case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32; case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64; case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32; case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32; case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32; case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32; case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32; case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32; case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32; case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32; case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32; case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e32; case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e32; case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e32; case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e32; case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e32; case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e32; case AMDGPU::S_CMP_EQ_U64: return AMDGPU::V_CMP_EQ_U64_e32; case AMDGPU::S_CMP_LG_U64: return AMDGPU::V_CMP_NE_U64_e32; case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64; case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32; case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32; case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64; case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ; case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ; } } const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI, unsigned OpNo) const { const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); const MCInstrDesc &Desc = get(MI.getOpcode()); if (MI.isVariadic() || OpNo >= Desc.getNumOperands() || Desc.OpInfo[OpNo].RegClass == -1) { unsigned Reg = MI.getOperand(OpNo).getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) return MRI.getRegClass(Reg); return RI.getPhysRegClass(Reg); } unsigned RCID = Desc.OpInfo[OpNo].RegClass; return RI.getRegClass(RCID); } bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const { switch (MI.getOpcode()) { case AMDGPU::COPY: case AMDGPU::REG_SEQUENCE: case AMDGPU::PHI: case AMDGPU::INSERT_SUBREG: return RI.hasVGPRs(getOpRegClass(MI, 0)); default: return RI.hasVGPRs(getOpRegClass(MI, OpNo)); } } void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const { MachineBasicBlock::iterator I = MI; MachineBasicBlock *MBB = MI.getParent(); 
MachineOperand &MO = MI.getOperand(OpIdx); MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); unsigned RCID = get(MI.getOpcode()).OpInfo[OpIdx].RegClass; const TargetRegisterClass *RC = RI.getRegClass(RCID); unsigned Opcode = AMDGPU::V_MOV_B32_e32; if (MO.isReg()) Opcode = AMDGPU::COPY; else if (RI.isSGPRClass(RC)) Opcode = AMDGPU::S_MOV_B32; const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC); if (RI.getCommonSubClass(&AMDGPU::VReg_64RegClass, VRC)) VRC = &AMDGPU::VReg_64RegClass; else VRC = &AMDGPU::VGPR_32RegClass; unsigned Reg = MRI.createVirtualRegister(VRC); DebugLoc DL = MBB->findDebugLoc(I); BuildMI(*MI.getParent(), I, DL, get(Opcode), Reg).add(MO); MO.ChangeToRegister(Reg, false); } unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const { MachineBasicBlock *MBB = MI->getParent(); DebugLoc DL = MI->getDebugLoc(); unsigned SubReg = MRI.createVirtualRegister(SubRC); if (SuperReg.getSubReg() == AMDGPU::NoSubRegister) { BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg) .addReg(SuperReg.getReg(), 0, SubIdx); return SubReg; } // Just in case the super register is itself a sub-register, copy it to a new // value so we don't need to worry about merging its subreg index with the // SubIdx passed to this function. The register coalescer should be able to // eliminate this extra copy. unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC); BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg) .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg()); BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg) .addReg(NewSuperReg, 0, SubIdx); return SubReg; } MachineOperand SIInstrInfo::buildExtractSubRegOrImm( MachineBasicBlock::iterator MII, MachineRegisterInfo &MRI, MachineOperand &Op, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const { if (Op.isImm()) { if (SubIdx == AMDGPU::sub0) return MachineOperand::CreateImm(static_cast<int32_t>(Op.getImm())); if (SubIdx == AMDGPU::sub1) return MachineOperand::CreateImm(static_cast<int32_t>(Op.getImm() >> 32)); llvm_unreachable("Unhandled register index for immediate"); } unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC, SubIdx, SubRC); return MachineOperand::CreateReg(SubReg, false); } // Change the order of operands from (0, 1, 2) to (0, 2, 1) void SIInstrInfo::swapOperands(MachineInstr &Inst) const { assert(Inst.getNumExplicitOperands() == 3); MachineOperand Op1 = Inst.getOperand(1); Inst.RemoveOperand(1); Inst.addOperand(Op1); } bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const { if (!MO.isReg()) return false; unsigned Reg = MO.getReg(); const TargetRegisterClass *RC = TargetRegisterInfo::isVirtualRegister(Reg) ? MRI.getRegClass(Reg) : RI.getPhysRegClass(Reg); const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo()); RC = TRI->getSubRegClass(RC, MO.getSubReg()); // In order to be legal, the common sub-class must be equal to the // class of the current operand.
For example: // // v_mov_b32 s0 ; Operand defined as vsrc_b32 // ; RI.getCommonSubClass(s0,vsrc_b32) = sgpr ; LEGAL // // s_sendmsg 0, s0 ; Operand defined as m0reg // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC; } bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const { if (MO.isReg()) return isLegalRegOperand(MRI, OpInfo, MO); // Handle non-register types that are treated like immediates. assert(MO.isImm() || MO.isTargetIndex() || MO.isFI()); return true; } bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO) const { const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); const MCInstrDesc &InstDesc = MI.getDesc(); const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx]; const TargetRegisterClass *DefinedRC = OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr; if (!MO) MO = &MI.getOperand(OpIdx); if (isVALU(MI) && usesConstantBus(MRI, *MO, OpInfo)) { RegSubRegPair SGPRUsed; if (MO->isReg()) SGPRUsed = RegSubRegPair(MO->getReg(), MO->getSubReg()); for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { if (i == OpIdx) continue; const MachineOperand &Op = MI.getOperand(i); if (Op.isReg()) { if ((Op.getReg() != SGPRUsed.Reg || Op.getSubReg() != SGPRUsed.SubReg) && usesConstantBus(MRI, Op, InstDesc.OpInfo[i])) { return false; } } else if (InstDesc.OpInfo[i].OperandType == AMDGPU::OPERAND_KIMM32) { return false; } } } if (MO->isReg()) { assert(DefinedRC); return isLegalRegOperand(MRI, OpInfo, *MO); } // Handle non-register types that are treated like immediates. assert(MO->isImm() || MO->isTargetIndex() || MO->isFI()); if (!DefinedRC) { // This operand expects an immediate. return true; } return isImmOperandLegal(MI, OpIdx, *MO); } void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const { unsigned Opc = MI.getOpcode(); const MCInstrDesc &InstrDesc = get(Opc); int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); MachineOperand &Src1 = MI.getOperand(Src1Idx); // If there is an implicit SGPR use such as VCC use for v_addc_u32/v_subb_u32 // we need to only have one constant bus use. // // Note we do not need to worry about literal constants here. They are // disabled for the operand type for instructions because they will always // violate the one constant bus use rule. bool HasImplicitSGPR = findImplicitSGPRRead(MI) != AMDGPU::NoRegister; if (HasImplicitSGPR) { int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); MachineOperand &Src0 = MI.getOperand(Src0Idx); if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg())) legalizeOpWithMove(MI, Src0Idx); } // VOP2 src0 instructions support all operand types, so we don't need to check // their legality. If src1 is already legal, we don't need to do anything. if (isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src1)) return; // Special case: V_READLANE_B32 accepts only immediate or SGPR operands for // lane select. Fix up using V_READFIRSTLANE, since we assume that the lane // select is uniform. 
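[Editor's note: a rough standalone model (plain C++, not part of the patch) of what V_READFIRSTLANE_B32 computes; the fix-up in the code that follows is only sound because the lane-select value is assumed uniform, i.e. all active lanes already agree.]

#include <array>
#include <cassert>
#include <cstdint>

// Model of V_READFIRSTLANE_B32 over a 64-lane wavefront: return the value
// held by the first lane whose exec bit is set.
static uint32_t readFirstLane(const std::array<uint32_t, 64> &Lanes,
                              uint64_t Exec) {
  assert(Exec != 0 && "no active lanes");
  for (unsigned L = 0; L < 64; ++L)
    if (Exec & (uint64_t(1) << L))
      return Lanes[L];
  return 0; // unreachable: Exec was nonzero
}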
if (Opc == AMDGPU::V_READLANE_B32 && Src1.isReg() && RI.isVGPR(MRI, Src1.getReg())) { unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); const DebugLoc &DL = MI.getDebugLoc(); BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) .add(Src1); Src1.ChangeToRegister(Reg, false); return; } // We do not use commuteInstruction here because it is too aggressive and will // commute if it is possible. We only want to commute here if it improves // legality. This can be called a fairly large number of times so don't waste // compile time pointlessly swapping and checking legality again. if (HasImplicitSGPR || !MI.isCommutable()) { legalizeOpWithMove(MI, Src1Idx); return; } int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); MachineOperand &Src0 = MI.getOperand(Src0Idx); // If src0 can be used as src1, commuting will make the operands legal. // Otherwise we have to give up and insert a move. // // TODO: Other immediate-like operand kinds could be commuted if there was a // MachineOperand::ChangeTo* for them. if ((!Src1.isImm() && !Src1.isReg()) || !isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0)) { legalizeOpWithMove(MI, Src1Idx); return; } int CommutedOpc = commuteOpcode(MI); if (CommutedOpc == -1) { legalizeOpWithMove(MI, Src1Idx); return; } MI.setDesc(get(CommutedOpc)); unsigned Src0Reg = Src0.getReg(); unsigned Src0SubReg = Src0.getSubReg(); bool Src0Kill = Src0.isKill(); if (Src1.isImm()) Src0.ChangeToImmediate(Src1.getImm()); else if (Src1.isReg()) { Src0.ChangeToRegister(Src1.getReg(), false, false, Src1.isKill()); Src0.setSubReg(Src1.getSubReg()); } else llvm_unreachable("Should only have register or immediate operands"); Src1.ChangeToRegister(Src0Reg, false, false, Src0Kill); Src1.setSubReg(Src0SubReg); } // Legalize VOP3 operands. Because all operand types are supported for any // operand, and since literal constants are not allowed and should never be // seen, we only need to worry about inserting copies if we use multiple SGPR // operands. void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const { unsigned Opc = MI.getOpcode(); int VOP3Idx[3] = { AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0), AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1), AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2) }; // Find the one SGPR operand we are allowed to use. unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx); for (unsigned i = 0; i < 3; ++i) { int Idx = VOP3Idx[i]; if (Idx == -1) break; MachineOperand &MO = MI.getOperand(Idx); // We should never see a VOP3 instruction with an illegal immediate operand. if (!MO.isReg()) continue; if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg()))) continue; // VGPRs are legal if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) { SGPRReg = MO.getReg(); // We can use one SGPR in each VOP3 instruction. continue; } // If we make it this far, then the operand is not legal and we must // legalize it. 
legalizeOpWithMove(MI, Idx); } } unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI) const { const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg); const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC); unsigned DstReg = MRI.createVirtualRegister(SRC); unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32; SmallVector<unsigned, 8> SRegs; for (unsigned i = 0; i < SubRegs; ++i) { unsigned SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(AMDGPU::V_READFIRSTLANE_B32), SGPR) .addReg(SrcReg, 0, RI.getSubRegFromChannel(i)); SRegs.push_back(SGPR); } MachineInstrBuilder MIB = BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), DstReg); for (unsigned i = 0; i < SubRegs; ++i) { MIB.addReg(SRegs[i]); MIB.addImm(RI.getSubRegFromChannel(i)); } return DstReg; } void SIInstrInfo::legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const { // If the pointer is stored in VGPRs, then we need to move them to // SGPRs using v_readfirstlane. This is safe because we only select // loads with uniform pointers to SMRD instructions, so we know the // pointer value is uniform. MachineOperand *SBase = getNamedOperand(MI, AMDGPU::OpName::sbase); if (SBase && !RI.isSGPRClass(MRI.getRegClass(SBase->getReg()))) { unsigned SGPR = readlaneVGPRToSGPR(SBase->getReg(), MI, MRI); SBase->setReg(SGPR); } } void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const { unsigned OpReg = Op.getReg(); unsigned OpSubReg = Op.getSubReg(); const TargetRegisterClass *OpRC = RI.getSubClassWithSubReg( RI.getRegClassForReg(MRI, OpReg), OpSubReg); // Check if operand is already the correct register class. if (DstRC == OpRC) return; unsigned DstReg = MRI.createVirtualRegister(DstRC); MachineInstr *Copy = BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg).add(Op); Op.setReg(DstReg); Op.setSubReg(0); MachineInstr *Def = MRI.getVRegDef(OpReg); if (!Def) return; // Try to eliminate the copy if it is copying an immediate value. if (Def->isMoveImmediate()) FoldImmediate(*Copy, *Def, OpReg, &MRI); } void SIInstrInfo::legalizeOperands(MachineInstr &MI) const { MachineFunction &MF = *MI.getParent()->getParent(); MachineRegisterInfo &MRI = MF.getRegInfo(); // Legalize VOP2 if (isVOP2(MI) || isVOPC(MI)) { legalizeOperandsVOP2(MRI, MI); return; } // Legalize VOP3 if (isVOP3(MI)) { legalizeOperandsVOP3(MRI, MI); return; } // Legalize SMRD if (isSMRD(MI)) { legalizeOperandsSMRD(MRI, MI); return; } // Legalize REG_SEQUENCE and PHI // The register class of the operands must be the same type as the register // class of the output. if (MI.getOpcode() == AMDGPU::PHI) { const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr; for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) { if (!MI.getOperand(i).isReg() || !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg())) continue; const TargetRegisterClass *OpRC = MRI.getRegClass(MI.getOperand(i).getReg()); if (RI.hasVGPRs(OpRC)) { VRC = OpRC; } else { SRC = OpRC; } } // If any of the operands are VGPR registers, then they all must be VGPRs; // otherwise we will create illegal VGPR->SGPR copies when legalizing // them.
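[Editor's note: a compact restatement of that rule as a sketch in plain C++ (not LLVM code): an SGPR value can always be copied into a VGPR, but not vice versa, so a single vector-bank input forces the PHI result into the vector bank. The code below then picks the common class accordingly.]

#include <initializer_list>

enum class Bank { SGPR, VGPR };

static Bank unifyPhiBank(std::initializer_list<Bank> Inputs) {
  for (Bank B : Inputs)
    if (B == Bank::VGPR)
      return Bank::VGPR; // any divergent input forces a VGPR result
  return Bank::SGPR;     // all inputs uniform: the PHI may stay scalar
}

// e.g. unifyPhiBank({Bank::SGPR, Bank::VGPR}) == Bank::VGPR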
if (VRC || !RI.isSGPRClass(getOpRegClass(MI, 0))) { if (!VRC) { assert(SRC); VRC = RI.getEquivalentVGPRClass(SRC); } RC = VRC; } else { RC = SRC; } // Update all the operands so they have the same type. for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) { MachineOperand &Op = MI.getOperand(I); if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg())) continue; // MI is a PHI instruction. MachineBasicBlock *InsertBB = MI.getOperand(I + 1).getMBB(); MachineBasicBlock::iterator Insert = InsertBB->getFirstTerminator(); // Avoid creating no-op copies with the same src and dst reg class. These // confuse some of the machine passes. legalizeGenericOperand(*InsertBB, Insert, RC, Op, MRI, MI.getDebugLoc()); } } // REG_SEQUENCE doesn't really require operand legalization, but if one has a // VGPR dest type and SGPR sources, insert copies so all operands are // VGPRs. This seems to help operand folding / the register coalescer. if (MI.getOpcode() == AMDGPU::REG_SEQUENCE) { MachineBasicBlock *MBB = MI.getParent(); const TargetRegisterClass *DstRC = getOpRegClass(MI, 0); if (RI.hasVGPRs(DstRC)) { // Update all the operands so they are VGPR register classes. These may // not be the same register class because REG_SEQUENCE supports mixing // subregister index types e.g. sub0_sub1 + sub2 + sub3 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) { MachineOperand &Op = MI.getOperand(I); if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg())) continue; const TargetRegisterClass *OpRC = MRI.getRegClass(Op.getReg()); const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(OpRC); if (VRC == OpRC) continue; legalizeGenericOperand(*MBB, MI, VRC, Op, MRI, MI.getDebugLoc()); Op.setIsKill(); } } return; } // Legalize INSERT_SUBREG // src0 must have the same register class as dst if (MI.getOpcode() == AMDGPU::INSERT_SUBREG) { unsigned Dst = MI.getOperand(0).getReg(); unsigned Src0 = MI.getOperand(1).getReg(); const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0); if (DstRC != Src0RC) { MachineBasicBlock *MBB = MI.getParent(); MachineOperand &Op = MI.getOperand(1); legalizeGenericOperand(*MBB, MI, DstRC, Op, MRI, MI.getDebugLoc()); } return; } // Legalize MIMG and MUBUF/MTBUF for shaders. // // Shaders only generate MUBUF/MTBUF instructions via intrinsics or via // scratch memory access. In both cases, the legalization never involves // conversion to the addr64 form. if (isMIMG(MI) || (AMDGPU::isShader(MF.getFunction().getCallingConv()) && (isMUBUF(MI) || isMTBUF(MI)))) { MachineOperand *SRsrc = getNamedOperand(MI, AMDGPU::OpName::srsrc); if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg()))) { unsigned SGPR = readlaneVGPRToSGPR(SRsrc->getReg(), MI, MRI); SRsrc->setReg(SGPR); } MachineOperand *SSamp = getNamedOperand(MI, AMDGPU::OpName::ssamp); if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg()))) { unsigned SGPR = readlaneVGPRToSGPR(SSamp->getReg(), MI, MRI); SSamp->setReg(SGPR); } return; } // Legalize MUBUF* instructions by converting to addr64 form. // FIXME: If we start using the non-addr64 instructions for compute, we // may need to legalize them as above. This especially applies to the // buffer_load_format_* variants and variants with idxen (or bothen). 
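[Editor's note: for reference, the 128-bit buffer resource descriptor assembled by the REG_SEQUENCE below has the dword layout sketched here (plain C++, illustrative only; the real format bits come from getDefaultRsrcDataFormat). In this addr64 path the pointer dwords are zeroed because the pointer extracted from the old descriptor is added into VADDR instead.]

#include <array>
#include <cstdint>

// sub0_sub1: 64-bit base pointer; sub2/sub3: RSRC_DATA_FORMAT halves.
static std::array<uint32_t, 4> makeRsrc(uint64_t BasePtr, uint64_t Format) {
  return {uint32_t(BasePtr & 0xFFFFFFFF), // sub0: pointer, low dword
          uint32_t(BasePtr >> 32),        // sub1: pointer, high dword
          uint32_t(Format & 0xFFFFFFFF),  // sub2: RSRC_DATA_FORMAT{31-0}
          uint32_t(Format >> 32)};        // sub3: RSRC_DATA_FORMAT{63-32}
}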
int SRsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc); if (SRsrcIdx != -1) { // We have a MUBUF instruction MachineOperand *SRsrc = &MI.getOperand(SRsrcIdx); unsigned SRsrcRC = get(MI.getOpcode()).OpInfo[SRsrcIdx].RegClass; if (RI.getCommonSubClass(MRI.getRegClass(SRsrc->getReg()), RI.getRegClass(SRsrcRC))) { // The operands are legal. // FIXME: We may need to legalize operands besides srsrc. return; } MachineBasicBlock &MBB = *MI.getParent(); // Extract the ptr from the resource descriptor. unsigned SRsrcPtr = buildExtractSubReg(MI, MRI, *SRsrc, &AMDGPU::VReg_128RegClass, AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass); // Create an empty resource descriptor unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass); uint64_t RsrcDataFormat = getDefaultRsrcDataFormat(); // Zero64 = 0 BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::S_MOV_B64), Zero64) .addImm(0); // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0} BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::S_MOV_B32), SRsrcFormatLo) .addImm(RsrcDataFormat & 0xFFFFFFFF); // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32} BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::S_MOV_B32), SRsrcFormatHi) .addImm(RsrcDataFormat >> 32); // NewSRsrc = {Zero64, SRsrcFormat} BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewSRsrc) .addReg(Zero64) .addImm(AMDGPU::sub0_sub1) .addReg(SRsrcFormatLo) .addImm(AMDGPU::sub2) .addReg(SRsrcFormatHi) .addImm(AMDGPU::sub3); MachineOperand *VAddr = getNamedOperand(MI, AMDGPU::OpName::vaddr); unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); if (VAddr) { // This is already an ADDR64 instruction so we need to add the pointer // extracted from the resource descriptor to the current value of VAddr. unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); // NewVaddrLo = SRsrcPtr:sub0 + VAddr:sub0 DebugLoc DL = MI.getDebugLoc(); BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), NewVAddrLo) .addReg(SRsrcPtr, 0, AMDGPU::sub0) .addReg(VAddr->getReg(), 0, AMDGPU::sub0); // NewVaddrHi = SRsrcPtr:sub1 + VAddr:sub1 BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e32), NewVAddrHi) .addReg(SRsrcPtr, 0, AMDGPU::sub1) .addReg(VAddr->getReg(), 0, AMDGPU::sub1); // NewVaddr = {NewVaddrHi, NewVaddrLo} BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr) .addReg(NewVAddrLo) .addImm(AMDGPU::sub0) .addReg(NewVAddrHi) .addImm(AMDGPU::sub1); } else { // This instruction is the _OFFSET variant, so we need to convert it to // ADDR64. assert(MBB.getParent()->getSubtarget<SISubtarget>().getGeneration() < SISubtarget::VOLCANIC_ISLANDS && "FIXME: Need to emit flat atomics here"); MachineOperand *VData = getNamedOperand(MI, AMDGPU::OpName::vdata); MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset); MachineOperand *SOffset = getNamedOperand(MI, AMDGPU::OpName::soffset); unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI.getOpcode()); // Atomics with return have an additional tied operand and are // missing some of the special bits. MachineOperand *VDataIn = getNamedOperand(MI, AMDGPU::OpName::vdata_in); MachineInstr *Addr64; if (!VDataIn) { // Regular buffer load / store.
MachineInstrBuilder MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode)) .add(*VData) .addReg(AMDGPU::NoRegister) // Dummy value for vaddr. // This will be replaced later // with the new value of vaddr. .add(*SRsrc) .add(*SOffset) .add(*Offset); // Atomics do not have this operand. if (const MachineOperand *GLC = getNamedOperand(MI, AMDGPU::OpName::glc)) { MIB.addImm(GLC->getImm()); } MIB.addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc)); if (const MachineOperand *TFE = getNamedOperand(MI, AMDGPU::OpName::tfe)) { MIB.addImm(TFE->getImm()); } MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); Addr64 = MIB; } else { // Atomics with return. Addr64 = BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode)) .add(*VData) .add(*VDataIn) .addReg(AMDGPU::NoRegister) // Dummy value for vaddr. // This will be replaced later // with the new value of vaddr. .add(*SRsrc) .add(*SOffset) .add(*Offset) .addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc)) .setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); } MI.removeFromParent(); // NewVaddr = {NewVaddrHi, NewVaddrLo} BuildMI(MBB, Addr64, Addr64->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr) .addReg(SRsrcPtr, 0, AMDGPU::sub0) .addImm(AMDGPU::sub0) .addReg(SRsrcPtr, 0, AMDGPU::sub1) .addImm(AMDGPU::sub1); VAddr = getNamedOperand(*Addr64, AMDGPU::OpName::vaddr); SRsrc = getNamedOperand(*Addr64, AMDGPU::OpName::srsrc); } // Update the instruction to use NewVaddr VAddr->setReg(NewVAddr); // Update the instruction to use NewSRsrc SRsrc->setReg(NewSRsrc); } } void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { SetVectorType Worklist; Worklist.insert(&TopInst); while (!Worklist.empty()) { MachineInstr &Inst = *Worklist.pop_back_val(); MachineBasicBlock *MBB = Inst.getParent(); MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); unsigned Opcode = Inst.getOpcode(); unsigned NewOpcode = getVALUOp(Inst); // Handle some special cases switch (Opcode) { default: break; case AMDGPU::S_ADD_U64_PSEUDO: case AMDGPU::S_SUB_U64_PSEUDO: splitScalar64BitAddSub(Worklist, Inst); Inst.eraseFromParent(); continue; case AMDGPU::S_ADD_I32: case AMDGPU::S_SUB_I32: // FIXME: The u32 versions currently selected use the carry. 
if (moveScalarAddSub(Worklist, Inst)) continue; // Default handling break; case AMDGPU::S_AND_B64: splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_AND_B32_e64); Inst.eraseFromParent(); continue; case AMDGPU::S_OR_B64: splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_OR_B32_e64); Inst.eraseFromParent(); continue; case AMDGPU::S_XOR_B64: splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_XOR_B32_e64); Inst.eraseFromParent(); continue; case AMDGPU::S_NOT_B64: splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::V_NOT_B32_e32); Inst.eraseFromParent(); continue; case AMDGPU::S_BCNT1_I32_B64: splitScalar64BitBCNT(Worklist, Inst); Inst.eraseFromParent(); continue; case AMDGPU::S_BFE_I64: splitScalar64BitBFE(Worklist, Inst); Inst.eraseFromParent(); continue; case AMDGPU::S_LSHL_B32: if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { NewOpcode = AMDGPU::V_LSHLREV_B32_e64; swapOperands(Inst); } break; case AMDGPU::S_ASHR_I32: if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { NewOpcode = AMDGPU::V_ASHRREV_I32_e64; swapOperands(Inst); } break; case AMDGPU::S_LSHR_B32: if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { NewOpcode = AMDGPU::V_LSHRREV_B32_e64; swapOperands(Inst); } break; case AMDGPU::S_LSHL_B64: if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { NewOpcode = AMDGPU::V_LSHLREV_B64; swapOperands(Inst); } break; case AMDGPU::S_ASHR_I64: if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { NewOpcode = AMDGPU::V_ASHRREV_I64; swapOperands(Inst); } break; case AMDGPU::S_LSHR_B64: if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { NewOpcode = AMDGPU::V_LSHRREV_B64; swapOperands(Inst); } break; case AMDGPU::S_ABS_I32: lowerScalarAbs(Worklist, Inst); Inst.eraseFromParent(); continue; case AMDGPU::S_CBRANCH_SCC0: case AMDGPU::S_CBRANCH_SCC1: // Clear unused bits of vcc BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(AMDGPU::S_AND_B64), AMDGPU::VCC) .addReg(AMDGPU::EXEC) .addReg(AMDGPU::VCC); break; case AMDGPU::S_BFE_U64: case AMDGPU::S_BFM_B64: llvm_unreachable("Moving this op to VALU not implemented"); case AMDGPU::S_PACK_LL_B32_B16: case AMDGPU::S_PACK_LH_B32_B16: case AMDGPU::S_PACK_HH_B32_B16: movePackToVALU(Worklist, MRI, Inst); Inst.eraseFromParent(); continue; case AMDGPU::S_XNOR_B32: lowerScalarXnor(Worklist, Inst); Inst.eraseFromParent(); continue; case AMDGPU::S_XNOR_B64: splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32); Inst.eraseFromParent(); continue; case AMDGPU::S_BUFFER_LOAD_DWORD_SGPR: { unsigned VDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); const MachineOperand *VAddr = getNamedOperand(Inst, AMDGPU::OpName::soff); auto Add = MRI.getUniqueVRegDef(VAddr->getReg()); unsigned Offset = 0; // FIXME: This isn't safe because the addressing mode doesn't work // correctly if vaddr is negative. // // FIXME: Should probably be done somewhere else, maybe SIFoldOperands. // // See if we can extract an immediate offset by recognizing one of these: // V_ADD_I32_e32 dst, imm, src1 // V_ADD_I32_e32 dst, (S_MOV_B32 imm), src1 // V_ADD will be removed by "Remove dead machine instructions". if (Add && (Add->getOpcode() == AMDGPU::V_ADD_I32_e32 || Add->getOpcode() == AMDGPU::V_ADD_U32_e64)) { static const unsigned SrcNames[2] = { AMDGPU::OpName::src0, AMDGPU::OpName::src1, }; // Find a literal offset in one of source operands. 
for (int i = 0; i < 2; i++) { const MachineOperand *Src = getNamedOperand(*Add, SrcNames[i]); if (Src->isReg()) { auto Mov = MRI.getUniqueVRegDef(Src->getReg()); if (Mov && Mov->getOpcode() == AMDGPU::S_MOV_B32) Src = &Mov->getOperand(1); } if (Src) { if (Src->isImm()) Offset = Src->getImm(); else if (Src->isCImm()) Offset = Src->getCImm()->getZExtValue(); } if (Offset && isLegalMUBUFImmOffset(Offset)) { VAddr = getNamedOperand(*Add, SrcNames[!i]); break; } Offset = 0; } } - BuildMI(*MBB, Inst, Inst.getDebugLoc(), + MachineInstr *NewInstr = + BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), VDst) .add(*VAddr) // vaddr .add(*getNamedOperand(Inst, AMDGPU::OpName::sbase)) // srsrc .addImm(0) // soffset .addImm(Offset) // offset .addImm(getNamedOperand(Inst, AMDGPU::OpName::glc)->getImm()) .addImm(0) // slc .addImm(0) // tfe - .setMemRefs(Inst.memoperands_begin(), Inst.memoperands_end()); + .setMemRefs(Inst.memoperands_begin(), Inst.memoperands_end()) + .getInstr(); MRI.replaceRegWith(getNamedOperand(Inst, AMDGPU::OpName::sdst)->getReg(), VDst); addUsersToMoveToVALUWorklist(VDst, MRI, Worklist); Inst.eraseFromParent(); + + // Legalize all operands other than the offset. Notably, convert the srsrc + // into SGPRs using v_readfirstlane if needed. + legalizeOperands(*NewInstr); continue; } } if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) { // We cannot move this instruction to the VALU, so we should try to // legalize its operands instead. legalizeOperands(Inst); continue; } // Use the new VALU Opcode. const MCInstrDesc &NewDesc = get(NewOpcode); Inst.setDesc(NewDesc); // Remove any references to SCC. Vector instructions can't read from it, and // We're just about to add the implicit use / defs of VCC, and we don't want // both. for (unsigned i = Inst.getNumOperands() - 1; i > 0; --i) { MachineOperand &Op = Inst.getOperand(i); if (Op.isReg() && Op.getReg() == AMDGPU::SCC) { Inst.RemoveOperand(i); addSCCDefUsersToVALUWorklist(Inst, Worklist); } } if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) { // We are converting these to a BFE, so we need to add the missing // operands for the size and offset. unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16; Inst.addOperand(MachineOperand::CreateImm(0)); Inst.addOperand(MachineOperand::CreateImm(Size)); } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) { // The VALU version adds the second operand to the result, so insert an // extra 0 operand. Inst.addOperand(MachineOperand::CreateImm(0)); } Inst.addImplicitDefUseOperands(*Inst.getParent()->getParent()); if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) { const MachineOperand &OffsetWidthOp = Inst.getOperand(2); // If we need to move this to VGPRs, we need to unpack the second operand // back into the 2 separate ones for bit offset and width. assert(OffsetWidthOp.isImm() && "Scalar BFE is only implemented for constant width and offset"); uint32_t Imm = OffsetWidthOp.getImm(); uint32_t Offset = Imm & 0x3f; // Extract bits [5:0]. uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16]. Inst.RemoveOperand(2); // Remove old immediate. Inst.addOperand(MachineOperand::CreateImm(Offset)); Inst.addOperand(MachineOperand::CreateImm(BitWidth)); } bool HasDst = Inst.getOperand(0).isReg() && Inst.getOperand(0).isDef(); unsigned NewDstReg = AMDGPU::NoRegister; if (HasDst) { unsigned DstReg = Inst.getOperand(0).getReg(); if (TargetRegisterInfo::isPhysicalRegister(DstReg)) continue; // Update the destination register class. 
const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(Inst); if (!NewDstRC) continue; if (Inst.isCopy() && TargetRegisterInfo::isVirtualRegister(Inst.getOperand(1).getReg()) && NewDstRC == RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) { // Instead of creating a copy where src and dst are the same register // class, we just replace all uses of dst with src. These kinds of // copies interfere with the heuristics MachineSink uses to decide // whether or not to split a critical edge, since the pass assumes // that copies will end up as machine instructions and not be // eliminated. addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist); MRI.replaceRegWith(DstReg, Inst.getOperand(1).getReg()); MRI.clearKillFlags(Inst.getOperand(1).getReg()); Inst.getOperand(0).setReg(DstReg); continue; } NewDstReg = MRI.createVirtualRegister(NewDstRC); MRI.replaceRegWith(DstReg, NewDstReg); } // Legalize the operands legalizeOperands(Inst); if (HasDst) addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist); } } // Add/sub require special handling to deal with carry outs. bool SIInstrInfo::moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst) const { if (ST.hasAddNoCarry()) { // Assume there is no user of scc since we don't select this in that case. // Since scc isn't used, it doesn't really matter if the i32 or u32 variant // is used. MachineBasicBlock &MBB = *Inst.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); unsigned OldDstReg = Inst.getOperand(0).getReg(); unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); unsigned Opc = Inst.getOpcode(); assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32); unsigned NewOpc = Opc == AMDGPU::S_ADD_I32 ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64; assert(Inst.getOperand(3).getReg() == AMDGPU::SCC); Inst.RemoveOperand(3); Inst.setDesc(get(NewOpc)); Inst.addImplicitDefUseOperands(*MBB.getParent()); MRI.replaceRegWith(OldDstReg, ResultReg); legalizeOperands(Inst); addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); return true; } return false; } void SIInstrInfo::lowerScalarAbs(SetVectorType &Worklist, MachineInstr &Inst) const { MachineBasicBlock &MBB = *Inst.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); MachineBasicBlock::iterator MII = Inst; DebugLoc DL = Inst.getDebugLoc(); MachineOperand &Dest = Inst.getOperand(0); MachineOperand &Src = Inst.getOperand(1); unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); unsigned SubOp = ST.hasAddNoCarry() ?
AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_I32_e32; BuildMI(MBB, MII, DL, get(SubOp), TmpReg) .addImm(0) .addReg(Src.getReg()); BuildMI(MBB, MII, DL, get(AMDGPU::V_MAX_I32_e64), ResultReg) .addReg(Src.getReg()) .addReg(TmpReg); MRI.replaceRegWith(Dest.getReg(), ResultReg); addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); } void SIInstrInfo::lowerScalarXnor(SetVectorType &Worklist, MachineInstr &Inst) const { MachineBasicBlock &MBB = *Inst.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); MachineBasicBlock::iterator MII = Inst; const DebugLoc &DL = Inst.getDebugLoc(); MachineOperand &Dest = Inst.getOperand(0); MachineOperand &Src0 = Inst.getOperand(1); MachineOperand &Src1 = Inst.getOperand(2); legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src0, MRI, DL); legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src1, MRI, DL); unsigned Xor = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); BuildMI(MBB, MII, DL, get(AMDGPU::V_XOR_B32_e64), Xor) .add(Src0) .add(Src1); unsigned Not = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); BuildMI(MBB, MII, DL, get(AMDGPU::V_NOT_B32_e64), Not) .addReg(Xor); MRI.replaceRegWith(Dest.getReg(), Not); addUsersToMoveToVALUWorklist(Not, MRI, Worklist); } void SIInstrInfo::splitScalar64BitUnaryOp( SetVectorType &Worklist, MachineInstr &Inst, unsigned Opcode) const { MachineBasicBlock &MBB = *Inst.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); MachineOperand &Dest = Inst.getOperand(0); MachineOperand &Src0 = Inst.getOperand(1); DebugLoc DL = Inst.getDebugLoc(); MachineBasicBlock::iterator MII = Inst; const MCInstrDesc &InstDesc = get(Opcode); const TargetRegisterClass *Src0RC = Src0.isReg() ? MRI.getRegClass(Src0.getReg()) : &AMDGPU::SGPR_32RegClass; const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0); MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC); const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg()); const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC); const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0); unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC); BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0); MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub1, Src0SubRC); unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC); BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1); unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC); BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) .addReg(DestSub0) .addImm(AMDGPU::sub0) .addReg(DestSub1) .addImm(AMDGPU::sub1); MRI.replaceRegWith(Dest.getReg(), FullDestReg); // We don't need to legalizeOperands here because for a single operand, src0 // will support any kind of input. // Move all users of this moved value. 
addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist); } void SIInstrInfo::splitScalar64BitAddSub( SetVectorType &Worklist, MachineInstr &Inst) const { bool IsAdd = (Inst.getOpcode() == AMDGPU::S_ADD_U64_PSEUDO); MachineBasicBlock &MBB = *Inst.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); unsigned FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); unsigned DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); unsigned DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); unsigned CarryReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); unsigned DeadCarryReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); MachineOperand &Dest = Inst.getOperand(0); MachineOperand &Src0 = Inst.getOperand(1); MachineOperand &Src1 = Inst.getOperand(2); const DebugLoc &DL = Inst.getDebugLoc(); MachineBasicBlock::iterator MII = Inst; const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg()); const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg()); const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0); const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0); MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC); MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, AMDGPU::sub0, Src1SubRC); MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub1, Src0SubRC); MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, AMDGPU::sub1, Src1SubRC); unsigned LoOpc = IsAdd ? AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64; MachineInstr *LoHalf = BuildMI(MBB, MII, DL, get(LoOpc), DestSub0) .addReg(CarryReg, RegState::Define) .add(SrcReg0Sub0) .add(SrcReg1Sub0); unsigned HiOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64; MachineInstr *HiHalf = BuildMI(MBB, MII, DL, get(HiOpc), DestSub1) .addReg(DeadCarryReg, RegState::Define | RegState::Dead) .add(SrcReg0Sub1) .add(SrcReg1Sub1) .addReg(CarryReg, RegState::Kill); BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) .addReg(DestSub0) .addImm(AMDGPU::sub0) .addReg(DestSub1) .addImm(AMDGPU::sub1); MRI.replaceRegWith(Dest.getReg(), FullDestReg); // Try to legalize the operands in case we need to swap the order to keep it // valid. legalizeOperands(*LoHalf); legalizeOperands(*HiHalf); // Move all users of this moved value. addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist); } void SIInstrInfo::splitScalar64BitBinaryOp( SetVectorType &Worklist, MachineInstr &Inst, unsigned Opcode) const { MachineBasicBlock &MBB = *Inst.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); MachineOperand &Dest = Inst.getOperand(0); MachineOperand &Src0 = Inst.getOperand(1); MachineOperand &Src1 = Inst.getOperand(2); DebugLoc DL = Inst.getDebugLoc(); MachineBasicBlock::iterator MII = Inst; const MCInstrDesc &InstDesc = get(Opcode); const TargetRegisterClass *Src0RC = Src0.isReg() ? MRI.getRegClass(Src0.getReg()) : &AMDGPU::SGPR_32RegClass; const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0); const TargetRegisterClass *Src1RC = Src1.isReg() ?
MRI.getRegClass(Src1.getReg()) : &AMDGPU::SGPR_32RegClass; const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0); MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC); MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, AMDGPU::sub0, Src1SubRC); const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg()); const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC); const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0); unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC); MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0) .add(SrcReg0Sub0) .add(SrcReg1Sub0); MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub1, Src0SubRC); MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, AMDGPU::sub1, Src1SubRC); unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC); MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1) .add(SrcReg0Sub1) .add(SrcReg1Sub1); unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC); BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) .addReg(DestSub0) .addImm(AMDGPU::sub0) .addReg(DestSub1) .addImm(AMDGPU::sub1); MRI.replaceRegWith(Dest.getReg(), FullDestReg); // Try to legalize the operands in case we need to swap the order to keep it // valid. legalizeOperands(LoHalf); legalizeOperands(HiHalf); // Move all users of this moved value. addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist); } void SIInstrInfo::splitScalar64BitBCNT( SetVectorType &Worklist, MachineInstr &Inst) const { MachineBasicBlock &MBB = *Inst.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); MachineBasicBlock::iterator MII = Inst; DebugLoc DL = Inst.getDebugLoc(); MachineOperand &Dest = Inst.getOperand(0); MachineOperand &Src = Inst.getOperand(1); const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64); const TargetRegisterClass *SrcRC = Src.isReg() ? MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass; unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0); MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, AMDGPU::sub0, SrcSubRC); MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, AMDGPU::sub1, SrcSubRC); BuildMI(MBB, MII, DL, InstDesc, MidReg).add(SrcRegSub0).addImm(0); BuildMI(MBB, MII, DL, InstDesc, ResultReg).add(SrcRegSub1).addReg(MidReg); MRI.replaceRegWith(Dest.getReg(), ResultReg); // We don't need to legalize operands here. src0 for either instruction can be // an SGPR, and the second input is unused or determined here. addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); } void SIInstrInfo::splitScalar64BitBFE(SetVectorType &Worklist, MachineInstr &Inst) const { MachineBasicBlock &MBB = *Inst.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); MachineBasicBlock::iterator MII = Inst; DebugLoc DL = Inst.getDebugLoc(); MachineOperand &Dest = Inst.getOperand(0); uint32_t Imm = Inst.getOperand(2).getImm(); uint32_t Offset = Imm & 0x3f; // Extract bits [5:0]. uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16]. (void) Offset; // Only sext_inreg cases handled.
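[Editor's note: the masks above (and the identical ones in moveToVALU) follow the S_BFE immediate layout; a standalone round-trip sketch in plain C++.]

#include <cstdint>

// S_BFE immediate: bit offset in bits [5:0], field width in bits [22:16].
static uint32_t encodeBfeImm(uint32_t Offset, uint32_t Width) {
  return (Offset & 0x3f) | ((Width & 0x7f) << 16);
}

static void decodeBfeImm(uint32_t Imm, uint32_t &Offset, uint32_t &Width) {
  Offset = Imm & 0x3f;            // Extract bits [5:0].
  Width = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
}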
assert(Inst.getOpcode() == AMDGPU::S_BFE_I64 && BitWidth <= 32 && Offset == 0 && "Not implemented"); if (BitWidth < 32) { unsigned MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); unsigned MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32), MidRegLo) .addReg(Inst.getOperand(1).getReg(), 0, AMDGPU::sub0) .addImm(0) .addImm(BitWidth); BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e32), MidRegHi) .addImm(31) .addReg(MidRegLo); BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg) .addReg(MidRegLo) .addImm(AMDGPU::sub0) .addReg(MidRegHi) .addImm(AMDGPU::sub1); MRI.replaceRegWith(Dest.getReg(), ResultReg); addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); return; } MachineOperand &Src = Inst.getOperand(1); unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg) .addImm(31) .addReg(Src.getReg(), 0, AMDGPU::sub0); BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg) .addReg(Src.getReg(), 0, AMDGPU::sub0) .addImm(AMDGPU::sub0) .addReg(TmpReg) .addImm(AMDGPU::sub1); MRI.replaceRegWith(Dest.getReg(), ResultReg); addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); } void SIInstrInfo::addUsersToMoveToVALUWorklist( unsigned DstReg, MachineRegisterInfo &MRI, SetVectorType &Worklist) const { for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg), E = MRI.use_end(); I != E;) { MachineInstr &UseMI = *I->getParent(); if (!canReadVGPR(UseMI, I.getOperandNo())) { Worklist.insert(&UseMI); do { ++I; } while (I != E && I->getParent() == &UseMI); } else { ++I; } } } void SIInstrInfo::movePackToVALU(SetVectorType &Worklist, MachineRegisterInfo &MRI, MachineInstr &Inst) const { unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); MachineBasicBlock *MBB = Inst.getParent(); MachineOperand &Src0 = Inst.getOperand(1); MachineOperand &Src1 = Inst.getOperand(2); const DebugLoc &DL = Inst.getDebugLoc(); switch (Inst.getOpcode()) { case AMDGPU::S_PACK_LL_B32_B16: { unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); // FIXME: Can do a lot better if we know the high bits of src0 or src1 are // 0. 
BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg) .addImm(0xffff); BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_B32_e64), TmpReg) .addReg(ImmReg, RegState::Kill) .add(Src0); BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHL_OR_B32), ResultReg) .add(Src1) .addImm(16) .addReg(TmpReg, RegState::Kill); break; } case AMDGPU::S_PACK_LH_B32_B16: { unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg) .addImm(0xffff); BuildMI(*MBB, Inst, DL, get(AMDGPU::V_BFI_B32), ResultReg) .addReg(ImmReg, RegState::Kill) .add(Src0) .add(Src1); break; } case AMDGPU::S_PACK_HH_B32_B16: { unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHRREV_B32_e64), TmpReg) .addImm(16) .add(Src0); BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg) .addImm(0xffff0000); BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_OR_B32), ResultReg) .add(Src1) .addReg(ImmReg, RegState::Kill) .addReg(TmpReg, RegState::Kill); break; } default: llvm_unreachable("unhandled s_pack_* instruction"); } MachineOperand &Dest = Inst.getOperand(0); MRI.replaceRegWith(Dest.getReg(), ResultReg); addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); } void SIInstrInfo::addSCCDefUsersToVALUWorklist( MachineInstr &SCCDefInst, SetVectorType &Worklist) const { // This assumes that all the users of SCC are in the same block // as the SCC def. for (MachineInstr &MI : make_range(MachineBasicBlock::iterator(SCCDefInst), SCCDefInst.getParent()->end())) { // Exit if we find another SCC def. if (MI.findRegisterDefOperandIdx(AMDGPU::SCC) != -1) return; if (MI.findRegisterUseOperandIdx(AMDGPU::SCC) != -1) Worklist.insert(&MI); } } const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass( const MachineInstr &Inst) const { const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0); switch (Inst.getOpcode()) { // For target instructions, getOpRegClass just returns the virtual register // class associated with the operand, so we need to find an equivalent VGPR // register class in order to move the instruction to the VALU. case AMDGPU::COPY: case AMDGPU::PHI: case AMDGPU::REG_SEQUENCE: case AMDGPU::INSERT_SUBREG: case AMDGPU::WQM: case AMDGPU::WWM: if (RI.hasVGPRs(NewDstRC)) return nullptr; NewDstRC = RI.getEquivalentVGPRClass(NewDstRC); if (!NewDstRC) return nullptr; return NewDstRC; default: return NewDstRC; } } // Find the one SGPR operand we are allowed to use. unsigned SIInstrInfo::findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const { const MCInstrDesc &Desc = MI.getDesc(); // Find the one SGPR operand we are allowed to use. // // First we need to consider the instruction's operand requirements before // legalizing. Some operands are required to be SGPRs, such as implicit uses // of VCC, but we are still bound by the constant bus requirement to only use // one. // // If the operand's class is an SGPR, we can never move it. unsigned SGPRReg = findImplicitSGPRRead(MI); if (SGPRReg != AMDGPU::NoRegister) return SGPRReg; unsigned UsedSGPRs[3] = { AMDGPU::NoRegister }; const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); for (unsigned i = 0; i < 3; ++i) { int Idx = OpIndices[i]; if (Idx == -1) break; const MachineOperand &MO = MI.getOperand(Idx); if (!MO.isReg()) continue; // Is this operand statically required to be an SGPR based on the operand // constraints? 
const TargetRegisterClass *OpRC = RI.getRegClass(Desc.OpInfo[Idx].RegClass); bool IsRequiredSGPR = RI.isSGPRClass(OpRC); if (IsRequiredSGPR) return MO.getReg(); // If this could be a VGPR or an SGPR, Check the dynamic register class. unsigned Reg = MO.getReg(); const TargetRegisterClass *RegRC = MRI.getRegClass(Reg); if (RI.isSGPRClass(RegRC)) UsedSGPRs[i] = Reg; } // We don't have a required SGPR operand, so we have a bit more freedom in // selecting operands to move. // Try to select the most used SGPR. If an SGPR is equal to one of the // others, we choose that. // // e.g. // V_FMA_F32 v0, s0, s0, s0 -> No moves // V_FMA_F32 v0, s0, s1, s0 -> Move s1 // TODO: If some of the operands are 64-bit SGPRs and some 32, we should // prefer those. if (UsedSGPRs[0] != AMDGPU::NoRegister) { if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2]) SGPRReg = UsedSGPRs[0]; } if (SGPRReg == AMDGPU::NoRegister && UsedSGPRs[1] != AMDGPU::NoRegister) { if (UsedSGPRs[1] == UsedSGPRs[2]) SGPRReg = UsedSGPRs[1]; } return SGPRReg; } MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI, unsigned OperandName) const { int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName); if (Idx == -1) return nullptr; return &MI.getOperand(Idx); } uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const { uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT; if (ST.isAmdHsaOS()) { // Set ATC = 1. GFX9 doesn't have this bit. if (ST.getGeneration() <= SISubtarget::VOLCANIC_ISLANDS) RsrcDataFormat |= (1ULL << 56); // Set MTYPE = 2 (MTYPE_UC = uncached). GFX9 doesn't have this. // BTW, it disables TC L2 and therefore decreases performance. if (ST.getGeneration() == SISubtarget::VOLCANIC_ISLANDS) RsrcDataFormat |= (2ULL << 59); } return RsrcDataFormat; } uint64_t SIInstrInfo::getScratchRsrcWords23() const { uint64_t Rsrc23 = getDefaultRsrcDataFormat() | AMDGPU::RSRC_TID_ENABLE | 0xffffffff; // Size; // GFX9 doesn't have ELEMENT_SIZE. if (ST.getGeneration() <= SISubtarget::VOLCANIC_ISLANDS) { uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1; Rsrc23 |= EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT; } // IndexStride = 64. Rsrc23 |= UINT64_C(3) << AMDGPU::RSRC_INDEX_STRIDE_SHIFT; // If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17]. // Clear them unless we want a huge stride. 
if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT; return Rsrc23; } bool SIInstrInfo::isLowLatencyInstruction(const MachineInstr &MI) const { unsigned Opc = MI.getOpcode(); return isSMRD(Opc); } bool SIInstrInfo::isHighLatencyInstruction(const MachineInstr &MI) const { unsigned Opc = MI.getOpcode(); return isMUBUF(Opc) || isMTBUF(Opc) || isMIMG(Opc); } unsigned SIInstrInfo::isStackAccess(const MachineInstr &MI, int &FrameIndex) const { const MachineOperand *Addr = getNamedOperand(MI, AMDGPU::OpName::vaddr); if (!Addr || !Addr->isFI()) return AMDGPU::NoRegister; assert(!MI.memoperands_empty() && (*MI.memoperands_begin())->getAddrSpace() == AMDGPUASI.PRIVATE_ADDRESS); FrameIndex = Addr->getIndex(); return getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg(); } unsigned SIInstrInfo::isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const { const MachineOperand *Addr = getNamedOperand(MI, AMDGPU::OpName::addr); assert(Addr && Addr->isFI()); FrameIndex = Addr->getIndex(); return getNamedOperand(MI, AMDGPU::OpName::data)->getReg(); } unsigned SIInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const { if (!MI.mayLoad()) return AMDGPU::NoRegister; if (isMUBUF(MI) || isVGPRSpill(MI)) return isStackAccess(MI, FrameIndex); if (isSGPRSpill(MI)) return isSGPRStackAccess(MI, FrameIndex); return AMDGPU::NoRegister; } unsigned SIInstrInfo::isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const { if (!MI.mayStore()) return AMDGPU::NoRegister; if (isMUBUF(MI) || isVGPRSpill(MI)) return isStackAccess(MI, FrameIndex); if (isSGPRSpill(MI)) return isSGPRStackAccess(MI, FrameIndex); return AMDGPU::NoRegister; } unsigned SIInstrInfo::getInstBundleSize(const MachineInstr &MI) const { unsigned Size = 0; MachineBasicBlock::const_instr_iterator I = MI.getIterator(); MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); while (++I != E && I->isInsideBundle()) { assert(!I->isBundle() && "No nested bundle!"); Size += getInstSizeInBytes(*I); } return Size; } unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { unsigned Opc = MI.getOpcode(); const MCInstrDesc &Desc = getMCOpcodeFromPseudo(Opc); unsigned DescSize = Desc.getSize(); // If we have a definitive size, we can use it. Otherwise we need to inspect // the operands to know the size. // // FIXME: Instructions that have a base 32-bit encoding report their size as // 4, even though they are really 8 bytes if they have a literal operand. if (DescSize != 0 && DescSize != 4) return DescSize; // 4-byte instructions may have a 32-bit literal encoded after them. Check // operands that could ever be literals. if (isVALU(MI) || isSALU(MI)) { if (isFixedSize(MI)) return DescSize; int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); if (Src0Idx == -1) return 4; // No operands.
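[Editor's note: a simplified standalone model of this sizing rule (plain C++). The integer inline-constant range used here, -16..64, is the usual AMDGPU one; inline floating-point constants are omitted for brevity. The real per-operand check, isLiteralConstantLike, continues below.]

#include <cstdint>

// True if V can be encoded as an inline constant rather than a literal.
static bool isInlineImm(int64_t V) { return V >= -16 && V <= 64; }

// Base encoding is 4 bytes; any source needing a literal appends one dword.
static unsigned sizeInBytes(const int64_t *Src0Imm, const int64_t *Src1Imm) {
  if ((Src0Imm && !isInlineImm(*Src0Imm)) ||
      (Src1Imm && !isInlineImm(*Src1Imm)))
    return 8; // 4-byte encoding + trailing 32-bit literal
  return 4;
}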
if (isLiteralConstantLike(MI.getOperand(Src0Idx), Desc.OpInfo[Src0Idx])) return 8; int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); if (Src1Idx == -1) return 4; if (isLiteralConstantLike(MI.getOperand(Src1Idx), Desc.OpInfo[Src1Idx])) return 8; return 4; } if (DescSize == 4) return 4; switch (Opc) { case TargetOpcode::IMPLICIT_DEF: case TargetOpcode::KILL: case TargetOpcode::DBG_VALUE: case TargetOpcode::EH_LABEL: return 0; case TargetOpcode::BUNDLE: return getInstBundleSize(MI); case TargetOpcode::INLINEASM: { const MachineFunction *MF = MI.getParent()->getParent(); const char *AsmStr = MI.getOperand(0).getSymbolName(); return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo()); } default: llvm_unreachable("unable to find instruction size"); } } bool SIInstrInfo::mayAccessFlatAddressSpace(const MachineInstr &MI) const { if (!isFLAT(MI)) return false; if (MI.memoperands_empty()) return true; for (const MachineMemOperand *MMO : MI.memoperands()) { if (MMO->getAddrSpace() == AMDGPUASI.FLAT_ADDRESS) return true; } return false; } bool SIInstrInfo::isNonUniformBranchInstr(MachineInstr &Branch) const { return Branch.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO; } void SIInstrInfo::convertNonUniformIfRegion(MachineBasicBlock *IfEntry, MachineBasicBlock *IfEnd) const { MachineBasicBlock::iterator TI = IfEntry->getFirstTerminator(); assert(TI != IfEntry->end()); MachineInstr *Branch = &(*TI); MachineFunction *MF = IfEntry->getParent(); MachineRegisterInfo &MRI = IfEntry->getParent()->getRegInfo(); if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) { unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); MachineInstr *SIIF = BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_IF), DstReg) .add(Branch->getOperand(0)) .add(Branch->getOperand(1)); MachineInstr *SIEND = BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_END_CF)) .addReg(DstReg); IfEntry->erase(TI); IfEntry->insert(IfEntry->end(), SIIF); IfEnd->insert(IfEnd->getFirstNonPHI(), SIEND); } } void SIInstrInfo::convertNonUniformLoopRegion( MachineBasicBlock *LoopEntry, MachineBasicBlock *LoopEnd) const { MachineBasicBlock::iterator TI = LoopEnd->getFirstTerminator(); // We expect 2 terminators, one conditional and one unconditional. 
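[Editor's note: conceptually, the SI_IF_BREAK / SI_LOOP pair emitted by this function implements the mask protocol sketched here (standalone plain C++; a callback stands in for the body's break condition, and this is my reading of the pseudos' semantics rather than authoritative documentation).]

#include <cstdint>
#include <functional>

// BackEdgeReg accumulates lanes that have broken out (SI_IF_BREAK);
// SI_LOOP takes the back-edge while any lane of the original exec mask
// is still live in the loop.
static uint64_t runDivergentLoop(
    uint64_t Exec, const std::function<uint64_t(uint64_t)> &BreakMask) {
  uint64_t Broken = 0; // carried by the header PHI, zero on loop entry
  while ((Exec & ~Broken) != 0)
    Broken |= BreakMask(Exec & ~Broken); // lanes exiting this iteration
  return Broken; // equals Exec once every lane has left the loop
}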
assert(TI != LoopEnd->end()); MachineInstr *Branch = &(*TI); MachineFunction *MF = LoopEnd->getParent(); MachineRegisterInfo &MRI = LoopEnd->getParent()->getRegInfo(); if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) { unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); unsigned BackEdgeReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); MachineInstrBuilder HeaderPHIBuilder = BuildMI(*(MF), Branch->getDebugLoc(), get(TargetOpcode::PHI), DstReg); for (MachineBasicBlock::pred_iterator PI = LoopEntry->pred_begin(), E = LoopEntry->pred_end(); PI != E; ++PI) { if (*PI == LoopEnd) { HeaderPHIBuilder.addReg(BackEdgeReg); } else { MachineBasicBlock *PMBB = *PI; unsigned ZeroReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); materializeImmediate(*PMBB, PMBB->getFirstTerminator(), DebugLoc(), ZeroReg, 0); HeaderPHIBuilder.addReg(ZeroReg); } HeaderPHIBuilder.addMBB(*PI); } MachineInstr *HeaderPhi = HeaderPHIBuilder; MachineInstr *SIIFBREAK = BuildMI(*(MF), Branch->getDebugLoc(), get(AMDGPU::SI_IF_BREAK), BackEdgeReg) .addReg(DstReg) .add(Branch->getOperand(0)); MachineInstr *SILOOP = BuildMI(*(MF), Branch->getDebugLoc(), get(AMDGPU::SI_LOOP)) .addReg(BackEdgeReg) .addMBB(LoopEntry); LoopEntry->insert(LoopEntry->begin(), HeaderPhi); LoopEnd->erase(TI); LoopEnd->insert(LoopEnd->end(), SIIFBREAK); LoopEnd->insert(LoopEnd->end(), SILOOP); } } ArrayRef<std::pair<int, const char *>> SIInstrInfo::getSerializableTargetIndices() const { static const std::pair<int, const char *> TargetIndices[] = { {AMDGPU::TI_CONSTDATA_START, "amdgpu-constdata-start"}, {AMDGPU::TI_SCRATCH_RSRC_DWORD0, "amdgpu-scratch-rsrc-dword0"}, {AMDGPU::TI_SCRATCH_RSRC_DWORD1, "amdgpu-scratch-rsrc-dword1"}, {AMDGPU::TI_SCRATCH_RSRC_DWORD2, "amdgpu-scratch-rsrc-dword2"}, {AMDGPU::TI_SCRATCH_RSRC_DWORD3, "amdgpu-scratch-rsrc-dword3"}}; return makeArrayRef(TargetIndices); } /// This is used by the post-RA scheduler (SchedulePostRAList.cpp). The /// post-RA version of misched uses CreateTargetMIHazardRecognizer. ScheduleHazardRecognizer * SIInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const { return new GCNHazardRecognizer(DAG->MF); } /// This is the hazard recognizer used at -O0 by the PostRAHazardRecognizer /// pass.
ScheduleHazardRecognizer * SIInstrInfo::CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const { return new GCNHazardRecognizer(MF); } std::pair<unsigned, unsigned> SIInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { return std::make_pair(TF & MO_MASK, TF & ~MO_MASK); } ArrayRef<std::pair<unsigned, const char *>> SIInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { static const std::pair<unsigned, const char *> TargetFlags[] = { { MO_GOTPCREL, "amdgpu-gotprel" }, { MO_GOTPCREL32_LO, "amdgpu-gotprel32-lo" }, { MO_GOTPCREL32_HI, "amdgpu-gotprel32-hi" }, { MO_REL32_LO, "amdgpu-rel32-lo" }, { MO_REL32_HI, "amdgpu-rel32-hi" } }; return makeArrayRef(TargetFlags); } bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI) const { return !MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY && MI.modifiesRegister(AMDGPU::EXEC, &RI); } MachineInstrBuilder SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, unsigned DestReg) const { if (ST.hasAddNoCarry()) return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e64), DestReg); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); MRI.setRegAllocationHint(UnusedCarry, 0, AMDGPU::VCC); return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_I32_e64), DestReg) .addReg(UnusedCarry, RegState::Define | RegState::Dead); } bool SIInstrInfo::isKillTerminator(unsigned Opcode) { switch (Opcode) { case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR: case AMDGPU::SI_KILL_I1_TERMINATOR: return true; default: return false; } } const MCInstrDesc &SIInstrInfo::getKillTerminatorFromPseudo(unsigned Opcode) const { switch (Opcode) { case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO: return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR); case AMDGPU::SI_KILL_I1_PSEUDO: return get(AMDGPU::SI_KILL_I1_TERMINATOR); default: llvm_unreachable("invalid opcode, expected SI_KILL_*_PSEUDO"); } } Index: head/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp =================================================================== --- head/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp (revision 329982) +++ head/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp (revision 329983) @@ -1,757 +1,760 @@ //===-- PPCCTRLoops.cpp - Identify and generate CTR loops -----------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This pass identifies loops where we can generate the PPC branch instructions // that decrement and test the count register (CTR) (bdnz and friends). // // The pattern that defines the induction variable can change depending on // prior optimizations. For example, the IndVarSimplify phase run by 'opt' // normalizes induction variables, and the Loop Strength Reduction pass // run by 'llc' may also make changes to the induction variable. // // Criteria for CTR loops: // - Countable loops (w/ ind. var for a trip count) // - Try inner-most loops first // - No nested CTR loops. // - No function calls in loops.
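// Illustrative example (not from the source): a countable loop such as
//   for (i = 0; i != n; ++i) body();
// can be emitted as
//   mtctr n
// loop: body
//   bdnz loop
// provided none of the criteria above are violated.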
// //===----------------------------------------------------------------------===// #include "PPC.h" #include "PPCSubtarget.h" #include "PPCTargetMachine.h" #include "PPCTargetTransformInfo.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/ValueHandle.h" #include "llvm/PassSupport.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" #ifndef NDEBUG #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #endif using namespace llvm; #define DEBUG_TYPE "ctrloops" #ifndef NDEBUG static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1)); #endif // The latency of mtctr is only justified if there are more than 4 // comparisons that will be removed as a result. static cl::opt<unsigned> SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden, cl::desc("Loops with a constant trip count smaller than " "this value will not use the count register.")); STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops"); namespace llvm { void initializePPCCTRLoopsPass(PassRegistry&); #ifndef NDEBUG void initializePPCCTRLoopsVerifyPass(PassRegistry&); #endif } namespace { struct PPCCTRLoops : public FunctionPass { #ifndef NDEBUG static int Counter; #endif public: static char ID; PPCCTRLoops() : FunctionPass(ID) { initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry()); } bool runOnFunction(Function &F) override; void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<LoopInfoWrapperPass>(); AU.addPreserved<LoopInfoWrapperPass>(); AU.addRequired<DominatorTreeWrapperPass>(); AU.addPreserved<DominatorTreeWrapperPass>(); AU.addRequired<ScalarEvolutionWrapperPass>(); AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<TargetTransformInfoWrapperPass>(); } private: bool mightUseCTR(BasicBlock *BB); bool convertToCTRLoop(Loop *L); private: const PPCTargetMachine *TM; const PPCSubtarget *STI; const PPCTargetLowering *TLI; const DataLayout *DL; const TargetLibraryInfo *LibInfo; const TargetTransformInfo *TTI; LoopInfo *LI; ScalarEvolution *SE; DominatorTree *DT; bool PreserveLCSSA; TargetSchedModel SchedModel; }; char PPCCTRLoops::ID = 0; #ifndef NDEBUG int PPCCTRLoops::Counter = 0; #endif #ifndef NDEBUG struct PPCCTRLoopsVerify : public MachineFunctionPass { public: static char ID; PPCCTRLoopsVerify() : MachineFunctionPass(ID) { initializePPCCTRLoopsVerifyPass(*PassRegistry::getPassRegistry()); } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineDominatorTree>(); MachineFunctionPass::getAnalysisUsage(AU); } bool runOnMachineFunction(MachineFunction &MF) override; private: MachineDominatorTree *MDT; }; char PPCCTRLoopsVerify::ID = 0; #endif // NDEBUG } // end anonymous namespace INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", false, false)
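// The INITIALIZE_PASS_* macros register the pass with the legacy
// PassRegistry; each INITIALIZE_PASS_DEPENDENCY line below names one
// analysis wrapper pass that getAnalysisUsage() above requires.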
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", false, false) FunctionPass *llvm::createPPCCTRLoops() { return new PPCCTRLoops(); } #ifndef NDEBUG INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify", "PowerPC CTR Loops Verify", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_END(PPCCTRLoopsVerify, "ppc-ctr-loops-verify", "PowerPC CTR Loops Verify", false, false) FunctionPass *llvm::createPPCCTRLoopsVerify() { return new PPCCTRLoopsVerify(); } #endif // NDEBUG bool PPCCTRLoops::runOnFunction(Function &F) { if (skipFunction(F)) return false; auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); if (!TPC) return false; TM = &TPC->getTM<PPCTargetMachine>(); STI = TM->getSubtargetImpl(F); TLI = STI->getTargetLowering(); LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); DL = &F.getParent()->getDataLayout(); auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); LibInfo = TLIP ? &TLIP->getTLI() : nullptr; PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); bool MadeChange = false; for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) { Loop *L = *I; if (!L->getParentLoop()) MadeChange |= convertToCTRLoop(L); } return MadeChange; } static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) { if (IntegerType *ITy = dyn_cast<IntegerType>(Ty)) return ITy->getBitWidth() > (Is32Bit ? 32U : 64U); return false; } // Determining the address of a TLS variable results in a function call in // certain TLS models. static bool memAddrUsesCTR(const PPCTargetMachine &TM, const Value *MemAddr) { const auto *GV = dyn_cast<GlobalValue>(MemAddr); if (!GV) { // Recurse to check for constants that refer to TLS global variables. if (const auto *CV = dyn_cast<Constant>(MemAddr)) for (const auto &CO : CV->operands()) if (memAddrUsesCTR(TM, CO)) return true; return false; } if (!GV->isThreadLocal()) return false; TLSModel::Model Model = TM.getTLSModel(GV); return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic; } // Loop through the inline asm constraints and look for something that clobbers // ctr. static bool asmClobbersCTR(InlineAsm *IA) { InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints(); for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) { InlineAsm::ConstraintInfo &C = CIV[i]; if (C.Type != InlineAsm::isInput) for (unsigned j = 0, je = C.Codes.size(); j < je; ++j) if (StringRef(C.Codes[j]).equals_lower("{ctr}")) return true; } return false; } bool PPCCTRLoops::mightUseCTR(BasicBlock *BB) { for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J) { if (CallInst *CI = dyn_cast<CallInst>(J)) { // Inline ASM is okay, unless it clobbers the ctr register. if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) { if (asmClobbersCTR(IA)) return true; continue; } if (Function *F = CI->getCalledFunction()) { // Most intrinsics don't become function calls, but some might. // sin, cos, exp and log are always calls. unsigned Opcode = 0; if (F->getIntrinsicID() != Intrinsic::not_intrinsic) { switch (F->getIntrinsicID()) { default: continue; // If we have a call to ppc_is_decremented_ctr_nonzero, or ppc_mtctr // we're definitely using CTR.
case Intrinsic::ppc_is_decremented_ctr_nonzero: case Intrinsic::ppc_mtctr: return true; // VisualStudio defines setjmp as _setjmp #if defined(_MSC_VER) && defined(setjmp) && \ !defined(setjmp_undefined_for_msvc) # pragma push_macro("setjmp") # undef setjmp # define setjmp_undefined_for_msvc #endif case Intrinsic::setjmp: #if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc) // let's return it to _setjmp state # pragma pop_macro("setjmp") # undef setjmp_undefined_for_msvc #endif case Intrinsic::longjmp: // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp // because, although it does clobber the counter register, the // control can't then return to inside the loop unless there is also // an eh_sjlj_setjmp. case Intrinsic::eh_sjlj_setjmp: case Intrinsic::memcpy: case Intrinsic::memmove: case Intrinsic::memset: case Intrinsic::powi: case Intrinsic::log: case Intrinsic::log2: case Intrinsic::log10: case Intrinsic::exp: case Intrinsic::exp2: case Intrinsic::pow: case Intrinsic::sin: case Intrinsic::cos: return true; case Intrinsic::copysign: if (CI->getArgOperand(0)->getType()->getScalarType()-> isPPC_FP128Ty()) return true; else continue; // ISD::FCOPYSIGN is never a library call. case Intrinsic::sqrt: Opcode = ISD::FSQRT; break; case Intrinsic::floor: Opcode = ISD::FFLOOR; break; case Intrinsic::ceil: Opcode = ISD::FCEIL; break; case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; case Intrinsic::rint: Opcode = ISD::FRINT; break; case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; case Intrinsic::round: Opcode = ISD::FROUND; break; case Intrinsic::minnum: Opcode = ISD::FMINNUM; break; case Intrinsic::maxnum: Opcode = ISD::FMAXNUM; break; case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO; break; case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO; break; } } // PowerPC does not use [US]DIVREM or other library calls for // operations on regular types which are not otherwise library calls // (i.e. soft float or atomics). If adapting for targets that do, // additional care is required here. LibFunc Func; if (!F->hasLocalLinkage() && F->hasName() && LibInfo && LibInfo->getLibFunc(F->getName(), Func) && LibInfo->hasOptimizedCodeGen(Func)) { // Non-read-only functions are never treated as intrinsics. if (!CI->onlyReadsMemory()) return true; // Conversion happens only for FP calls. if (!CI->getArgOperand(0)->getType()->isFloatingPointTy()) return true; switch (Func) { default: return true; case LibFunc_copysign: case LibFunc_copysignf: continue; // ISD::FCOPYSIGN is never a library call. case LibFunc_copysignl: return true; case LibFunc_fabs: case LibFunc_fabsf: case LibFunc_fabsl: continue; // ISD::FABS is never a library call. 
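// The cases below do not necessarily force CTR usage: each maps the call
// to its ISD opcode, and further down the target is asked whether that
// operation is legal or custom, i.e. whether it really lowers to a call.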
case LibFunc_sqrt: case LibFunc_sqrtf: case LibFunc_sqrtl: Opcode = ISD::FSQRT; break; case LibFunc_floor: case LibFunc_floorf: case LibFunc_floorl: Opcode = ISD::FFLOOR; break; case LibFunc_nearbyint: case LibFunc_nearbyintf: case LibFunc_nearbyintl: Opcode = ISD::FNEARBYINT; break; case LibFunc_ceil: case LibFunc_ceilf: case LibFunc_ceill: Opcode = ISD::FCEIL; break; case LibFunc_rint: case LibFunc_rintf: case LibFunc_rintl: Opcode = ISD::FRINT; break; case LibFunc_round: case LibFunc_roundf: case LibFunc_roundl: Opcode = ISD::FROUND; break; case LibFunc_trunc: case LibFunc_truncf: case LibFunc_truncl: Opcode = ISD::FTRUNC; break; case LibFunc_fmin: case LibFunc_fminf: case LibFunc_fminl: Opcode = ISD::FMINNUM; break; case LibFunc_fmax: case LibFunc_fmaxf: case LibFunc_fmaxl: Opcode = ISD::FMAXNUM; break; } } if (Opcode) { MVT VTy = TLI->getSimpleValueType( *DL, CI->getArgOperand(0)->getType(), true); if (VTy == MVT::Other) return true; if (TLI->isOperationLegalOrCustom(Opcode, VTy)) continue; else if (VTy.isVector() && TLI->isOperationLegalOrCustom(Opcode, VTy.getScalarType())) continue; return true; } } return true; } else if (isa<BinaryOperator>(J) && J->getType()->getScalarType()->isPPC_FP128Ty()) { // Most operations on ppc_f128 values become calls. return true; } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) || isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) { CastInst *CI = cast<CastInst>(J); if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() || CI->getDestTy()->getScalarType()->isPPC_FP128Ty() || isLargeIntegerTy(!TM->isPPC64(), CI->getSrcTy()->getScalarType()) || isLargeIntegerTy(!TM->isPPC64(), CI->getDestTy()->getScalarType())) return true; } else if (isLargeIntegerTy(!TM->isPPC64(), J->getType()->getScalarType()) && (J->getOpcode() == Instruction::UDiv || J->getOpcode() == Instruction::SDiv || J->getOpcode() == Instruction::URem || J->getOpcode() == Instruction::SRem)) { return true; } else if (!TM->isPPC64() && isLargeIntegerTy(false, J->getType()->getScalarType()) && (J->getOpcode() == Instruction::Shl || J->getOpcode() == Instruction::AShr || J->getOpcode() == Instruction::LShr)) { // Only on PPC32, for 128-bit integers (specifically not 64-bit // integers), these might be runtime calls. return true; } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) { // On PowerPC, indirect jumps use the counter register. return true; } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) { if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries()) return true; } + // FREM is always a call. + if (J->getOpcode() == Instruction::FRem) + return true; + if (STI->useSoftFloat()) { switch(J->getOpcode()) { case Instruction::FAdd: case Instruction::FSub: case Instruction::FMul: case Instruction::FDiv: - case Instruction::FRem: case Instruction::FPTrunc: case Instruction::FPExt: case Instruction::FPToUI: case Instruction::FPToSI: case Instruction::UIToFP: case Instruction::SIToFP: case Instruction::FCmp: return true; } } for (Value *Operand : J->operands()) if (memAddrUsesCTR(*TM, Operand)) return true; } return false; } bool PPCCTRLoops::convertToCTRLoop(Loop *L) { bool MadeChange = false; // Do not convert small short loops to CTR loop. unsigned ConstTripCount = SE->getSmallConstantTripCount(L); if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) { SmallPtrSet<const Value *, 4> EphValues; auto AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache( *L->getHeader()->getParent()); CodeMetrics::collectEphemeralValues(L, &AC, EphValues); CodeMetrics Metrics; for (BasicBlock *BB : L->blocks()) Metrics.analyzeBasicBlock(BB, *TTI, EphValues); // 6 is an approximate latency for the mtctr instruction.
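// Worked example with hypothetical numbers: at an issue width of 4, a
// constant-trip-count loop body of 6 * 4 = 24 instructions or fewer is
// kept as an ordinary compare-and-branch loop, since it is too small to
// amortize the mtctr latency.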
if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth())) return false; } // Process nested loops first. for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) { MadeChange |= convertToCTRLoop(*I); DEBUG(dbgs() << "Nested loop converted\n"); } // If a nested loop has been converted, then we can't convert this loop. if (MadeChange) return MadeChange; #ifndef NDEBUG // Stop trying after reaching the limit (if any). int Limit = CTRLoopLimit; if (Limit >= 0) { if (Counter >= CTRLoopLimit) return false; Counter++; } #endif // We don't want to spill/restore the counter register, and so we don't // want to use the counter register if the loop contains calls. for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); I != IE; ++I) if (mightUseCTR(*I)) return MadeChange; SmallVector<BasicBlock *, 4> ExitingBlocks; L->getExitingBlocks(ExitingBlocks); BasicBlock *CountedExitBlock = nullptr; const SCEV *ExitCount = nullptr; BranchInst *CountedExitBranch = nullptr; for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(), IE = ExitingBlocks.end(); I != IE; ++I) { const SCEV *EC = SE->getExitCount(L, *I); DEBUG(dbgs() << "Exit Count for " << *L << " from block " << (*I)->getName() << ": " << *EC << "\n"); if (isa<SCEVCouldNotCompute>(EC)) continue; if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) { if (ConstEC->getValue()->isZero()) continue; } else if (!SE->isLoopInvariant(EC, L)) continue; if (SE->getTypeSizeInBits(EC->getType()) > (TM->isPPC64() ? 64 : 32)) continue; // We now have a loop-invariant count of loop iterations (which is not the // constant zero) for which we know that this loop will not exit via this // exiting block. // We need to make sure that this block will run on every loop iteration. // For this to be true, we must dominate all blocks with backedges. Such // blocks are in-loop predecessors to the header block. bool NotAlways = false; for (pred_iterator PI = pred_begin(L->getHeader()), PIE = pred_end(L->getHeader()); PI != PIE; ++PI) { if (!L->contains(*PI)) continue; if (!DT->dominates(*I, *PI)) { NotAlways = true; break; } } if (NotAlways) continue; // Make sure this block ends with a conditional branch. Instruction *TI = (*I)->getTerminator(); if (!TI) continue; if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { if (!BI->isConditional()) continue; CountedExitBranch = BI; } else continue; // Note that this block may not be the loop latch block, even if the loop // has a latch block. CountedExitBlock = *I; ExitCount = EC; break; } if (!CountedExitBlock) return MadeChange; BasicBlock *Preheader = L->getLoopPreheader(); // If we don't have a preheader, then insert one. If we already have a // preheader, then we can use it (except if the preheader contains a use of // the CTR register because some such uses might be reordered by the // selection DAG after the mtctr instruction). if (!Preheader || mightUseCTR(Preheader)) Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA); if (!Preheader) return MadeChange; DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName() << "\n"); // Insert the count into the preheader and replace the condition used by the // selected branch. MadeChange = true; SCEVExpander SCEVE(*SE, *DL, "loopcnt"); LLVMContext &C = SE->getContext(); Type *CountType = TM->isPPC64() ?
Type::getInt64Ty(C) : Type::getInt32Ty(C); if (!ExitCount->getType()->isPointerTy() && ExitCount->getType() != CountType) ExitCount = SE->getZeroExtendExpr(ExitCount, CountType); ExitCount = SE->getAddExpr(ExitCount, SE->getOne(CountType)); Value *ECValue = SCEVE.expandCodeFor(ExitCount, CountType, Preheader->getTerminator()); IRBuilder<> CountBuilder(Preheader->getTerminator()); Module *M = Preheader->getParent()->getParent(); Value *MTCTRFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr, CountType); CountBuilder.CreateCall(MTCTRFunc, ECValue); IRBuilder<> CondBuilder(CountedExitBranch); Value *DecFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero); Value *NewCond = CondBuilder.CreateCall(DecFunc, {}); Value *OldCond = CountedExitBranch->getCondition(); CountedExitBranch->setCondition(NewCond); // The false branch must exit the loop. if (!L->contains(CountedExitBranch->getSuccessor(0))) CountedExitBranch->swapSuccessors(); // The old condition may be dead now, and may have even created a dead PHI // (the original induction variable). RecursivelyDeleteTriviallyDeadInstructions(OldCond); // Run through the basic blocks of the loop and see if any of them have dead // PHIs that can be removed. for (auto I : L->blocks()) DeleteDeadPHIs(I); ++NumCTRLoops; return MadeChange; } #ifndef NDEBUG static bool clobbersCTR(const MachineInstr &MI) { for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI.getOperand(i); if (MO.isReg()) { if (MO.isDef() && (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8)) return true; } else if (MO.isRegMask()) { if (MO.clobbersPhysReg(PPC::CTR) || MO.clobbersPhysReg(PPC::CTR8)) return true; } } return false; } static bool verifyCTRBranch(MachineBasicBlock *MBB, MachineBasicBlock::iterator I) { MachineBasicBlock::iterator BI = I; SmallSet<MachineBasicBlock *, 32> Visited; SmallVector<MachineBasicBlock *, 8> Preds; bool CheckPreds; if (I == MBB->begin()) { Visited.insert(MBB); goto queue_preds; } else --I; check_block: Visited.insert(MBB); if (I == MBB->end()) goto queue_preds; CheckPreds = true; for (MachineBasicBlock::iterator IE = MBB->begin();; --I) { unsigned Opc = I->getOpcode(); if (Opc == PPC::MTCTRloop || Opc == PPC::MTCTR8loop) { CheckPreds = false; break; } if (I != BI && clobbersCTR(*I)) { DEBUG(dbgs() << printMBBReference(*MBB) << " (" << MBB->getFullName() << ") instruction " << *I << " clobbers CTR, invalidating " << printMBBReference(*BI->getParent()) << " (" << BI->getParent()->getFullName() << ") instruction " << *BI << "\n"); return false; } if (I == IE) break; } if (!CheckPreds && Preds.empty()) return true; if (CheckPreds) { queue_preds: if (MachineFunction::iterator(MBB) == MBB->getParent()->begin()) { DEBUG(dbgs() << "Unable to find a MTCTR instruction for " << printMBBReference(*BI->getParent()) << " (" << BI->getParent()->getFullName() << ") instruction " << *BI << "\n"); return false; } for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), PIE = MBB->pred_end(); PI != PIE; ++PI) Preds.push_back(*PI); } do { MBB = Preds.pop_back_val(); if (!Visited.count(MBB)) { I = MBB->getLastNonDebugInstr(); goto check_block; } } while (!Preds.empty()); return true; } bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) { MDT = &getAnalysis<MachineDominatorTree>(); // Verify that all bdnz/bdz instructions are dominated by a loop mtctr before // any other instructions that might clobber the ctr register.
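// Put differently (illustrative): if any path from the dominating mtctr
// to a bdnz/bdz can execute an instruction that redefines CTR (say, a
// call), the count would be corrupted, so verifyCTRBranch walks the
// predecessor blocks to prove no such path exists.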
for (MachineFunction::iterator I = MF.begin(), IE = MF.end(); I != IE; ++I) { MachineBasicBlock *MBB = &*I; if (!MDT->isReachableFromEntry(MBB)) continue; for (MachineBasicBlock::iterator MII = MBB->getFirstTerminator(), MIIE = MBB->end(); MII != MIIE; ++MII) { unsigned Opc = MII->getOpcode(); if (Opc == PPC::BDNZ8 || Opc == PPC::BDNZ || Opc == PPC::BDZ8 || Opc == PPC::BDZ) if (!verifyCTRBranch(MBB, MII)) llvm_unreachable("Invalid PPC CTR loop!"); } } return false; } #endif // NDEBUG Index: head/contrib/llvm/lib/Target/X86/X86.td =================================================================== --- head/contrib/llvm/lib/Target/X86/X86.td (revision 329982) +++ head/contrib/llvm/lib/Target/X86/X86.td (revision 329983) @@ -1,1069 +1,1075 @@ //===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This is a target description file for the Intel i386 architecture, referred // to here as the "X86" architecture. // //===----------------------------------------------------------------------===// // Get the target-independent interfaces which we are implementing... // include "llvm/Target/Target.td" //===----------------------------------------------------------------------===// // X86 Subtarget state // def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true", "64-bit mode (x86_64)">; def Mode32Bit : SubtargetFeature<"32bit-mode", "In32BitMode", "true", "32-bit mode (80386)">; def Mode16Bit : SubtargetFeature<"16bit-mode", "In16BitMode", "true", "16-bit mode (i8086)">; //===----------------------------------------------------------------------===// // X86 Subtarget features //===----------------------------------------------------------------------===// def FeatureX87 : SubtargetFeature<"x87","HasX87", "true", "Enable X87 float instructions">; def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true", "Enable conditional move instructions">; def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true", "Support POPCNT instruction">; def FeatureFXSR : SubtargetFeature<"fxsr", "HasFXSR", "true", "Support fxsave/fxrestore instructions">; def FeatureXSAVE : SubtargetFeature<"xsave", "HasXSAVE", "true", "Support xsave instructions">; def FeatureXSAVEOPT: SubtargetFeature<"xsaveopt", "HasXSAVEOPT", "true", "Support xsaveopt instructions">; def FeatureXSAVEC : SubtargetFeature<"xsavec", "HasXSAVEC", "true", "Support xsavec instructions">; def FeatureXSAVES : SubtargetFeature<"xsaves", "HasXSAVES", "true", "Support xsaves instructions">; def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1", "Enable SSE instructions", // SSE codegen depends on cmovs, and all // SSE1+ processors support them. 
[FeatureCMOV]>; def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2", "Enable SSE2 instructions", [FeatureSSE1]>; def FeatureSSE3 : SubtargetFeature<"sse3", "X86SSELevel", "SSE3", "Enable SSE3 instructions", [FeatureSSE2]>; def FeatureSSSE3 : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3", "Enable SSSE3 instructions", [FeatureSSE3]>; def FeatureSSE41 : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41", "Enable SSE 4.1 instructions", [FeatureSSSE3]>; def FeatureSSE42 : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42", "Enable SSE 4.2 instructions", [FeatureSSE41]>; // The MMX subtarget feature is separate from the rest of the SSE features // because it's important (for odd compatibility reasons) to be able to // turn it off explicitly while allowing SSE+ to be on. def FeatureMMX : SubtargetFeature<"mmx","X863DNowLevel", "MMX", "Enable MMX instructions">; def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow", "Enable 3DNow! instructions", [FeatureMMX]>; def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA", "Enable 3DNow! Athlon instructions", [Feature3DNow]>; // All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied // feature, because SSE2 can be disabled (e.g. for compiling OS kernels) // without disabling 64-bit mode. def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true", "Support 64-bit instructions", [FeatureCMOV]>; def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true", "64-bit with cmpxchg16b", [Feature64Bit]>; def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true", "SHLD instruction is slow">; def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true", "PMULLD instruction is slow">; // FIXME: This should not apply to CPUs that do not have SSE. 
def FeatureSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16", "IsUAMem16Slow", "true", "Slow unaligned 16-byte memory access">; def FeatureSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32", "IsUAMem32Slow", "true", "Slow unaligned 32-byte memory access">; def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true", "Support SSE 4a instructions", [FeatureSSE3]>; def FeatureAVX : SubtargetFeature<"avx", "X86SSELevel", "AVX", "Enable AVX instructions", [FeatureSSE42]>; def FeatureAVX2 : SubtargetFeature<"avx2", "X86SSELevel", "AVX2", "Enable AVX2 instructions", [FeatureAVX]>; def FeatureFMA : SubtargetFeature<"fma", "HasFMA", "true", "Enable three-operand fused multiply-add", [FeatureAVX]>; def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true", "Support 16-bit floating point conversion instructions", [FeatureAVX]>; def FeatureAVX512 : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512F", "Enable AVX-512 instructions", [FeatureAVX2, FeatureFMA, FeatureF16C]>; def FeatureERI : SubtargetFeature<"avx512er", "HasERI", "true", "Enable AVX-512 Exponential and Reciprocal Instructions", [FeatureAVX512]>; def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true", "Enable AVX-512 Conflict Detection Instructions", [FeatureAVX512]>; def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ", "true", "Enable AVX-512 Population Count Instructions", [FeatureAVX512]>; def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true", "Enable AVX-512 PreFetch Instructions", [FeatureAVX512]>; def FeaturePREFETCHWT1 : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1", "true", "Prefetch with Intent to Write and T1 Hint">; def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true", "Enable AVX-512 Doubleword and Quadword Instructions", [FeatureAVX512]>; def FeatureBWI : SubtargetFeature<"avx512bw", "HasBWI", "true", "Enable AVX-512 Byte and Word Instructions", [FeatureAVX512]>; def FeatureVLX : SubtargetFeature<"avx512vl", "HasVLX", "true", "Enable AVX-512 Vector Length eXtensions", [FeatureAVX512]>; def FeatureVBMI : SubtargetFeature<"avx512vbmi", "HasVBMI", "true", "Enable AVX-512 Vector Byte Manipulation Instructions", [FeatureBWI]>; def FeatureVBMI2 : SubtargetFeature<"avx512vbmi2", "HasVBMI2", "true", "Enable AVX-512 further Vector Byte Manipulation Instructions", [FeatureBWI]>; def FeatureIFMA : SubtargetFeature<"avx512ifma", "HasIFMA", "true", "Enable AVX-512 Integer Fused Multiply-Add", [FeatureAVX512]>; def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true", "Enable protection keys">; def FeatureVNNI : SubtargetFeature<"avx512vnni", "HasVNNI", "true", "Enable AVX-512 Vector Neural Network Instructions", [FeatureAVX512]>; def FeatureBITALG : SubtargetFeature<"avx512bitalg", "HasBITALG", "true", "Enable AVX-512 Bit Algorithms", [FeatureBWI]>; def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true", "Enable packed carry-less multiplication instructions", [FeatureSSE2]>; def FeatureGFNI : SubtargetFeature<"gfni", "HasGFNI", "true", "Enable Galois Field Arithmetic Instructions", [FeatureSSE2]>; def FeatureVPCLMULQDQ : SubtargetFeature<"vpclmulqdq", "HasVPCLMULQDQ", "true", "Enable vpclmulqdq instructions", [FeatureAVX, FeaturePCLMUL]>; def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true", "Enable four-operand fused multiply-add", [FeatureAVX, FeatureSSE4A]>; def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true", "Enable XOP instructions", [FeatureFMA4]>; def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem", "HasSSEUnalignedMem",
"true", "Allow unaligned memory operands with SSE instructions">; def FeatureAES : SubtargetFeature<"aes", "HasAES", "true", "Enable AES instructions", [FeatureSSE2]>; def FeatureVAES : SubtargetFeature<"vaes", "HasVAES", "true", "Promote selected AES instructions to AVX512/AVX registers", [FeatureAVX, FeatureAES]>; def FeatureTBM : SubtargetFeature<"tbm", "HasTBM", "true", "Enable TBM instructions">; def FeatureLWP : SubtargetFeature<"lwp", "HasLWP", "true", "Enable LWP instructions">; def FeatureMOVBE : SubtargetFeature<"movbe", "HasMOVBE", "true", "Support MOVBE instruction">; def FeatureRDRAND : SubtargetFeature<"rdrnd", "HasRDRAND", "true", "Support RDRAND instruction">; def FeatureFSGSBase : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true", "Support FS/GS Base instructions">; def FeatureLZCNT : SubtargetFeature<"lzcnt", "HasLZCNT", "true", "Support LZCNT instruction">; def FeatureBMI : SubtargetFeature<"bmi", "HasBMI", "true", "Support BMI instructions">; def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true", "Support BMI2 instructions">; def FeatureRTM : SubtargetFeature<"rtm", "HasRTM", "true", "Support RTM instructions">; def FeatureADX : SubtargetFeature<"adx", "HasADX", "true", "Support ADX instructions">; def FeatureSHA : SubtargetFeature<"sha", "HasSHA", "true", "Enable SHA instructions", [FeatureSSE2]>; def FeatureSHSTK : SubtargetFeature<"shstk", "HasSHSTK", "true", "Support CET Shadow-Stack instructions">; def FeatureIBT : SubtargetFeature<"ibt", "HasIBT", "true", "Support CET Indirect-Branch-Tracking instructions">; def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true", "Support PRFCHW instructions">; def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true", "Support RDSEED instruction">; def FeatureLAHFSAHF : SubtargetFeature<"sahf", "HasLAHFSAHF", "true", "Support LAHF and SAHF instructions">; def FeatureMWAITX : SubtargetFeature<"mwaitx", "HasMWAITX", "true", "Enable MONITORX/MWAITX timer functionality">; def FeatureCLZERO : SubtargetFeature<"clzero", "HasCLZERO", "true", "Enable Cache Line Zero">; def FeatureMPX : SubtargetFeature<"mpx", "HasMPX", "true", "Support MPX instructions">; def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true", "Use LEA for adjusting the stack pointer">; def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb", "HasSlowDivide32", "true", "Use 8-bit divide for positive values less than 256">; def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divl", "HasSlowDivide64", "true", "Use 32-bit divide for positive values less than 2^32">; def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions", "PadShortFunctions", "true", "Pad short functions">; def FeatureSGX : SubtargetFeature<"sgx", "HasSGX", "true", "Enable Software Guard Extensions">; def FeatureCLFLUSHOPT : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true", "Flush A Cache Line Optimized">; def FeatureCLWB : SubtargetFeature<"clwb", "HasCLWB", "true", "Cache Line Write Back">; // On some processors, instructions that implicitly take two memory operands are // slow. In practice, this means that CALL, PUSH, and POP with memory operands // should be avoided in favor of a MOV + register CALL/PUSH/POP. 
def FeatureSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops", "SlowTwoMemOps", "true", "Two memory operand instructions are slow">; def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true", "LEA instruction needs inputs at AG stage">; def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true", "LEA instruction with certain arguments is slow">; def FeatureSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true", "LEA instruction with 3 ops or certain registers is slow">; def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true", "INC and DEC instructions are slower than ADD and SUB">; def FeatureSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true", "Use software floating point features.">; // On recent X86 (port-bound) processors, it's preferable to combine to a single shuffle // using a variable mask over multiple fixed shuffles. def FeatureFastVariableShuffle : SubtargetFeature<"fast-variable-shuffle", "HasFastVariableShuffle", "true", "Shuffles with variable masks are fast">; // On some X86 processors, there is no performance hazard to writing only the // lower parts of a YMM or ZMM register without clearing the upper part. def FeatureFastPartialYMMorZMMWrite : SubtargetFeature<"fast-partial-ymm-or-zmm-write", "HasFastPartialYMMorZMMWrite", "true", "Partial writes to YMM/ZMM registers are fast">; // FeatureFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency // than the corresponding NR code. FeatureFastVectorFSQRT should be enabled if // vector FSQRT has higher throughput than the corresponding NR code. // The idea is that throughput bound code is likely to be vectorized, so for // vectorized code we should care about the throughput of SQRT operations. // But if the code is scalar that probably means that the code has some kind of // dependency and we should care more about reducing the latency. def FeatureFastScalarFSQRT : SubtargetFeature<"fast-scalar-fsqrt", "HasFastScalarFSQRT", "true", "Scalar SQRT is fast (disable Newton-Raphson)">; def FeatureFastVectorFSQRT : SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT", "true", "Vector SQRT is fast (disable Newton-Raphson)">; // If lzcnt has equivalent latency/throughput to most simple integer ops, it can // be used to replace test/set sequences. def FeatureFastLZCNT : SubtargetFeature< "fast-lzcnt", "HasFastLZCNT", "true", "LZCNT instructions are as fast as most simple integer ops">; // Sandy Bridge and newer processors can use SHLD with the same source on both // inputs to implement rotate to avoid the partial flag update of the normal // rotate instructions. def FeatureFastSHLDRotate : SubtargetFeature< "fast-shld-rotate", "HasFastSHLDRotate", "true", "SHLD can be used as a faster rotate">; // Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka // "string operations"). See "REP String Enhancement" in the Intel Software // Development Manual. This feature essentially means that REP MOVSB will copy // using the largest available size instead of copying bytes one by one, making // it at least as fast as REPMOVS{W,D,Q}. def FeatureERMSB : SubtargetFeature< "ermsb", "HasERMSB", "true", "REP MOVS/STOS are fast">; // Sandy Bridge and newer processors have many instructions that can be // fused with conditional branches and pass through the CPU as a single // operation.
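// For instance, a CMP or TEST immediately followed by a conditional
// branch can issue as one fused operation on such cores (an illustrative
// example, not an exhaustive list of fusible pairs).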
def FeatureMacroFusion : SubtargetFeature<"macrofusion", "HasMacroFusion", "true", "Various instructions can be fused with conditional branches">; // Gather is available since Haswell (AVX2 set). So technically, we can // generate Gathers on all AVX2 processors. But the overhead on HSW is high. // Skylake Client processor has faster Gathers than HSW and performance is // similar to Skylake Server (AVX-512). def FeatureHasFastGather : SubtargetFeature<"fast-gather", "HasFastGather", "true", "Indicates if gather is reasonably fast.">; // Enable mitigation of some aspects of speculative execution related // vulnerabilities by removing speculatable indirect branches. This disables // jump-table formation, rewrites explicit `indirectbr` instructions into // `switch` instructions, and uses a special construct called a "retpoline" to // prevent speculation of the remaining indirect branches (indirect calls and // tail calls). def FeatureRetpoline : SubtargetFeature<"retpoline", "UseRetpoline", "true", "Remove speculation of indirect branches from the " "generated code, either by avoiding them entirely or " "lowering them with a speculation blocking construct.">; // Rely on external thunks for the emitted retpoline calls. This allows users // to provide their own custom thunk definitions in highly specialized // environments such as a kernel that does boot-time hot patching. def FeatureRetpolineExternalThunk : SubtargetFeature< "retpoline-external-thunk", "UseRetpolineExternalThunk", "true", "Enable retpoline, but with an externally provided thunk.", [FeatureRetpoline]>; //===----------------------------------------------------------------------===// // Register File Description //===----------------------------------------------------------------------===// include "X86RegisterInfo.td" include "X86RegisterBanks.td" //===----------------------------------------------------------------------===// // Instruction Descriptions //===----------------------------------------------------------------------===// include "X86Schedule.td" include "X86InstrInfo.td" def X86InstrInfo : InstrInfo; //===----------------------------------------------------------------------===// // X86 processors supported. 
//===----------------------------------------------------------------------===// include "X86ScheduleAtom.td" include "X86SchedSandyBridge.td" include "X86SchedHaswell.td" include "X86SchedBroadwell.td" include "X86ScheduleSLM.td" include "X86ScheduleZnver1.td" include "X86ScheduleBtVer2.td" include "X86SchedSkylakeClient.td" include "X86SchedSkylakeServer.td" def ProcIntelAtom : SubtargetFeature<"atom", "X86ProcFamily", "IntelAtom", "Intel Atom processors">; def ProcIntelSLM : SubtargetFeature<"slm", "X86ProcFamily", "IntelSLM", "Intel Silvermont processors">; def ProcIntelGLM : SubtargetFeature<"glm", "X86ProcFamily", "IntelGLM", "Intel Goldmont processors">; def ProcIntelHSW : SubtargetFeature<"haswell", "X86ProcFamily", "IntelHaswell", "Intel Haswell processors">; def ProcIntelBDW : SubtargetFeature<"broadwell", "X86ProcFamily", "IntelBroadwell", "Intel Broadwell processors">; def ProcIntelSKL : SubtargetFeature<"skylake", "X86ProcFamily", "IntelSkylake", "Intel Skylake processors">; def ProcIntelKNL : SubtargetFeature<"knl", "X86ProcFamily", "IntelKNL", "Intel Knights Landing processors">; def ProcIntelSKX : SubtargetFeature<"skx", "X86ProcFamily", "IntelSKX", "Intel Skylake Server processors">; def ProcIntelCNL : SubtargetFeature<"cannonlake", "X86ProcFamily", "IntelCannonlake", "Intel Cannonlake processors">; def ProcIntelICL : SubtargetFeature<"icelake", "X86ProcFamily", "IntelIcelake", "Intel Icelake processors">; class Proc<string Name, list<SubtargetFeature> Features> : ProcessorModel<Name, GenericModel, Features>; def : Proc<"generic", [FeatureX87, FeatureSlowUAMem16]>; def : Proc<"i386", [FeatureX87, FeatureSlowUAMem16]>; def : Proc<"i486", [FeatureX87, FeatureSlowUAMem16]>; def : Proc<"i586", [FeatureX87, FeatureSlowUAMem16]>; def : Proc<"pentium", [FeatureX87, FeatureSlowUAMem16]>; def : Proc<"pentium-mmx", [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>; foreach P = ["i686", "pentiumpro"] in { def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMOV]>; } def : Proc<"pentium2", [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureCMOV, FeatureFXSR]>; foreach P = ["pentium3", "pentium3m"] in { def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE1, FeatureFXSR]>; } // Enable the PostRAScheduler for SSE2 and SSE3 class cpus. // The intent is to enable it for pentium4 which is the current default // processor in a vanilla 32-bit clang compilation when no specific // architecture is specified. This generally gives a nice performance // increase on silvermont, with largely neutral behavior on other // contemporary large core processors. // pentium-m, pentium4m, prescott and nocona are included as a preventative // measure to avoid performance surprises, in case clang's default cpu // changes slightly. def : ProcessorModel<"pentium-m", GenericPostRAModel, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE2, FeatureFXSR]>; foreach P = ["pentium4", "pentium4m"] in { def : ProcessorModel<P, GenericPostRAModel, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE2, FeatureFXSR]>; } // Intel Quark. def : Proc<"lakemont", []>; // Intel Core Duo. def : ProcessorModel<"yonah", SandyBridgeModel, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, FeatureFXSR]>; // NetBurst. def : ProcessorModel<"prescott", GenericPostRAModel, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, FeatureFXSR]>; def : ProcessorModel<"nocona", GenericPostRAModel, [ FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, FeatureFXSR, FeatureCMPXCHG16B ]>; // Intel Core 2 Solo/Duo.
def : ProcessorModel<"core2", SandyBridgeModel, [ FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSSE3, FeatureFXSR, FeatureCMPXCHG16B, FeatureLAHFSAHF, FeatureMacroFusion ]>; def : ProcessorModel<"penryn", SandyBridgeModel, [ FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE41, FeatureFXSR, FeatureCMPXCHG16B, FeatureLAHFSAHF, FeatureMacroFusion ]>; // Atom CPUs. class BonnellProc : ProcessorModel; def : BonnellProc<"bonnell">; def : BonnellProc<"atom">; // Pin the generic name to the baseline. class SilvermontProc : ProcessorModel; def : SilvermontProc<"silvermont">; def : SilvermontProc<"slm">; // Legacy alias. class GoldmontProc : ProcessorModel; def : GoldmontProc<"goldmont">; // "Arrandale" along with corei3 and corei5 class NehalemProc : ProcessorModel; def : NehalemProc<"nehalem">; def : NehalemProc<"corei7">; // Westmere is a similar machine to nehalem with some additional features. // Westmere is the corei3/i5/i7 path from nehalem to sandybridge class WestmereProc : ProcessorModel; def : WestmereProc<"westmere">; class ProcessorFeatures Inherited, list NewFeatures> { list Value = !listconcat(Inherited, NewFeatures); } class ProcModel ProcFeatures, list OtherFeatures> : ProcessorModel; // SSE is not listed here since llvm treats AVX as a reimplementation of SSE, // rather than a superset. def SNBFeatures : ProcessorFeatures<[], [ FeatureX87, FeatureMMX, FeatureAVX, FeatureFXSR, FeatureCMPXCHG16B, FeaturePOPCNT, FeatureAES, FeatureSlowDivide64, FeaturePCLMUL, FeatureXSAVE, FeatureXSAVEOPT, FeatureLAHFSAHF, FeatureSlow3OpsLEA, FeatureFastScalarFSQRT, FeatureFastSHLDRotate, FeatureSlowIncDec, FeatureMacroFusion ]>; class SandyBridgeProc : ProcModel; def : SandyBridgeProc<"sandybridge">; def : SandyBridgeProc<"corei7-avx">; // Legacy alias. def IVBFeatures : ProcessorFeatures; class IvyBridgeProc : ProcModel; def : IvyBridgeProc<"ivybridge">; def : IvyBridgeProc<"core-avx-i">; // Legacy alias. def HSWFeatures : ProcessorFeatures; class HaswellProc : ProcModel; def : HaswellProc<"haswell">; def : HaswellProc<"core-avx2">; // Legacy alias. def BDWFeatures : ProcessorFeatures; class BroadwellProc : ProcModel; def : BroadwellProc<"broadwell">; def SKLFeatures : ProcessorFeatures; class SkylakeClientProc : ProcModel; def : SkylakeClientProc<"skylake">; def KNLFeatures : ProcessorFeatures; // FIXME: define KNL model class KnightsLandingProc : ProcModel; def : KnightsLandingProc<"knl">; class KnightsMillProc : ProcModel; def : KnightsMillProc<"knm">; // TODO Add AVX5124FMAPS/AVX5124VNNIW features def SKXFeatures : ProcessorFeatures; class SkylakeServerProc : ProcModel; def : SkylakeServerProc<"skylake-avx512">; def : SkylakeServerProc<"skx">; // Legacy alias. -def CNLFeatures : ProcessorFeatures; class CannonlakeProc : ProcModel; def : CannonlakeProc<"cannonlake">; def ICLFeatures : ProcessorFeatures; class IcelakeProc : ProcModel; def : IcelakeProc<"icelake">; // AMD CPUs. 
def : Proc<"k6", [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>; def : Proc<"k6-2", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>; def : Proc<"k6-3", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>; foreach P = ["athlon", "athlon-tbird"] in { def : Proc; } foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in { def : Proc; } foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in { def : Proc; } foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in { def : Proc; } foreach P = ["amdfam10", "barcelona"] in { def : Proc; } // Bobcat def : Proc<"btver1", [ FeatureX87, FeatureMMX, FeatureSSSE3, FeatureSSE4A, FeatureFXSR, FeatureCMPXCHG16B, FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT, FeatureSlowSHLD, FeatureLAHFSAHF ]>; // Jaguar def : ProcessorModel<"btver2", BtVer2Model, [ FeatureX87, FeatureMMX, FeatureAVX, FeatureFXSR, FeatureSSE4A, FeatureCMPXCHG16B, FeaturePRFCHW, FeatureAES, FeaturePCLMUL, FeatureBMI, FeatureF16C, FeatureMOVBE, FeatureLZCNT, FeatureFastLZCNT, FeaturePOPCNT, FeatureXSAVE, FeatureXSAVEOPT, FeatureSlowSHLD, FeatureLAHFSAHF, FeatureFastPartialYMMorZMMWrite ]>; // Bulldozer def : Proc<"bdver1", [ FeatureX87, FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW, FeaturePCLMUL, FeatureMMX, FeatureAVX, FeatureFXSR, FeatureSSE4A, FeatureLZCNT, FeaturePOPCNT, FeatureXSAVE, FeatureLWP, FeatureSlowSHLD, FeatureLAHFSAHF, FeatureMacroFusion ]>; // Piledriver def : Proc<"bdver2", [ FeatureX87, FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW, FeaturePCLMUL, FeatureMMX, FeatureAVX, FeatureFXSR, FeatureSSE4A, FeatureF16C, FeatureLZCNT, FeaturePOPCNT, FeatureXSAVE, FeatureBMI, FeatureTBM, FeatureLWP, FeatureFMA, FeatureSlowSHLD, FeatureLAHFSAHF, FeatureMacroFusion ]>; // Steamroller def : Proc<"bdver3", [ FeatureX87, FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW, FeaturePCLMUL, FeatureMMX, FeatureAVX, FeatureFXSR, FeatureSSE4A, FeatureF16C, FeatureLZCNT, FeaturePOPCNT, FeatureXSAVE, FeatureBMI, FeatureTBM, FeatureLWP, FeatureFMA, FeatureXSAVEOPT, FeatureSlowSHLD, FeatureFSGSBase, FeatureLAHFSAHF, FeatureMacroFusion ]>; // Excavator def : Proc<"bdver4", [ FeatureX87, FeatureMMX, FeatureAVX2, FeatureFXSR, FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW, FeaturePCLMUL, FeatureF16C, FeatureLZCNT, FeaturePOPCNT, FeatureXSAVE, FeatureBMI, FeatureBMI2, FeatureTBM, FeatureLWP, FeatureFMA, FeatureXSAVEOPT, FeatureSlowSHLD, FeatureFSGSBase, FeatureLAHFSAHF, FeatureMWAITX, FeatureMacroFusion ]>; // Znver1 def: ProcessorModel<"znver1", Znver1Model, [ FeatureADX, FeatureAES, FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureCLFLUSHOPT, FeatureCLZERO, FeatureCMPXCHG16B, FeatureF16C, FeatureFMA, FeatureFSGSBase, FeatureFXSR, FeatureFastLZCNT, FeatureLAHFSAHF, FeatureLZCNT, FeatureMacroFusion, FeatureMMX, FeatureMOVBE, FeatureMWAITX, FeaturePCLMUL, FeaturePOPCNT, FeaturePRFCHW, FeatureRDRAND, FeatureRDSEED, FeatureSHA, FeatureSSE4A, FeatureSlowSHLD, FeatureX87, FeatureXSAVE, FeatureXSAVEC, FeatureXSAVEOPT, FeatureXSAVES]>; def : Proc<"geode", [FeatureX87, FeatureSlowUAMem16, Feature3DNowA]>; def : Proc<"winchip-c6", [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>; def : Proc<"winchip2", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>; def : Proc<"c3", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>; def : Proc<"c3-2", [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE1, FeatureFXSR]>; // We also provide a generic 64-bit specific x86 processor model which tries to // be good for modern chips without enabling 
instruction set encodings past the // basic SSE2 and 64-bit ones. It disables slow things from any mainstream and // modern 64-bit x86 chip, and enables features that are generally beneficial. // // We currently use the Sandy Bridge model as the default scheduling model as // we use it across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge which // covers a huge swath of x86 processors. If there are specific scheduling // knobs which need to be tuned differently for AMD chips, we might consider // forming a common base for them. def : ProcessorModel<"x86-64", SandyBridgeModel, [ FeatureX87, FeatureMMX, FeatureSSE2, FeatureFXSR, Feature64Bit, FeatureSlow3OpsLEA, FeatureSlowIncDec, FeatureMacroFusion ]>; //===----------------------------------------------------------------------===// // Calling Conventions //===----------------------------------------------------------------------===// include "X86CallingConv.td" //===----------------------------------------------------------------------===// // Assembly Parser //===----------------------------------------------------------------------===// def ATTAsmParserVariant : AsmParserVariant { int Variant = 0; // Variant name. string Name = "att"; // Discard comments in assembly strings. string CommentDelimiter = "#"; // Recognize hard coded registers. string RegisterPrefix = "%"; } def IntelAsmParserVariant : AsmParserVariant { int Variant = 1; // Variant name. string Name = "intel"; // Discard comments in assembly strings. string CommentDelimiter = ";"; // Recognize hard coded registers. string RegisterPrefix = ""; } //===----------------------------------------------------------------------===// // Assembly Printers //===----------------------------------------------------------------------===// // The X86 target supports two different syntaxes for emitting machine code. // This is controlled by the -x86-asm-syntax={att|intel} def ATTAsmWriter : AsmWriter { string AsmWriterClassName = "ATTInstPrinter"; int Variant = 0; } def IntelAsmWriter : AsmWriter { string AsmWriterClassName = "IntelInstPrinter"; int Variant = 1; } def X86 : Target { // Information about the instructions... let InstructionSet = X86InstrInfo; let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant]; let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter]; } Index: head/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp =================================================================== --- head/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp (revision 329982) +++ head/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp (revision 329983) @@ -1,1744 +1,1759 @@ //===- InstCombineSelect.cpp ----------------------------------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements the visitSelect function. 
// //===----------------------------------------------------------------------===// #include "InstCombineInternal.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CmpInstAnalysis.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Transforms/InstCombine/InstCombineWorklist.h" #include <cassert> #include <utility> using namespace llvm; using namespace PatternMatch; #define DEBUG_TYPE "instcombine" static SelectPatternFlavor getInverseMinMaxSelectPattern(SelectPatternFlavor SPF) { switch (SPF) { default: llvm_unreachable("unhandled!"); case SPF_SMIN: return SPF_SMAX; case SPF_UMIN: return SPF_UMAX; case SPF_SMAX: return SPF_SMIN; case SPF_UMAX: return SPF_UMIN; } } static CmpInst::Predicate getCmpPredicateForMinMax(SelectPatternFlavor SPF, bool Ordered=false) { switch (SPF) { default: llvm_unreachable("unhandled!"); case SPF_SMIN: return ICmpInst::ICMP_SLT; case SPF_UMIN: return ICmpInst::ICMP_ULT; case SPF_SMAX: return ICmpInst::ICMP_SGT; case SPF_UMAX: return ICmpInst::ICMP_UGT; case SPF_FMINNUM: return Ordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT; case SPF_FMAXNUM: return Ordered ? FCmpInst::FCMP_OGT : FCmpInst::FCMP_UGT; } } static Value *generateMinMaxSelectPattern(InstCombiner::BuilderTy &Builder, SelectPatternFlavor SPF, Value *A, Value *B) { CmpInst::Predicate Pred = getCmpPredicateForMinMax(SPF); assert(CmpInst::isIntPredicate(Pred)); return Builder.CreateSelect(Builder.CreateICmp(Pred, A, B), A, B); } /// If one of the constants is zero (we know they can't both be) and we have an /// icmp instruction with zero, and we have an 'and' with the non-constant value /// and a power of two we can turn the select into a shift on the result of the /// 'and'. /// This folds: /// select (icmp eq (and X, C1)), C2, C3 /// iff C1 is a power of 2 and the difference between C2 and C3 is a power of 2. /// To something like: /// (shr (and (X, C1)), (log2(C1) - log2(C2-C3))) + C3 /// Or: /// (shl (and (X, C1)), (log2(C2-C3) - log2(C1))) + C3 /// With some variations depending if C3 is larger than C2, or the shift /// isn't needed, or the bit widths don't match. static Value *foldSelectICmpAnd(Type *SelType, const ICmpInst *IC, APInt TrueVal, APInt FalseVal, InstCombiner::BuilderTy &Builder) { assert(SelType->isIntOrIntVectorTy() && "Not an integer select?"); // If this is a vector select, we need a vector compare.
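// Worked example (illustrative): for
//   select (icmp eq (and i32 %X, 4), 0), i32 0, i32 8
// AndMask is 4 (AndZeros = 2) and the nonzero arm is 8 (ValZeros = 3), so
// the code below produces (shl (and %X, 4), 1), moving bit 2 of %X into
// bit 3.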
if (SelType->isVectorTy() != IC->getType()->isVectorTy()) return nullptr; Value *V; APInt AndMask; bool CreateAnd = false; ICmpInst::Predicate Pred = IC->getPredicate(); if (ICmpInst::isEquality(Pred)) { if (!match(IC->getOperand(1), m_Zero())) return nullptr; V = IC->getOperand(0); const APInt *AndRHS; if (!match(V, m_And(m_Value(), m_Power2(AndRHS)))) return nullptr; AndMask = *AndRHS; } else if (decomposeBitTestICmp(IC->getOperand(0), IC->getOperand(1), Pred, V, AndMask)) { assert(ICmpInst::isEquality(Pred) && "Not equality test?"); if (!AndMask.isPowerOf2()) return nullptr; CreateAnd = true; } else { return nullptr; } // If both select arms are non-zero see if we have a select of the form // 'x ? 2^n + C : C'. Then we can offset both arms by C, use the logic // for 'x ? 2^n : 0' and fix the thing up at the end. APInt Offset(TrueVal.getBitWidth(), 0); if (!TrueVal.isNullValue() && !FalseVal.isNullValue()) { if ((TrueVal - FalseVal).isPowerOf2()) Offset = FalseVal; else if ((FalseVal - TrueVal).isPowerOf2()) Offset = TrueVal; else return nullptr; // Adjust TrueVal and FalseVal to the offset. TrueVal -= Offset; FalseVal -= Offset; } // Make sure one of the select arms is a power of 2. if (!TrueVal.isPowerOf2() && !FalseVal.isPowerOf2()) return nullptr; // Determine which shift is needed to transform result of the 'and' into the // desired result. const APInt &ValC = !TrueVal.isNullValue() ? TrueVal : FalseVal; unsigned ValZeros = ValC.logBase2(); unsigned AndZeros = AndMask.logBase2(); if (CreateAnd) { // Insert the AND instruction on the input to the truncate. V = Builder.CreateAnd(V, ConstantInt::get(V->getType(), AndMask)); } // If types don't match we can still convert the select by introducing a zext // or a trunc of the 'and'. if (ValZeros > AndZeros) { V = Builder.CreateZExtOrTrunc(V, SelType); V = Builder.CreateShl(V, ValZeros - AndZeros); } else if (ValZeros < AndZeros) { V = Builder.CreateLShr(V, AndZeros - ValZeros); V = Builder.CreateZExtOrTrunc(V, SelType); } else V = Builder.CreateZExtOrTrunc(V, SelType); // Okay, now we know that everything is set up, we just don't know whether we // have a icmp_ne or icmp_eq and whether the true or false val is the zero. bool ShouldNotVal = !TrueVal.isNullValue(); ShouldNotVal ^= Pred == ICmpInst::ICMP_NE; if (ShouldNotVal) V = Builder.CreateXor(V, ValC); // Apply an offset if needed. if (!Offset.isNullValue()) V = Builder.CreateAdd(V, ConstantInt::get(V->getType(), Offset)); return V; } /// We want to turn code that looks like this: /// %C = or %A, %B /// %D = select %cond, %C, %A /// into: /// %C = select %cond, %B, 0 /// %D = or %A, %C /// /// Assuming that the specified instruction is an operand to the select, return /// a bitmask indicating which operands of this instruction are foldable if they /// equal the other incoming value of the select. static unsigned getSelectFoldableOperands(BinaryOperator *I) { switch (I->getOpcode()) { case Instruction::Add: case Instruction::Mul: case Instruction::And: case Instruction::Or: case Instruction::Xor: return 3; // Can fold through either operand. case Instruction::Sub: // Can only fold on the amount subtracted. case Instruction::Shl: // Can only fold on the shift amount. case Instruction::LShr: case Instruction::AShr: return 1; default: return 0; // Cannot fold } } /// For the same transformation as the previous function, return the identity /// constant that goes into the select. 
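/// For example (added illustration): folding through an 'or' uses the
/// identity 0, through an 'and' uses all-ones, and through a 'mul' uses 1,
/// so that the untaken arm of the new select leaves the other operand of the
/// binary operator unchanged.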
static APInt getSelectFoldableConstant(BinaryOperator *I) { switch (I->getOpcode()) { default: llvm_unreachable("This cannot happen!"); case Instruction::Add: case Instruction::Sub: case Instruction::Or: case Instruction::Xor: case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: return APInt::getNullValue(I->getType()->getScalarSizeInBits()); case Instruction::And: return APInt::getAllOnesValue(I->getType()->getScalarSizeInBits()); case Instruction::Mul: return APInt(I->getType()->getScalarSizeInBits(), 1); } } /// We have (select c, TI, FI), and we know that TI and FI have the same opcode. Instruction *InstCombiner::foldSelectOpOp(SelectInst &SI, Instruction *TI, Instruction *FI) { // Don't break up min/max patterns. The hasOneUse checks below prevent that // for most cases, but vector min/max with bitcasts can be transformed. If the // one-use restrictions are eased for other patterns, we still don't want to // obfuscate min/max. if ((match(&SI, m_SMin(m_Value(), m_Value())) || match(&SI, m_SMax(m_Value(), m_Value())) || match(&SI, m_UMin(m_Value(), m_Value())) || match(&SI, m_UMax(m_Value(), m_Value())))) return nullptr; // If this is a cast from the same type, merge. if (TI->getNumOperands() == 1 && TI->isCast()) { Type *FIOpndTy = FI->getOperand(0)->getType(); if (TI->getOperand(0)->getType() != FIOpndTy) return nullptr; // The select condition may be a vector. We may only change the operand // type if the vector width remains the same (and matches the condition). Type *CondTy = SI.getCondition()->getType(); if (CondTy->isVectorTy()) { if (!FIOpndTy->isVectorTy()) return nullptr; if (CondTy->getVectorNumElements() != FIOpndTy->getVectorNumElements()) return nullptr; // TODO: If the backend knew how to deal with casts better, we could // remove this limitation. For now, there's too much potential to create // worse codegen by promoting the select ahead of size-altering casts // (PR28160). // // Note that ValueTracking's matchSelectPattern() looks through casts // without checking 'hasOneUse' when it matches min/max patterns, so this // transform may end up happening anyway. if (TI->getOpcode() != Instruction::BitCast && (!TI->hasOneUse() || !FI->hasOneUse())) return nullptr; } else if (!TI->hasOneUse() || !FI->hasOneUse()) { // TODO: The one-use restrictions for a scalar select could be eased if // the fold of a select in visitLoadInst() was enhanced to match a pattern // that includes a cast. return nullptr; } // Fold this by inserting a select from the input values. Value *NewSI = Builder.CreateSelect(SI.getCondition(), TI->getOperand(0), FI->getOperand(0), SI.getName() + ".v", &SI); return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI, TI->getType()); } // Only handle binary operators with one-use here. As with the cast case // above, it may be possible to relax the one-use constraint, but that needs // be examined carefully since it may not reduce the total number of // instructions. BinaryOperator *BO = dyn_cast(TI); if (!BO || !TI->hasOneUse() || !FI->hasOneUse()) return nullptr; // Figure out if the operations have any operands in common. 
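  // For instance (added illustration, mirroring the doc comment above
  // getSelectFoldableOperands): with a common first operand,
  //   %t = add i32 %x, %y
  //   %f = add i32 %x, %z
  //   %r = select i1 %c, i32 %t, i32 %f
  // can be rewritten as
  //   %s = select i1 %c, i32 %y, i32 %z
  //   %r = add i32 %x, %s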
Value *MatchOp, *OtherOpT, *OtherOpF; bool MatchIsOpZero; if (TI->getOperand(0) == FI->getOperand(0)) { MatchOp = TI->getOperand(0); OtherOpT = TI->getOperand(1); OtherOpF = FI->getOperand(1); MatchIsOpZero = true; } else if (TI->getOperand(1) == FI->getOperand(1)) { MatchOp = TI->getOperand(1); OtherOpT = TI->getOperand(0); OtherOpF = FI->getOperand(0); MatchIsOpZero = false; } else if (!TI->isCommutative()) { return nullptr; } else if (TI->getOperand(0) == FI->getOperand(1)) { MatchOp = TI->getOperand(0); OtherOpT = TI->getOperand(1); OtherOpF = FI->getOperand(0); MatchIsOpZero = true; } else if (TI->getOperand(1) == FI->getOperand(0)) { MatchOp = TI->getOperand(1); OtherOpT = TI->getOperand(0); OtherOpF = FI->getOperand(1); MatchIsOpZero = true; } else { return nullptr; } // If we reach here, they do have operations in common. Value *NewSI = Builder.CreateSelect(SI.getCondition(), OtherOpT, OtherOpF, SI.getName() + ".v", &SI); Value *Op0 = MatchIsOpZero ? MatchOp : NewSI; Value *Op1 = MatchIsOpZero ? NewSI : MatchOp; return BinaryOperator::Create(BO->getOpcode(), Op0, Op1); } static bool isSelect01(const APInt &C1I, const APInt &C2I) { if (!C1I.isNullValue() && !C2I.isNullValue()) // One side must be zero. return false; return C1I.isOneValue() || C1I.isAllOnesValue() || C2I.isOneValue() || C2I.isAllOnesValue(); } /// Try to fold the select into one of the operands to allow further /// optimization. Instruction *InstCombiner::foldSelectIntoOp(SelectInst &SI, Value *TrueVal, Value *FalseVal) { // See the comment above GetSelectFoldableOperands for a description of the // transformation we are doing here. if (auto *TVI = dyn_cast(TrueVal)) { if (TVI->hasOneUse() && !isa(FalseVal)) { if (unsigned SFO = getSelectFoldableOperands(TVI)) { unsigned OpToFold = 0; if ((SFO & 1) && FalseVal == TVI->getOperand(0)) { OpToFold = 1; } else if ((SFO & 2) && FalseVal == TVI->getOperand(1)) { OpToFold = 2; } if (OpToFold) { APInt CI = getSelectFoldableConstant(TVI); Value *OOp = TVI->getOperand(2-OpToFold); // Avoid creating select between 2 constants unless it's selecting // between 0, 1 and -1. const APInt *OOpC; bool OOpIsAPInt = match(OOp, m_APInt(OOpC)); if (!isa(OOp) || (OOpIsAPInt && isSelect01(CI, *OOpC))) { Value *C = ConstantInt::get(OOp->getType(), CI); Value *NewSel = Builder.CreateSelect(SI.getCondition(), OOp, C); NewSel->takeName(TVI); BinaryOperator *BO = BinaryOperator::Create(TVI->getOpcode(), FalseVal, NewSel); BO->copyIRFlags(TVI); return BO; } } } } } if (auto *FVI = dyn_cast(FalseVal)) { if (FVI->hasOneUse() && !isa(TrueVal)) { if (unsigned SFO = getSelectFoldableOperands(FVI)) { unsigned OpToFold = 0; if ((SFO & 1) && TrueVal == FVI->getOperand(0)) { OpToFold = 1; } else if ((SFO & 2) && TrueVal == FVI->getOperand(1)) { OpToFold = 2; } if (OpToFold) { APInt CI = getSelectFoldableConstant(FVI); Value *OOp = FVI->getOperand(2-OpToFold); // Avoid creating select between 2 constants unless it's selecting // between 0, 1 and -1. 
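          // (Rationale, added for clarity: a select between 0 and 1 or
          // between 0 and -1 is cheap because later folds can lower it to a
          // zext or sext of the condition bit.)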
const APInt *OOpC; bool OOpIsAPInt = match(OOp, m_APInt(OOpC)); if (!isa(OOp) || (OOpIsAPInt && isSelect01(CI, *OOpC))) { Value *C = ConstantInt::get(OOp->getType(), CI); Value *NewSel = Builder.CreateSelect(SI.getCondition(), C, OOp); NewSel->takeName(FVI); BinaryOperator *BO = BinaryOperator::Create(FVI->getOpcode(), TrueVal, NewSel); BO->copyIRFlags(FVI); return BO; } } } } } return nullptr; } /// We want to turn: /// (select (icmp eq (and X, C1), 0), Y, (or Y, C2)) /// into: /// (or (shl (and X, C1), C3), Y) /// iff: /// C1 and C2 are both powers of 2 /// where: /// C3 = Log(C2) - Log(C1) /// /// This transform handles cases where: /// 1. The icmp predicate is inverted /// 2. The select operands are reversed /// 3. The magnitude of C2 and C1 are flipped static Value *foldSelectICmpAndOr(const ICmpInst *IC, Value *TrueVal, Value *FalseVal, InstCombiner::BuilderTy &Builder) { // Only handle integer compares. Also, if this is a vector select, we need a // vector compare. if (!TrueVal->getType()->isIntOrIntVectorTy() || TrueVal->getType()->isVectorTy() != IC->getType()->isVectorTy()) return nullptr; Value *CmpLHS = IC->getOperand(0); Value *CmpRHS = IC->getOperand(1); Value *V; unsigned C1Log; bool IsEqualZero; bool NeedAnd = false; if (IC->isEquality()) { if (!match(CmpRHS, m_Zero())) return nullptr; const APInt *C1; if (!match(CmpLHS, m_And(m_Value(), m_Power2(C1)))) return nullptr; V = CmpLHS; C1Log = C1->logBase2(); IsEqualZero = IC->getPredicate() == ICmpInst::ICMP_EQ; } else if (IC->getPredicate() == ICmpInst::ICMP_SLT || IC->getPredicate() == ICmpInst::ICMP_SGT) { // We also need to recognize (icmp slt (trunc (X)), 0) and // (icmp sgt (trunc (X)), -1). IsEqualZero = IC->getPredicate() == ICmpInst::ICMP_SGT; if ((IsEqualZero && !match(CmpRHS, m_AllOnes())) || (!IsEqualZero && !match(CmpRHS, m_Zero()))) return nullptr; if (!match(CmpLHS, m_OneUse(m_Trunc(m_Value(V))))) return nullptr; C1Log = CmpLHS->getType()->getScalarSizeInBits() - 1; NeedAnd = true; } else { return nullptr; } const APInt *C2; bool OrOnTrueVal = false; bool OrOnFalseVal = match(FalseVal, m_Or(m_Specific(TrueVal), m_Power2(C2))); if (!OrOnFalseVal) OrOnTrueVal = match(TrueVal, m_Or(m_Specific(FalseVal), m_Power2(C2))); if (!OrOnFalseVal && !OrOnTrueVal) return nullptr; Value *Y = OrOnFalseVal ? TrueVal : FalseVal; unsigned C2Log = C2->logBase2(); bool NeedXor = (!IsEqualZero && OrOnFalseVal) || (IsEqualZero && OrOnTrueVal); bool NeedShift = C1Log != C2Log; bool NeedZExtTrunc = Y->getType()->getScalarSizeInBits() != V->getType()->getScalarSizeInBits(); // Make sure we don't create more instructions than we save. Value *Or = OrOnFalseVal ? FalseVal : TrueVal; if ((NeedShift + NeedXor + NeedZExtTrunc) > (IC->hasOneUse() + Or->hasOneUse())) return nullptr; if (NeedAnd) { // Insert the AND instruction on the input to the truncate. APInt C1 = APInt::getOneBitSet(V->getType()->getScalarSizeInBits(), C1Log); V = Builder.CreateAnd(V, ConstantInt::get(V->getType(), C1)); } if (C2Log > C1Log) { V = Builder.CreateZExtOrTrunc(V, Y->getType()); V = Builder.CreateShl(V, C2Log - C1Log); } else if (C1Log > C2Log) { V = Builder.CreateLShr(V, C1Log - C2Log); V = Builder.CreateZExtOrTrunc(V, Y->getType()); } else V = Builder.CreateZExtOrTrunc(V, Y->getType()); if (NeedXor) V = Builder.CreateXor(V, *C2); return Builder.CreateOr(V, Y); } /// Attempt to fold a cttz/ctlz followed by a icmp plus select into a single /// call to cttz/ctlz with flag 'is_zero_undef' cleared. 
///
/// For example, we can fold the following code sequence:
/// \code
///   %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
///   %1 = icmp ne i32 %x, 0
///   %2 = select i1 %1, i32 %0, i32 32
/// \endcode
///
/// into:
///   %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 false)
static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal,
                                 InstCombiner::BuilderTy &Builder) {
  ICmpInst::Predicate Pred = ICI->getPredicate();
  Value *CmpLHS = ICI->getOperand(0);
  Value *CmpRHS = ICI->getOperand(1);

  // Check if the condition value compares a value for equality against zero.
  if (!ICI->isEquality() || !match(CmpRHS, m_Zero()))
    return nullptr;

  Value *Count = FalseVal;
  Value *ValueOnZero = TrueVal;
  if (Pred == ICmpInst::ICMP_NE)
    std::swap(Count, ValueOnZero);

  // Skip zero extend/truncate.
  Value *V = nullptr;
  if (match(Count, m_ZExt(m_Value(V))) || match(Count, m_Trunc(m_Value(V))))
    Count = V;

  // Check if the value propagated on zero is a constant number equal to the
  // sizeof in bits of 'Count'.
  unsigned SizeOfInBits = Count->getType()->getScalarSizeInBits();
  if (!match(ValueOnZero, m_SpecificInt(SizeOfInBits)))
    return nullptr;

  // Check that 'Count' is a call to intrinsic cttz/ctlz. Also check that the
  // input to the cttz/ctlz is used as LHS for the compare instruction.
  if (match(Count, m_Intrinsic<Intrinsic::cttz>(m_Specific(CmpLHS))) ||
      match(Count, m_Intrinsic<Intrinsic::ctlz>(m_Specific(CmpLHS)))) {
    IntrinsicInst *II = cast<IntrinsicInst>(Count);
    // Explicitly clear the 'undef_on_zero' flag.
    IntrinsicInst *NewI = cast<IntrinsicInst>(II->clone());
    NewI->setArgOperand(1, ConstantInt::getFalse(NewI->getContext()));
    Builder.Insert(NewI);
    return Builder.CreateZExtOrTrunc(NewI, ValueOnZero->getType());
  }

  return nullptr;
}

/// Return true if we find and adjust an icmp+select pattern where the compare
/// is with a constant that can be incremented or decremented to match the
/// minimum or maximum idiom.
static bool adjustMinMax(SelectInst &Sel, ICmpInst &Cmp) {
  ICmpInst::Predicate Pred = Cmp.getPredicate();
  Value *CmpLHS = Cmp.getOperand(0);
  Value *CmpRHS = Cmp.getOperand(1);
  Value *TrueVal = Sel.getTrueValue();
  Value *FalseVal = Sel.getFalseValue();

  // We may move or edit the compare, so make sure the select is the only user.
  const APInt *CmpC;
  if (!Cmp.hasOneUse() || !match(CmpRHS, m_APInt(CmpC)))
    return false;

  // These transforms only work for selects of integers or vector selects of
  // integer vectors.
  Type *SelTy = Sel.getType();
  auto *SelEltTy = dyn_cast<IntegerType>(SelTy->getScalarType());
  if (!SelEltTy || SelTy->isVectorTy() != Cmp.getType()->isVectorTy())
    return false;

  Constant *AdjustedRHS;
  if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_SGT)
    AdjustedRHS = ConstantInt::get(CmpRHS->getType(), *CmpC + 1);
  else if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_SLT)
    AdjustedRHS = ConstantInt::get(CmpRHS->getType(), *CmpC - 1);
  else
    return false;

  // X > C ? X : C+1 --> X < C+1 ? C+1 : X
  // X < C ? X : C-1 --> X > C-1 ? C-1 : X
  if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) ||
      (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) {
    ; // Nothing to do here. Values match without any sign/zero extension.
  }
  // Types do not match. Instead of calculating this with mixed types, promote
  // all to the larger type. This enables scalar evolution to analyze this
  // expression.
  else if (CmpRHS->getType()->getScalarSizeInBits() < SelEltTy->getBitWidth()) {
    Constant *SextRHS = ConstantExpr::getSExt(AdjustedRHS, SelTy);

    // X = sext x; x >s c ? X : C+1 --> X = sext x; X <s C+1 ? C+1 : X
    // X = sext x; x <s c ? X : C-1 --> X = sext x; X >s C-1 ? C-1 : X
    // X = sext x; x >u c ? X : C+1 --> X = sext x; X <u C+1 ? C+1 : X
    // X = sext x; x <u c ? X : C-1 --> X = sext x; X >u C-1 ? C-1 : X
    if (match(TrueVal, m_SExt(m_Specific(CmpLHS))) && SextRHS == FalseVal) {
      CmpLHS = TrueVal;
      AdjustedRHS = SextRHS;
    } else if (match(FalseVal, m_SExt(m_Specific(CmpLHS))) &&
               SextRHS == TrueVal) {
      CmpLHS = FalseVal;
      AdjustedRHS = SextRHS;
    } else if (Cmp.isUnsigned()) {
      Constant *ZextRHS = ConstantExpr::getZExt(AdjustedRHS, SelTy);
      // X = zext x; x >u c ? X : C+1 --> X = zext x; X <u C+1 ? C+1 : X
      // X = zext x; x <u c ? X : C-1 --> X = zext x; X >u C-1 ? C-1 : X
      // zext + signed compare cannot be changed:
      //    0xff <s 0x00, but 0x00ff >s 0x0000
      if (match(TrueVal, m_ZExt(m_Specific(CmpLHS))) && ZextRHS == FalseVal) {
        CmpLHS = TrueVal;
        AdjustedRHS = ZextRHS;
      } else if (match(FalseVal, m_ZExt(m_Specific(CmpLHS))) &&
                 ZextRHS == TrueVal) {
        CmpLHS = FalseVal;
        AdjustedRHS = ZextRHS;
      } else {
        return false;
      }
    } else {
      return false;
    }
  } else {
    return false;
  }

  Pred = ICmpInst::getSwappedPredicate(Pred);
  CmpRHS = AdjustedRHS;
  std::swap(FalseVal, TrueVal);
  Cmp.setPredicate(Pred);
  Cmp.setOperand(0, CmpLHS);
  Cmp.setOperand(1, CmpRHS);
  Sel.setOperand(1, TrueVal);
  Sel.setOperand(2, FalseVal);
  Sel.swapProfMetadata();

  // Move the compare instruction right before the select instruction. Otherwise
  // the sext/zext value may be defined after the compare instruction uses it.
  Cmp.moveBefore(&Sel);
  return true;
}

/// If this is an integer min/max (icmp + select) with a constant operand,
/// create the canonical icmp for the min/max operation and canonicalize the
/// constant to the 'false' operand of the select:
/// select (icmp Pred X, C1), C2, X --> select (icmp Pred' X, C2), X, C2
/// Note: if C1 != C2, this will change the icmp constant to the existing
/// constant operand of the select.
static Instruction *
canonicalizeMinMaxWithConstant(SelectInst &Sel, ICmpInst &Cmp,
                               InstCombiner::BuilderTy &Builder) {
  if (!Cmp.hasOneUse() || !isa<Constant>(Cmp.getOperand(1)))
    return nullptr;

  // Canonicalize the compare predicate based on whether we have min or max.
  Value *LHS, *RHS;
  ICmpInst::Predicate NewPred;
  SelectPatternResult SPR = matchSelectPattern(&Sel, LHS, RHS);
  switch (SPR.Flavor) {
  case SPF_SMIN: NewPred = ICmpInst::ICMP_SLT; break;
  case SPF_UMIN: NewPred = ICmpInst::ICMP_ULT; break;
  case SPF_SMAX: NewPred = ICmpInst::ICMP_SGT; break;
  case SPF_UMAX: NewPred = ICmpInst::ICMP_UGT; break;
  default: return nullptr;
  }

  // Is this already canonical?
  if (Cmp.getOperand(0) == LHS && Cmp.getOperand(1) == RHS &&
      Cmp.getPredicate() == NewPred)
    return nullptr;

  // Create the canonical compare and plug it into the select.
  Sel.setCondition(Builder.CreateICmp(NewPred, LHS, RHS));

  // If the select operands did not change, we're done.
  if (Sel.getTrueValue() == LHS && Sel.getFalseValue() == RHS)
    return &Sel;

  // If we are swapping the select operands, swap the metadata too.
  assert(Sel.getTrueValue() == RHS && Sel.getFalseValue() == LHS &&
         "Unexpected results from matchSelectPattern");
  Sel.setTrueValue(LHS);
  Sel.setFalseValue(RHS);
  Sel.swapProfMetadata();
  return &Sel;
}

/// Visit a SelectInst that has an ICmpInst as its first operand.
Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI,
                                                  ICmpInst *ICI) {
  Value *TrueVal = SI.getTrueValue();
  Value *FalseVal = SI.getFalseValue();

  if (Instruction *NewSel = canonicalizeMinMaxWithConstant(SI, *ICI, Builder))
    return NewSel;

  bool Changed = adjustMinMax(SI, *ICI);

  ICmpInst::Predicate Pred = ICI->getPredicate();
  Value *CmpLHS = ICI->getOperand(0);
  Value *CmpRHS = ICI->getOperand(1);

  // Transform (X >s -1) ? C1 : C2 --> ((X >>s 31) & (C2 - C1)) + C1
  // and       (X <s  0) ? C2 : C1 --> ((X >>s 31) & (C2 - C1)) + C1
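  // e.g. (added illustration): (X >s -1) ? 4 : 12 becomes
  //   ((X >>s 31) & 8) + 4
  // since the arithmetic shift yields 0 for non-negative X and -1 for
  // negative X.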
  // FIXME: Type and constness constraints could be lifted, but we have to
  //        watch code size carefully. We should consider xor instead of
  //        sub/add when we decide to do that.
  // TODO: Merge this with foldSelectICmpAnd somehow.
  if (CmpLHS->getType()->isIntOrIntVectorTy() &&
      CmpLHS->getType() == TrueVal->getType()) {
    const APInt *C1, *C2;
    if (match(TrueVal, m_APInt(C1)) && match(FalseVal, m_APInt(C2))) {
      ICmpInst::Predicate Pred = ICI->getPredicate();
      Value *X;
      APInt Mask;
      if (decomposeBitTestICmp(CmpLHS, CmpRHS, Pred, X, Mask, false)) {
        if (Mask.isSignMask()) {
          assert(X == CmpLHS && "Expected to use the compare input directly");
          assert(ICmpInst::isEquality(Pred) && "Expected equality predicate");

          if (Pred == ICmpInst::ICMP_NE)
            std::swap(C1, C2);

          // This shift results in either -1 or 0.
          Value *AShr = Builder.CreateAShr(X, Mask.getBitWidth() - 1);

          // Check if we can express the operation with a single or.
          if (C2->isAllOnesValue())
            return replaceInstUsesWith(SI, Builder.CreateOr(AShr, *C1));

          Value *And = Builder.CreateAnd(AShr, *C2 - *C1);
          return replaceInstUsesWith(
              SI, Builder.CreateAdd(And,
                                    ConstantInt::get(And->getType(), *C1)));
        }
      }
    }
  }

  {
    const APInt *TrueValC, *FalseValC;
    if (match(TrueVal, m_APInt(TrueValC)) &&
        match(FalseVal, m_APInt(FalseValC)))
      if (Value *V = foldSelectICmpAnd(SI.getType(), ICI, *TrueValC,
                                       *FalseValC, Builder))
        return replaceInstUsesWith(SI, V);
  }

  // NOTE: if we wanted to, this is where to detect integer MIN/MAX
  if (CmpRHS != CmpLHS && isa<Constant>(CmpRHS)) {
    if (CmpLHS == TrueVal && Pred == ICmpInst::ICMP_EQ) {
      // Transform (X == C) ? X : Y -> (X == C) ? C : Y
      SI.setOperand(1, CmpRHS);
      Changed = true;
    } else if (CmpLHS == FalseVal && Pred == ICmpInst::ICMP_NE) {
      // Transform (X != C) ? Y : X -> (X != C) ? Y : C
      SI.setOperand(2, CmpRHS);
      Changed = true;
    }
  }

  // FIXME: This code is nearly duplicated in InstSimplify. Using/refactoring
  // decomposeBitTestICmp() might help.
  {
    unsigned BitWidth =
        DL.getTypeSizeInBits(TrueVal->getType()->getScalarType());
    APInt MinSignedValue = APInt::getSignedMinValue(BitWidth);
    Value *X;
    const APInt *Y, *C;
    bool TrueWhenUnset;
    bool IsBitTest = false;
    if (ICmpInst::isEquality(Pred) &&
        match(CmpLHS, m_And(m_Value(X), m_Power2(Y))) &&
        match(CmpRHS, m_Zero())) {
      IsBitTest = true;
      TrueWhenUnset = Pred == ICmpInst::ICMP_EQ;
    } else if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, m_Zero())) {
      X = CmpLHS;
      Y = &MinSignedValue;
      IsBitTest = true;
      TrueWhenUnset = false;
    } else if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, m_AllOnes())) {
      X = CmpLHS;
      Y = &MinSignedValue;
      IsBitTest = true;
      TrueWhenUnset = true;
    }
    if (IsBitTest) {
      Value *V = nullptr;
      // (X & Y) == 0 ? X : X ^ Y  --> X & ~Y
      if (TrueWhenUnset && TrueVal == X &&
          match(FalseVal, m_Xor(m_Specific(X), m_APInt(C))) && *Y == *C)
        V = Builder.CreateAnd(X, ~(*Y));
      // (X & Y) != 0 ? X ^ Y : X  --> X & ~Y
      else if (!TrueWhenUnset && FalseVal == X &&
               match(TrueVal, m_Xor(m_Specific(X), m_APInt(C))) && *Y == *C)
        V = Builder.CreateAnd(X, ~(*Y));
      // (X & Y) == 0 ? X ^ Y : X  --> X | Y
      else if (TrueWhenUnset && FalseVal == X &&
               match(TrueVal, m_Xor(m_Specific(X), m_APInt(C))) && *Y == *C)
        V = Builder.CreateOr(X, *Y);
      // (X & Y) != 0 ?
X : X ^ Y --> X | Y else if (!TrueWhenUnset && TrueVal == X && match(FalseVal, m_Xor(m_Specific(X), m_APInt(C))) && *Y == *C) V = Builder.CreateOr(X, *Y); if (V) return replaceInstUsesWith(SI, V); } } if (Value *V = foldSelectICmpAndOr(ICI, TrueVal, FalseVal, Builder)) return replaceInstUsesWith(SI, V); if (Value *V = foldSelectCttzCtlz(ICI, TrueVal, FalseVal, Builder)) return replaceInstUsesWith(SI, V); return Changed ? &SI : nullptr; } /// SI is a select whose condition is a PHI node (but the two may be in /// different blocks). See if the true/false values (V) are live in all of the /// predecessor blocks of the PHI. For example, cases like this can't be mapped: /// /// X = phi [ C1, BB1], [C2, BB2] /// Y = add /// Z = select X, Y, 0 /// /// because Y is not live in BB1/BB2. static bool canSelectOperandBeMappingIntoPredBlock(const Value *V, const SelectInst &SI) { // If the value is a non-instruction value like a constant or argument, it // can always be mapped. const Instruction *I = dyn_cast(V); if (!I) return true; // If V is a PHI node defined in the same block as the condition PHI, we can // map the arguments. const PHINode *CondPHI = cast(SI.getCondition()); if (const PHINode *VP = dyn_cast(I)) if (VP->getParent() == CondPHI->getParent()) return true; // Otherwise, if the PHI and select are defined in the same block and if V is // defined in a different block, then we can transform it. if (SI.getParent() == CondPHI->getParent() && I->getParent() != CondPHI->getParent()) return true; // Otherwise we have a 'hard' case and we can't tell without doing more // detailed dominator based analysis, punt. return false; } /// We have an SPF (e.g. a min or max) of an SPF of the form: /// SPF2(SPF1(A, B), C) Instruction *InstCombiner::foldSPFofSPF(Instruction *Inner, SelectPatternFlavor SPF1, Value *A, Value *B, Instruction &Outer, SelectPatternFlavor SPF2, Value *C) { if (Outer.getType() != Inner->getType()) return nullptr; if (C == A || C == B) { // MAX(MAX(A, B), B) -> MAX(A, B) // MIN(MIN(a, b), a) -> MIN(a, b) if (SPF1 == SPF2) return replaceInstUsesWith(Outer, Inner); // MAX(MIN(a, b), a) -> a // MIN(MAX(a, b), a) -> a if ((SPF1 == SPF_SMIN && SPF2 == SPF_SMAX) || (SPF1 == SPF_SMAX && SPF2 == SPF_SMIN) || (SPF1 == SPF_UMIN && SPF2 == SPF_UMAX) || (SPF1 == SPF_UMAX && SPF2 == SPF_UMIN)) return replaceInstUsesWith(Outer, C); } if (SPF1 == SPF2) { const APInt *CB, *CC; if (match(B, m_APInt(CB)) && match(C, m_APInt(CC))) { // MIN(MIN(A, 23), 97) -> MIN(A, 23) // MAX(MAX(A, 97), 23) -> MAX(A, 97) if ((SPF1 == SPF_UMIN && CB->ule(*CC)) || (SPF1 == SPF_SMIN && CB->sle(*CC)) || (SPF1 == SPF_UMAX && CB->uge(*CC)) || (SPF1 == SPF_SMAX && CB->sge(*CC))) return replaceInstUsesWith(Outer, Inner); // MIN(MIN(A, 97), 23) -> MIN(A, 23) // MAX(MAX(A, 23), 97) -> MAX(A, 97) if ((SPF1 == SPF_UMIN && CB->ugt(*CC)) || (SPF1 == SPF_SMIN && CB->sgt(*CC)) || (SPF1 == SPF_UMAX && CB->ult(*CC)) || (SPF1 == SPF_SMAX && CB->slt(*CC))) { Outer.replaceUsesOfWith(Inner, A); return &Outer; } } } // ABS(ABS(X)) -> ABS(X) // NABS(NABS(X)) -> NABS(X) if (SPF1 == SPF2 && (SPF1 == SPF_ABS || SPF1 == SPF_NABS)) { return replaceInstUsesWith(Outer, Inner); } // ABS(NABS(X)) -> ABS(X) // NABS(ABS(X)) -> NABS(X) if ((SPF1 == SPF_ABS && SPF2 == SPF_NABS) || (SPF1 == SPF_NABS && SPF2 == SPF_ABS)) { SelectInst *SI = cast(Inner); Value *NewSI = Builder.CreateSelect(SI->getCondition(), SI->getFalseValue(), SI->getTrueValue(), SI->getName(), SI); return replaceInstUsesWith(Outer, NewSI); } auto IsFreeOrProfitableToInvert = [&](Value 
*V, Value *&NotV, bool &ElidesXor) { if (match(V, m_Not(m_Value(NotV)))) { // If V has at most 2 uses then we can get rid of the xor operation // entirely. ElidesXor |= !V->hasNUsesOrMore(3); return true; } if (IsFreeToInvert(V, !V->hasNUsesOrMore(3))) { NotV = nullptr; return true; } return false; }; Value *NotA, *NotB, *NotC; bool ElidesXor = false; // MIN(MIN(~A, ~B), ~C) == ~MAX(MAX(A, B), C) // MIN(MAX(~A, ~B), ~C) == ~MAX(MIN(A, B), C) // MAX(MIN(~A, ~B), ~C) == ~MIN(MAX(A, B), C) // MAX(MAX(~A, ~B), ~C) == ~MIN(MIN(A, B), C) // // This transform is performance neutral if we can elide at least one xor from // the set of three operands, since we'll be tacking on an xor at the very // end. if (SelectPatternResult::isMinOrMax(SPF1) && SelectPatternResult::isMinOrMax(SPF2) && IsFreeOrProfitableToInvert(A, NotA, ElidesXor) && IsFreeOrProfitableToInvert(B, NotB, ElidesXor) && IsFreeOrProfitableToInvert(C, NotC, ElidesXor) && ElidesXor) { if (!NotA) NotA = Builder.CreateNot(A); if (!NotB) NotB = Builder.CreateNot(B); if (!NotC) NotC = Builder.CreateNot(C); Value *NewInner = generateMinMaxSelectPattern( Builder, getInverseMinMaxSelectPattern(SPF1), NotA, NotB); Value *NewOuter = Builder.CreateNot(generateMinMaxSelectPattern( Builder, getInverseMinMaxSelectPattern(SPF2), NewInner, NotC)); return replaceInstUsesWith(Outer, NewOuter); } return nullptr; } /// Turn select C, (X + Y), (X - Y) --> (X + (select C, Y, (-Y))). /// This is even legal for FP. static Instruction *foldAddSubSelect(SelectInst &SI, InstCombiner::BuilderTy &Builder) { Value *CondVal = SI.getCondition(); Value *TrueVal = SI.getTrueValue(); Value *FalseVal = SI.getFalseValue(); auto *TI = dyn_cast(TrueVal); auto *FI = dyn_cast(FalseVal); if (!TI || !FI || !TI->hasOneUse() || !FI->hasOneUse()) return nullptr; Instruction *AddOp = nullptr, *SubOp = nullptr; if ((TI->getOpcode() == Instruction::Sub && FI->getOpcode() == Instruction::Add) || (TI->getOpcode() == Instruction::FSub && FI->getOpcode() == Instruction::FAdd)) { AddOp = FI; SubOp = TI; } else if ((FI->getOpcode() == Instruction::Sub && TI->getOpcode() == Instruction::Add) || (FI->getOpcode() == Instruction::FSub && TI->getOpcode() == Instruction::FAdd)) { AddOp = TI; SubOp = FI; } if (AddOp) { Value *OtherAddOp = nullptr; if (SubOp->getOperand(0) == AddOp->getOperand(0)) { OtherAddOp = AddOp->getOperand(1); } else if (SubOp->getOperand(0) == AddOp->getOperand(1)) { OtherAddOp = AddOp->getOperand(0); } if (OtherAddOp) { // So at this point we know we have (Y -> OtherAddOp): // select C, (add X, Y), (sub X, Z) Value *NegVal; // Compute -Z if (SI.getType()->isFPOrFPVectorTy()) { NegVal = Builder.CreateFNeg(SubOp->getOperand(1)); if (Instruction *NegInst = dyn_cast(NegVal)) { FastMathFlags Flags = AddOp->getFastMathFlags(); Flags &= SubOp->getFastMathFlags(); NegInst->setFastMathFlags(Flags); } } else { NegVal = Builder.CreateNeg(SubOp->getOperand(1)); } Value *NewTrueOp = OtherAddOp; Value *NewFalseOp = NegVal; if (AddOp != TI) std::swap(NewTrueOp, NewFalseOp); Value *NewSel = Builder.CreateSelect(CondVal, NewTrueOp, NewFalseOp, SI.getName() + ".p", &SI); if (SI.getType()->isFPOrFPVectorTy()) { Instruction *RI = BinaryOperator::CreateFAdd(SubOp->getOperand(0), NewSel); FastMathFlags Flags = AddOp->getFastMathFlags(); Flags &= SubOp->getFastMathFlags(); RI->setFastMathFlags(Flags); return RI; } else return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel); } } return nullptr; } Instruction *InstCombiner::foldSelectExtConst(SelectInst &Sel) { Instruction *ExtInst; if 
(!match(Sel.getTrueValue(), m_Instruction(ExtInst)) && !match(Sel.getFalseValue(), m_Instruction(ExtInst))) return nullptr; auto ExtOpcode = ExtInst->getOpcode(); if (ExtOpcode != Instruction::ZExt && ExtOpcode != Instruction::SExt) return nullptr; // TODO: Handle larger types? That requires adjusting FoldOpIntoSelect too. Value *X = ExtInst->getOperand(0); Type *SmallType = X->getType(); if (!SmallType->isIntOrIntVectorTy(1)) return nullptr; Constant *C; if (!match(Sel.getTrueValue(), m_Constant(C)) && !match(Sel.getFalseValue(), m_Constant(C))) return nullptr; // If the constant is the same after truncation to the smaller type and // extension to the original type, we can narrow the select. Value *Cond = Sel.getCondition(); Type *SelType = Sel.getType(); Constant *TruncC = ConstantExpr::getTrunc(C, SmallType); Constant *ExtC = ConstantExpr::getCast(ExtOpcode, TruncC, SelType); if (ExtC == C) { Value *TruncCVal = cast(TruncC); if (ExtInst == Sel.getFalseValue()) std::swap(X, TruncCVal); // select Cond, (ext X), C --> ext(select Cond, X, C') // select Cond, C, (ext X) --> ext(select Cond, C', X) Value *NewSel = Builder.CreateSelect(Cond, X, TruncCVal, "narrow", &Sel); return CastInst::Create(Instruction::CastOps(ExtOpcode), NewSel, SelType); } // If one arm of the select is the extend of the condition, replace that arm // with the extension of the appropriate known bool value. if (Cond == X) { if (ExtInst == Sel.getTrueValue()) { // select X, (sext X), C --> select X, -1, C // select X, (zext X), C --> select X, 1, C Constant *One = ConstantInt::getTrue(SmallType); Constant *AllOnesOrOne = ConstantExpr::getCast(ExtOpcode, One, SelType); return SelectInst::Create(Cond, AllOnesOrOne, C, "", nullptr, &Sel); } else { // select X, C, (sext X) --> select X, C, 0 // select X, C, (zext X) --> select X, C, 0 Constant *Zero = ConstantInt::getNullValue(SelType); return SelectInst::Create(Cond, C, Zero, "", nullptr, &Sel); } } return nullptr; } /// Try to transform a vector select with a constant condition vector into a /// shuffle for easier combining with other shuffles and insert/extract. static Instruction *canonicalizeSelectToShuffle(SelectInst &SI) { Value *CondVal = SI.getCondition(); Constant *CondC; if (!CondVal->getType()->isVectorTy() || !match(CondVal, m_Constant(CondC))) return nullptr; unsigned NumElts = CondVal->getType()->getVectorNumElements(); SmallVector Mask; Mask.reserve(NumElts); Type *Int32Ty = Type::getInt32Ty(CondVal->getContext()); for (unsigned i = 0; i != NumElts; ++i) { Constant *Elt = CondC->getAggregateElement(i); if (!Elt) return nullptr; if (Elt->isOneValue()) { // If the select condition element is true, choose from the 1st vector. Mask.push_back(ConstantInt::get(Int32Ty, i)); } else if (Elt->isNullValue()) { // If the select condition element is false, choose from the 2nd vector. Mask.push_back(ConstantInt::get(Int32Ty, i + NumElts)); } else if (isa(Elt)) { // Undef in a select condition (choose one of the operands) does not mean // the same thing as undef in a shuffle mask (any value is acceptable), so // give up. return nullptr; } else { // Bail out on a constant expression. 
return nullptr; } } return new ShuffleVectorInst(SI.getTrueValue(), SI.getFalseValue(), ConstantVector::get(Mask)); } /// Reuse bitcasted operands between a compare and select: /// select (cmp (bitcast C), (bitcast D)), (bitcast' C), (bitcast' D) --> /// bitcast (select (cmp (bitcast C), (bitcast D)), (bitcast C), (bitcast D)) static Instruction *foldSelectCmpBitcasts(SelectInst &Sel, InstCombiner::BuilderTy &Builder) { Value *Cond = Sel.getCondition(); Value *TVal = Sel.getTrueValue(); Value *FVal = Sel.getFalseValue(); CmpInst::Predicate Pred; Value *A, *B; if (!match(Cond, m_Cmp(Pred, m_Value(A), m_Value(B)))) return nullptr; // The select condition is a compare instruction. If the select's true/false // values are already the same as the compare operands, there's nothing to do. if (TVal == A || TVal == B || FVal == A || FVal == B) return nullptr; Value *C, *D; if (!match(A, m_BitCast(m_Value(C))) || !match(B, m_BitCast(m_Value(D)))) return nullptr; // select (cmp (bitcast C), (bitcast D)), (bitcast TSrc), (bitcast FSrc) Value *TSrc, *FSrc; if (!match(TVal, m_BitCast(m_Value(TSrc))) || !match(FVal, m_BitCast(m_Value(FSrc)))) return nullptr; // If the select true/false values are *different bitcasts* of the same source // operands, make the select operands the same as the compare operands and // cast the result. This is the canonical select form for min/max. Value *NewSel; if (TSrc == C && FSrc == D) { // select (cmp (bitcast C), (bitcast D)), (bitcast' C), (bitcast' D) --> // bitcast (select (cmp A, B), A, B) NewSel = Builder.CreateSelect(Cond, A, B, "", &Sel); } else if (TSrc == D && FSrc == C) { // select (cmp (bitcast C), (bitcast D)), (bitcast' D), (bitcast' C) --> // bitcast (select (cmp A, B), B, A) NewSel = Builder.CreateSelect(Cond, B, A, "", &Sel); } else { return nullptr; } return CastInst::CreateBitOrPointerCast(NewSel, Sel.getType()); } /// Try to eliminate select instructions that test the returned flag of cmpxchg /// instructions. /// /// If a select instruction tests the returned flag of a cmpxchg instruction and /// selects between the returned value of the cmpxchg instruction its compare /// operand, the result of the select will always be equal to its false value. /// For example: /// /// %0 = cmpxchg i64* %ptr, i64 %compare, i64 %new_value seq_cst seq_cst /// %1 = extractvalue { i64, i1 } %0, 1 /// %2 = extractvalue { i64, i1 } %0, 0 /// %3 = select i1 %1, i64 %compare, i64 %2 /// ret i64 %3 /// /// The returned value of the cmpxchg instruction (%2) is the original value /// located at %ptr prior to any update. If the cmpxchg operation succeeds, %2 /// must have been equal to %compare. Thus, the result of the select is always /// equal to %2, and the code can be simplified to: /// /// %0 = cmpxchg i64* %ptr, i64 %compare, i64 %new_value seq_cst seq_cst /// %1 = extractvalue { i64, i1 } %0, 0 /// ret i64 %1 /// static Instruction *foldSelectCmpXchg(SelectInst &SI) { // A helper that determines if V is an extractvalue instruction whose // aggregate operand is a cmpxchg instruction and whose single index is equal // to I. If such conditions are true, the helper returns the cmpxchg // instruction; otherwise, a nullptr is returned. 
auto isExtractFromCmpXchg = [](Value *V, unsigned I) -> AtomicCmpXchgInst * { auto *Extract = dyn_cast(V); if (!Extract) return nullptr; if (Extract->getIndices()[0] != I) return nullptr; return dyn_cast(Extract->getAggregateOperand()); }; // If the select has a single user, and this user is a select instruction that // we can simplify, skip the cmpxchg simplification for now. if (SI.hasOneUse()) if (auto *Select = dyn_cast(SI.user_back())) if (Select->getCondition() == SI.getCondition()) if (Select->getFalseValue() == SI.getTrueValue() || Select->getTrueValue() == SI.getFalseValue()) return nullptr; // Ensure the select condition is the returned flag of a cmpxchg instruction. auto *CmpXchg = isExtractFromCmpXchg(SI.getCondition(), 1); if (!CmpXchg) return nullptr; // Check the true value case: The true value of the select is the returned // value of the same cmpxchg used by the condition, and the false value is the // cmpxchg instruction's compare operand. if (auto *X = isExtractFromCmpXchg(SI.getTrueValue(), 0)) if (X == CmpXchg && X->getCompareOperand() == SI.getFalseValue()) { SI.setTrueValue(SI.getFalseValue()); return &SI; } // Check the false value case: The false value of the select is the returned // value of the same cmpxchg used by the condition, and the true value is the // cmpxchg instruction's compare operand. if (auto *X = isExtractFromCmpXchg(SI.getFalseValue(), 0)) if (X == CmpXchg && X->getCompareOperand() == SI.getTrueValue()) { SI.setTrueValue(SI.getFalseValue()); return &SI; } return nullptr; } Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { Value *CondVal = SI.getCondition(); Value *TrueVal = SI.getTrueValue(); Value *FalseVal = SI.getFalseValue(); Type *SelType = SI.getType(); // FIXME: Remove this workaround when freeze related patches are done. // For select with undef operand which feeds into an equality comparison, // don't simplify it so loop unswitch can know the equality comparison // may have an undef operand. This is a workaround for PR31652 caused by // descrepancy about branch on undef between LoopUnswitch and GVN. if (isa(TrueVal) || isa(FalseVal)) { if (llvm::any_of(SI.users(), [&](User *U) { ICmpInst *CI = dyn_cast(U); if (CI && CI->isEquality()) return true; return false; })) { return nullptr; } } if (Value *V = SimplifySelectInst(CondVal, TrueVal, FalseVal, SQ.getWithInstruction(&SI))) return replaceInstUsesWith(SI, V); if (Instruction *I = canonicalizeSelectToShuffle(SI)) return I; // Canonicalize a one-use integer compare with a non-canonical predicate by // inverting the predicate and swapping the select operands. This matches a // compare canonicalization for conditional branches. // TODO: Should we do the same for FP compares? CmpInst::Predicate Pred; if (match(CondVal, m_OneUse(m_ICmp(Pred, m_Value(), m_Value()))) && !isCanonicalPredicate(Pred)) { // Swap true/false values and condition. CmpInst *Cond = cast(CondVal); Cond->setPredicate(CmpInst::getInversePredicate(Pred)); SI.setOperand(1, FalseVal); SI.setOperand(2, TrueVal); SI.swapProfMetadata(); Worklist.Add(Cond); return &SI; } if (SelType->isIntOrIntVectorTy(1) && TrueVal->getType() == CondVal->getType()) { if (match(TrueVal, m_One())) { // Change: A = select B, true, C --> A = or B, C return BinaryOperator::CreateOr(CondVal, FalseVal); } if (match(TrueVal, m_Zero())) { // Change: A = select B, false, C --> A = and !B, C Value *NotCond = Builder.CreateNot(CondVal, "not." 
+ CondVal->getName()); return BinaryOperator::CreateAnd(NotCond, FalseVal); } if (match(FalseVal, m_Zero())) { // Change: A = select B, C, false --> A = and B, C return BinaryOperator::CreateAnd(CondVal, TrueVal); } if (match(FalseVal, m_One())) { // Change: A = select B, C, true --> A = or !B, C Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName()); return BinaryOperator::CreateOr(NotCond, TrueVal); } // select a, a, b -> a | b // select a, b, a -> a & b if (CondVal == TrueVal) return BinaryOperator::CreateOr(CondVal, FalseVal); if (CondVal == FalseVal) return BinaryOperator::CreateAnd(CondVal, TrueVal); // select a, ~a, b -> (~a) & b // select a, b, ~a -> (~a) | b if (match(TrueVal, m_Not(m_Specific(CondVal)))) return BinaryOperator::CreateAnd(TrueVal, FalseVal); if (match(FalseVal, m_Not(m_Specific(CondVal)))) return BinaryOperator::CreateOr(TrueVal, FalseVal); } // Selecting between two integer or vector splat integer constants? // // Note that we don't handle a scalar select of vectors: // select i1 %c, <2 x i8> <1, 1>, <2 x i8> <0, 0> // because that may need 3 instructions to splat the condition value: // extend, insertelement, shufflevector. if (SelType->isIntOrIntVectorTy() && CondVal->getType()->isVectorTy() == SelType->isVectorTy()) { // select C, 1, 0 -> zext C to int if (match(TrueVal, m_One()) && match(FalseVal, m_Zero())) return new ZExtInst(CondVal, SelType); // select C, -1, 0 -> sext C to int if (match(TrueVal, m_AllOnes()) && match(FalseVal, m_Zero())) return new SExtInst(CondVal, SelType); // select C, 0, 1 -> zext !C to int if (match(TrueVal, m_Zero()) && match(FalseVal, m_One())) { Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName()); return new ZExtInst(NotCond, SelType); } // select C, 0, -1 -> sext !C to int if (match(TrueVal, m_Zero()) && match(FalseVal, m_AllOnes())) { Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName()); return new SExtInst(NotCond, SelType); } } // See if we are selecting two values based on a comparison of the two values. if (FCmpInst *FCI = dyn_cast(CondVal)) { if (FCI->getOperand(0) == TrueVal && FCI->getOperand(1) == FalseVal) { // Transform (X == Y) ? X : Y -> Y if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) { // This is not safe in general for floating point: // consider X== -0, Y== +0. // It becomes safe if either operand is a nonzero constant. ConstantFP *CFPt, *CFPf; if (((CFPt = dyn_cast(TrueVal)) && !CFPt->getValueAPF().isZero()) || ((CFPf = dyn_cast(FalseVal)) && !CFPf->getValueAPF().isZero())) return replaceInstUsesWith(SI, FalseVal); } // Transform (X une Y) ? X : Y -> X if (FCI->getPredicate() == FCmpInst::FCMP_UNE) { // This is not safe in general for floating point: // consider X== -0, Y== +0. // It becomes safe if either operand is a nonzero constant. ConstantFP *CFPt, *CFPf; if (((CFPt = dyn_cast(TrueVal)) && !CFPt->getValueAPF().isZero()) || ((CFPf = dyn_cast(FalseVal)) && !CFPf->getValueAPF().isZero())) return replaceInstUsesWith(SI, TrueVal); } // Canonicalize to use ordered comparisons by swapping the select // operands. // // e.g. // (X ugt Y) ? X : Y -> (X ole Y) ? 
Y : X if (FCI->hasOneUse() && FCmpInst::isUnordered(FCI->getPredicate())) { FCmpInst::Predicate InvPred = FCI->getInversePredicate(); IRBuilder<>::FastMathFlagGuard FMFG(Builder); Builder.setFastMathFlags(FCI->getFastMathFlags()); Value *NewCond = Builder.CreateFCmp(InvPred, TrueVal, FalseVal, FCI->getName() + ".inv"); return SelectInst::Create(NewCond, FalseVal, TrueVal, SI.getName() + ".p"); } // NOTE: if we wanted to, this is where to detect MIN/MAX } else if (FCI->getOperand(0) == FalseVal && FCI->getOperand(1) == TrueVal){ // Transform (X == Y) ? Y : X -> X if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) { // This is not safe in general for floating point: // consider X== -0, Y== +0. // It becomes safe if either operand is a nonzero constant. ConstantFP *CFPt, *CFPf; if (((CFPt = dyn_cast(TrueVal)) && !CFPt->getValueAPF().isZero()) || ((CFPf = dyn_cast(FalseVal)) && !CFPf->getValueAPF().isZero())) return replaceInstUsesWith(SI, FalseVal); } // Transform (X une Y) ? Y : X -> Y if (FCI->getPredicate() == FCmpInst::FCMP_UNE) { // This is not safe in general for floating point: // consider X== -0, Y== +0. // It becomes safe if either operand is a nonzero constant. ConstantFP *CFPt, *CFPf; if (((CFPt = dyn_cast(TrueVal)) && !CFPt->getValueAPF().isZero()) || ((CFPf = dyn_cast(FalseVal)) && !CFPf->getValueAPF().isZero())) return replaceInstUsesWith(SI, TrueVal); } // Canonicalize to use ordered comparisons by swapping the select // operands. // // e.g. // (X ugt Y) ? X : Y -> (X ole Y) ? X : Y if (FCI->hasOneUse() && FCmpInst::isUnordered(FCI->getPredicate())) { FCmpInst::Predicate InvPred = FCI->getInversePredicate(); IRBuilder<>::FastMathFlagGuard FMFG(Builder); Builder.setFastMathFlags(FCI->getFastMathFlags()); Value *NewCond = Builder.CreateFCmp(InvPred, FalseVal, TrueVal, FCI->getName() + ".inv"); return SelectInst::Create(NewCond, FalseVal, TrueVal, SI.getName() + ".p"); } // NOTE: if we wanted to, this is where to detect MIN/MAX } // NOTE: if we wanted to, this is where to detect ABS } // See if we are selecting two values based on a comparison of the two values. if (ICmpInst *ICI = dyn_cast(CondVal)) if (Instruction *Result = foldSelectInstWithICmp(SI, ICI)) return Result; if (Instruction *Add = foldAddSubSelect(SI, Builder)) return Add; // Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z)) auto *TI = dyn_cast(TrueVal); auto *FI = dyn_cast(FalseVal); if (TI && FI && TI->getOpcode() == FI->getOpcode()) if (Instruction *IV = foldSelectOpOp(SI, TI, FI)) return IV; if (Instruction *I = foldSelectExtConst(SI)) return I; // See if we can fold the select into one of our operands. if (SelType->isIntOrIntVectorTy() || SelType->isFPOrFPVectorTy()) { if (Instruction *FoldI = foldSelectIntoOp(SI, TrueVal, FalseVal)) return FoldI; Value *LHS, *RHS, *LHS2, *RHS2; Instruction::CastOps CastOp; SelectPatternResult SPR = matchSelectPattern(&SI, LHS, RHS, &CastOp); auto SPF = SPR.Flavor; if (SelectPatternResult::isMinOrMax(SPF)) { // Canonicalize so that // - type casts are outside select patterns. 
// - float clamp is transformed to min/max pattern bool IsCastNeeded = LHS->getType() != SelType; Value *CmpLHS = cast(CondVal)->getOperand(0); Value *CmpRHS = cast(CondVal)->getOperand(1); if (IsCastNeeded || (LHS->getType()->isFPOrFPVectorTy() && ((CmpLHS != LHS && CmpLHS != RHS) || (CmpRHS != LHS && CmpRHS != RHS)))) { CmpInst::Predicate Pred = getCmpPredicateForMinMax(SPF, SPR.Ordered); Value *Cmp; if (CmpInst::isIntPredicate(Pred)) { Cmp = Builder.CreateICmp(Pred, LHS, RHS); } else { IRBuilder<>::FastMathFlagGuard FMFG(Builder); auto FMF = cast(SI.getCondition())->getFastMathFlags(); Builder.setFastMathFlags(FMF); Cmp = Builder.CreateFCmp(Pred, LHS, RHS); } Value *NewSI = Builder.CreateSelect(Cmp, LHS, RHS, SI.getName(), &SI); if (!IsCastNeeded) return replaceInstUsesWith(SI, NewSI); Value *NewCast = Builder.CreateCast(CastOp, NewSI, SelType); return replaceInstUsesWith(SI, NewCast); } } if (SPF) { // MAX(MAX(a, b), a) -> MAX(a, b) // MIN(MIN(a, b), a) -> MIN(a, b) // MAX(MIN(a, b), a) -> a // MIN(MAX(a, b), a) -> a // ABS(ABS(a)) -> ABS(a) // NABS(NABS(a)) -> NABS(a) if (SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor) if (Instruction *R = foldSPFofSPF(cast(LHS),SPF2,LHS2,RHS2, SI, SPF, RHS)) return R; if (SelectPatternFlavor SPF2 = matchSelectPattern(RHS, LHS2, RHS2).Flavor) if (Instruction *R = foldSPFofSPF(cast(RHS),SPF2,LHS2,RHS2, SI, SPF, LHS)) return R; } // MAX(~a, ~b) -> ~MIN(a, b) if ((SPF == SPF_SMAX || SPF == SPF_UMAX) && IsFreeToInvert(LHS, LHS->hasNUses(2)) && IsFreeToInvert(RHS, RHS->hasNUses(2))) { // For this transform to be profitable, we need to eliminate at least two // 'not' instructions if we're going to add one 'not' instruction. int NumberOfNots = (LHS->hasNUses(2) && match(LHS, m_Not(m_Value()))) + (RHS->hasNUses(2) && match(RHS, m_Not(m_Value()))) + (SI.hasOneUse() && match(*SI.user_begin(), m_Not(m_Value()))); if (NumberOfNots >= 2) { Value *NewLHS = Builder.CreateNot(LHS); Value *NewRHS = Builder.CreateNot(RHS); Value *NewCmp = SPF == SPF_SMAX ? Builder.CreateICmpSLT(NewLHS, NewRHS) : Builder.CreateICmpULT(NewLHS, NewRHS); Value *NewSI = Builder.CreateNot(Builder.CreateSelect(NewCmp, NewLHS, NewRHS)); return replaceInstUsesWith(SI, NewSI); } } // TODO. // ABS(-X) -> ABS(X) } // See if we can fold the select into a phi node if the condition is a select. if (auto *PN = dyn_cast(SI.getCondition())) // The true/false values have to be live in the PHI predecessor's blocks. if (canSelectOperandBeMappingIntoPredBlock(TrueVal, SI) && canSelectOperandBeMappingIntoPredBlock(FalseVal, SI)) if (Instruction *NV = foldOpIntoPhi(SI, PN)) return NV; if (SelectInst *TrueSI = dyn_cast(TrueVal)) { if (TrueSI->getCondition()->getType() == CondVal->getType()) { // select(C, select(C, a, b), c) -> select(C, a, c) if (TrueSI->getCondition() == CondVal) { if (SI.getTrueValue() == TrueSI->getTrueValue()) return nullptr; SI.setOperand(1, TrueSI->getTrueValue()); return &SI; } // select(C0, select(C1, a, b), b) -> select(C0&C1, a, b) // We choose this as normal form to enable folding on the And and shortening // paths for the values (this helps GetUnderlyingObjects() for example). 
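      // Concretely (added illustration):
      //   %inner = select i1 %C1, i32 %a, i32 %b
      //   %outer = select i1 %C0, i32 %inner, i32 %b
      // becomes
      //   %cond  = and i1 %C0, %C1
      //   %outer = select i1 %cond, i32 %a, i32 %b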
if (TrueSI->getFalseValue() == FalseVal && TrueSI->hasOneUse()) { Value *And = Builder.CreateAnd(CondVal, TrueSI->getCondition()); SI.setOperand(0, And); SI.setOperand(1, TrueSI->getTrueValue()); return &SI; } } } if (SelectInst *FalseSI = dyn_cast(FalseVal)) { if (FalseSI->getCondition()->getType() == CondVal->getType()) { // select(C, a, select(C, b, c)) -> select(C, a, c) if (FalseSI->getCondition() == CondVal) { if (SI.getFalseValue() == FalseSI->getFalseValue()) return nullptr; SI.setOperand(2, FalseSI->getFalseValue()); return &SI; } // select(C0, a, select(C1, a, b)) -> select(C0|C1, a, b) if (FalseSI->getTrueValue() == TrueVal && FalseSI->hasOneUse()) { Value *Or = Builder.CreateOr(CondVal, FalseSI->getCondition()); SI.setOperand(0, Or); SI.setOperand(2, FalseSI->getFalseValue()); return &SI; } } } + auto canMergeSelectThroughBinop = [](BinaryOperator *BO) { + // The select might be preventing a division by 0. + switch (BO->getOpcode()) { + default: + return true; + case Instruction::SRem: + case Instruction::URem: + case Instruction::SDiv: + case Instruction::UDiv: + return false; + } + }; + // Try to simplify a binop sandwiched between 2 selects with the same // condition. // select(C, binop(select(C, X, Y), W), Z) -> select(C, binop(X, W), Z) BinaryOperator *TrueBO; - if (match(TrueVal, m_OneUse(m_BinOp(TrueBO)))) { + if (match(TrueVal, m_OneUse(m_BinOp(TrueBO))) && + canMergeSelectThroughBinop(TrueBO)) { if (auto *TrueBOSI = dyn_cast(TrueBO->getOperand(0))) { if (TrueBOSI->getCondition() == CondVal) { TrueBO->setOperand(0, TrueBOSI->getTrueValue()); Worklist.Add(TrueBO); return &SI; } } if (auto *TrueBOSI = dyn_cast(TrueBO->getOperand(1))) { if (TrueBOSI->getCondition() == CondVal) { TrueBO->setOperand(1, TrueBOSI->getTrueValue()); Worklist.Add(TrueBO); return &SI; } } } // select(C, Z, binop(select(C, X, Y), W)) -> select(C, Z, binop(Y, W)) BinaryOperator *FalseBO; - if (match(FalseVal, m_OneUse(m_BinOp(FalseBO)))) { + if (match(FalseVal, m_OneUse(m_BinOp(FalseBO))) && + canMergeSelectThroughBinop(FalseBO)) { if (auto *FalseBOSI = dyn_cast(FalseBO->getOperand(0))) { if (FalseBOSI->getCondition() == CondVal) { FalseBO->setOperand(0, FalseBOSI->getFalseValue()); Worklist.Add(FalseBO); return &SI; } } if (auto *FalseBOSI = dyn_cast(FalseBO->getOperand(1))) { if (FalseBOSI->getCondition() == CondVal) { FalseBO->setOperand(1, FalseBOSI->getFalseValue()); Worklist.Add(FalseBO); return &SI; } } } if (BinaryOperator::isNot(CondVal)) { SI.setOperand(0, BinaryOperator::getNotArgument(CondVal)); SI.setOperand(1, FalseVal); SI.setOperand(2, TrueVal); return &SI; } if (VectorType *VecTy = dyn_cast(SelType)) { unsigned VWidth = VecTy->getNumElements(); APInt UndefElts(VWidth, 0); APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); if (Value *V = SimplifyDemandedVectorElts(&SI, AllOnesEltMask, UndefElts)) { if (V != &SI) return replaceInstUsesWith(SI, V); return &SI; } } // See if we can determine the result of this select based on a dominating // condition. BasicBlock *Parent = SI.getParent(); if (BasicBlock *Dom = Parent->getSinglePredecessor()) { auto *PBI = dyn_cast_or_null(Dom->getTerminator()); if (PBI && PBI->isConditional() && PBI->getSuccessor(0) != PBI->getSuccessor(1) && (PBI->getSuccessor(0) == Parent || PBI->getSuccessor(1) == Parent)) { bool CondIsTrue = PBI->getSuccessor(0) == Parent; Optional Implication = isImpliedCondition( PBI->getCondition(), SI.getCondition(), DL, CondIsTrue); if (Implication) { Value *V = *Implication ? 
TrueVal : FalseVal; return replaceInstUsesWith(SI, V); } } } // If we can compute the condition, there's no need for a select. // Like the above fold, we are attempting to reduce compile-time cost by // putting this fold here with limitations rather than in InstSimplify. // The motivation for this call into value tracking is to take advantage of // the assumption cache, so make sure that is populated. if (!CondVal->getType()->isVectorTy() && !AC.assumptions().empty()) { KnownBits Known(1); computeKnownBits(CondVal, Known, 0, &SI); if (Known.One.isOneValue()) return replaceInstUsesWith(SI, TrueVal); if (Known.Zero.isOneValue()) return replaceInstUsesWith(SI, FalseVal); } if (Instruction *BitCastSel = foldSelectCmpBitcasts(SI, Builder)) return BitCastSel; // Simplify selects that test the returned flag of cmpxchg instructions. if (Instruction *Select = foldSelectCmpXchg(SI)) return Select; return nullptr; } Index: head/contrib/llvm/lib/Transforms/Scalar/LICM.cpp =================================================================== --- head/contrib/llvm/lib/Transforms/Scalar/LICM.cpp (revision 329982) +++ head/contrib/llvm/lib/Transforms/Scalar/LICM.cpp (revision 329983) @@ -1,1561 +1,1576 @@ //===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This pass performs loop invariant code motion, attempting to remove as much // code from the body of a loop as possible. It does this by either hoisting // code into the preheader block, or by sinking code to the exit blocks if it is // safe. This pass also promotes must-aliased memory locations in the loop to // live in registers, thus hoisting and sinking "invariant" loads and stores. // // This pass uses alias analysis for two purposes: // // 1. Moving loop invariant loads and calls out of loops. If we can determine // that a load or call inside of a loop never aliases anything stored to, // we can hoist it or sink it like any other instruction. // 2. Scalar Promotion of Memory - If there is a store instruction inside of // the loop, we try to move the store to happen AFTER the loop instead of // inside of the loop. This can only happen if a few conditions are true: // A. The pointer stored through is loop invariant // B. There are no stores or loads in the loop which _may_ alias the // pointer. There are no calls in the loop which mod/ref the pointer. // If these conditions are true, we can promote the loads and stores in the // loop of the pointer to use a temporary alloca'd variable. We then use // the SSAUpdater to construct the appropriate SSA form for the value. 
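//
// A small illustration of the promotion case (added example):
//
//   for (...) { *P += 1; }      // P loop-invariant, no aliasing accesses
//
// is effectively rewritten as:
//
//   tmp = *P;
//   for (...) { tmp += 1; }
//   *P = tmp;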
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/LICM.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PredIteratorCache.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <algorithm>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "licm"

STATISTIC(NumSunk, "Number of instructions sunk out of loop");
STATISTIC(NumHoisted, "Number of instructions hoisted out of loop");
STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
STATISTIC(NumPromoted, "Number of memory locations promoted to registers");

/// Memory promotion is enabled by default.
static cl::opt<bool> DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false), cl::desc("Disable memory promotion in LICM pass")); static cl::opt<unsigned> MaxNumUsesTraversed( "licm-max-num-uses-traversed", cl::Hidden, cl::init(8), cl::desc("Max num uses visited for identifying load " "invariance in loop using invariant start (default = 8)")); static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI); static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, TargetTransformInfo *TTI, bool &FreeInLoop); static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE); static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, - const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, + const Loop *CurLoop, LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE, bool FreeInLoop); static bool isSafeToExecuteUnconditionally(Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE, const Instruction *CtxI = nullptr); static bool pointerInvalidatedByLoop(Value *V, uint64_t Size, const AAMDNodes &AAInfo, AliasSetTracker *CurAST); static Instruction * CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI, const LoopSafetyInfo *SafetyInfo); namespace { struct LoopInvariantCodeMotion { bool runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, ScalarEvolution *SE, MemorySSA *MSSA, OptimizationRemarkEmitter *ORE, bool DeleteAST); DenseMap<Loop *, AliasSetTracker *> &getLoopToAliasSetMap() { return LoopToAliasSetMap; } private: DenseMap<Loop *, AliasSetTracker *> LoopToAliasSetMap; AliasSetTracker *collectAliasInfoForLoop(Loop *L, LoopInfo *LI, AliasAnalysis *AA); }; struct LegacyLICMPass : public LoopPass { static char ID; // Pass identification, replacement for typeid LegacyLICMPass() : LoopPass(ID) { initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry()); } bool runOnLoop(Loop *L, LPPassManager &LPM) override { if (skipLoop(L)) { // If we have run LICM on a previous loop but now we are skipping // (because we've hit the opt-bisect limit), we need to clear the // loop alias information. for (auto &LTAS : LICM.getLoopToAliasSetMap()) delete LTAS.second; LICM.getLoopToAliasSetMap().clear(); return false; } auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); MemorySSA *MSSA = EnableMSSALoopDependency ? (&getAnalysis<MemorySSAWrapperPass>().getMSSA()) : nullptr; // For the old PM, we can't use OptimizationRemarkEmitter as an analysis // pass. Function analyses need to be preserved across loop transformations // but ORE cannot be preserved (see comment before the pass definition). OptimizationRemarkEmitter ORE(L->getHeader()->getParent()); return LICM.runOnLoop(L, &getAnalysis<AAResultsWrapperPass>().getAAResults(), &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(), &getAnalysis<DominatorTreeWrapperPass>().getDomTree(), &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(), &getAnalysis<TargetTransformInfoWrapperPass>().getTTI( *L->getHeader()->getParent()), SE ? &SE->getSE() : nullptr, MSSA, &ORE, false); } /// This transformation requires natural loop information & requires that /// loop preheaders be inserted into the CFG...
/// void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired(); if (EnableMSSALoopDependency) AU.addRequired(); AU.addRequired(); getLoopAnalysisUsage(AU); } using llvm::Pass::doFinalization; bool doFinalization() override { assert(LICM.getLoopToAliasSetMap().empty() && "Didn't free loop alias sets"); return false; } private: LoopInvariantCodeMotion LICM; /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info. void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) override; /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias /// set. void deleteAnalysisValue(Value *V, Loop *L) override; /// Simple Analysis hook. Delete loop L from alias set map. void deleteAnalysisLoop(Loop *L) override; }; } // namespace PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &) { const auto &FAM = AM.getResult(L, AR).getManager(); Function *F = L.getHeader()->getParent(); auto *ORE = FAM.getCachedResult(*F); // FIXME: This should probably be optional rather than required. if (!ORE) report_fatal_error("LICM: OptimizationRemarkEmitterAnalysis not " "cached at a higher level"); LoopInvariantCodeMotion LICM; if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.TTI, &AR.SE, AR.MSSA, ORE, true)) return PreservedAnalyses::all(); auto PA = getLoopPassPreservedAnalyses(); PA.preserveSet(); return PA; } char LegacyLICMPass::ID = 0; INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false, false) INITIALIZE_PASS_DEPENDENCY(LoopPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false, false) Pass *llvm::createLICMPass() { return new LegacyLICMPass(); } /// Hoist expressions out of the specified loop. Note, alias info for inner /// loop is not preserved so it is not a good idea to run LICM multiple /// times on one loop. /// We should delete AST for inner loops in the new pass manager to avoid /// memory leak. /// bool LoopInvariantCodeMotion::runOnLoop( Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, ScalarEvolution *SE, MemorySSA *MSSA, OptimizationRemarkEmitter *ORE, bool DeleteAST) { bool Changed = false; assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form."); AliasSetTracker *CurAST = collectAliasInfoForLoop(L, LI, AA); // Get the preheader block to move instructions into... BasicBlock *Preheader = L->getLoopPreheader(); // Compute loop safety information. LoopSafetyInfo SafetyInfo; computeLoopSafetyInfo(&SafetyInfo, L); // We want to visit all of the instructions in this loop... that are not parts // of our subloops (they have already had their invariants hoisted out of // their loop, into this loop, so there is no need to process the BODIES of // the subloops). // // Traverse the body of the loop in depth first order on the dominator tree so // that we are guaranteed to see definitions before we see uses. This allows // us to sink instructions in one pass, without iteration. After sinking // instructions, we perform another pass to hoist them out of the loop. 
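// As an illustrative sketch of why the visit order matters: with a chain // %a = mul i32 %inv1, %inv2 // %b = add i32 %a, 1 ; only %b is used outside the loop // sinking visits %b before %a (uses before defs), so once %b has been sunk, // %a is no longer used inside the loop and can be sunk in the same pass.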
// if (L->hasDedicatedExits()) Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L, CurAST, &SafetyInfo, ORE); if (Preheader) Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L, CurAST, &SafetyInfo, ORE); // Now that all loop invariants have been removed from the loop, promote any // memory references to scalars that we can. // Don't sink stores from loops without dedicated block exits. Exits // containing indirect branches are not transformed by loop simplify, // make sure we catch that. An additional load may be generated in the // preheader for SSA updater, so also avoid sinking when no preheader // is available. if (!DisablePromotion && Preheader && L->hasDedicatedExits()) { // Figure out the loop exits and their insertion points SmallVector ExitBlocks; L->getUniqueExitBlocks(ExitBlocks); // We can't insert into a catchswitch. bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) { return isa(Exit->getTerminator()); }); if (!HasCatchSwitch) { SmallVector InsertPts; InsertPts.reserve(ExitBlocks.size()); for (BasicBlock *ExitBlock : ExitBlocks) InsertPts.push_back(&*ExitBlock->getFirstInsertionPt()); PredIteratorCache PIC; bool Promoted = false; // Loop over all of the alias sets in the tracker object. for (AliasSet &AS : *CurAST) { // We can promote this alias set if it has a store, if it is a "Must" // alias set, if the pointer is loop invariant, and if we are not // eliminating any volatile loads or stores. if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() || AS.isVolatile() || !L->isLoopInvariant(AS.begin()->getValue())) continue; assert( !AS.empty() && "Must alias set should have at least one pointer element in it!"); SmallSetVector PointerMustAliases; for (const auto &ASI : AS) PointerMustAliases.insert(ASI.getValue()); Promoted |= promoteLoopAccessesToScalars(PointerMustAliases, ExitBlocks, InsertPts, PIC, LI, DT, TLI, L, CurAST, &SafetyInfo, ORE); } // Once we have promoted values across the loop body we have to // recursively reform LCSSA as any nested loop may now have values defined // within the loop used in the outer loop. // FIXME: This is really heavy handed. It would be a bit better to use an // SSAUpdater strategy during promotion that was LCSSA aware and reformed // it as it went. if (Promoted) formLCSSARecursively(*L, *DT, LI, SE); Changed |= Promoted; } } // Check that neither this loop nor its parent have had LCSSA broken. LICM is // specifically moving instructions across the loop boundary and so it is // especially in need of sanity checking here. assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!"); assert((!L->getParentLoop() || L->getParentLoop()->isLCSSAForm(*DT)) && "Parent loop not left in LCSSA form after LICM!"); // If this loop is nested inside of another one, save the alias information // for when we process the outer loop. if (L->getParentLoop() && !DeleteAST) LoopToAliasSetMap[L] = CurAST; else delete CurAST; if (Changed && SE) SE->forgetLoopDispositions(L); return Changed; } /// Walk the specified region of the CFG (defined by all blocks dominated by /// the specified block, and that are in the current loop) in reverse depth /// first order w.r.t the DominatorTree. This allows us to visit uses before /// definitions, allowing us to sink a loop body in one pass without iteration. 
/// bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, Loop *CurLoop, AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { // Verify inputs. assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr && "Unexpected input to sinkRegion"); // We want to visit children before parents. We will enqueue all the parents // before their children in the worklist and process the worklist in reverse // order. SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop); bool Changed = false; for (DomTreeNode *DTN : reverse(Worklist)) { BasicBlock *BB = DTN->getBlock(); // Only need to process the contents of this block if it is not part of a // subloop (which would already have been processed). if (inSubLoop(BB, CurLoop, LI)) continue; for (BasicBlock::iterator II = BB->end(); II != BB->begin();) { Instruction &I = *--II; // If the instruction is dead, we would normally try to sink it because it // isn't used in the loop; instead, just delete it. if (isInstructionTriviallyDead(&I, TLI)) { DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n'); ++II; CurAST->deleteValue(&I); I.eraseFromParent(); Changed = true; continue; } // Check to see if we can sink this instruction to the exit blocks // of the loop. We can do this if all of the users of the instruction are // outside of the loop. In this case, it doesn't even matter if the // operands of the instruction are loop invariant. // bool FreeInLoop = false; if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) && canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE)) { if (sink(I, LI, DT, CurLoop, SafetyInfo, ORE, FreeInLoop)) { if (!FreeInLoop) { ++II; CurAST->deleteValue(&I); I.eraseFromParent(); } Changed = true; } } } } return Changed; } /// Walk the specified region of the CFG (defined by all blocks dominated by /// the specified block, and that are in the current loop) in depth first /// order w.r.t the DominatorTree. This allows us to visit definitions before /// uses, allowing us to hoist a loop body in one pass without iteration. /// bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop, AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { // Verify inputs. assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr && "Unexpected input to hoistRegion"); // We want to visit parents before children. We will enqueue all the parents // before their children in the worklist and process the worklist in order. SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop); bool Changed = false; for (DomTreeNode *DTN : Worklist) { BasicBlock *BB = DTN->getBlock(); // Only need to process the contents of this block if it is not part of a // subloop (which would already have been processed). if (!inSubLoop(BB, CurLoop, LI)) for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) { Instruction &I = *II++; // Try constant folding this instruction. If all the operands are // constants, it is technically hoistable, but it would be better to // just fold it.
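// For instance (a hypothetical case), an instruction such as // %c = add i32 7, 5 // appearing in the loop body is simply replaced by the constant 12 here // instead of being hoisted to the preheader.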
if (Constant *C = ConstantFoldInstruction( &I, I.getModule()->getDataLayout(), TLI)) { DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n'); CurAST->copyValue(&I, C); I.replaceAllUsesWith(C); if (isInstructionTriviallyDead(&I, TLI)) { CurAST->deleteValue(&I); I.eraseFromParent(); } Changed = true; continue; } // Attempt to hoist floating-point division out of the loop by // converting it to a reciprocal multiplication. if (I.getOpcode() == Instruction::FDiv && CurLoop->isLoopInvariant(I.getOperand(1)) && I.hasAllowReciprocal()) { auto Divisor = I.getOperand(1); auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0); auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor); ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags()); ReciprocalDivisor->insertBefore(&I); auto Product = BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor); Product->setFastMathFlags(I.getFastMathFlags()); Product->insertAfter(&I); I.replaceAllUsesWith(Product); I.eraseFromParent(); hoist(*ReciprocalDivisor, DT, CurLoop, SafetyInfo, ORE); Changed = true; continue; } // Try hoisting the instruction out to the preheader. We can only do // this if all of the operands of the instruction are loop invariant and // if it is safe to hoist the instruction. // if (CurLoop->hasLoopInvariantOperands(&I) && canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE) && isSafeToExecuteUnconditionally( I, DT, CurLoop, SafetyInfo, ORE, CurLoop->getLoopPreheader()->getTerminator())) Changed |= hoist(I, DT, CurLoop, SafetyInfo, ORE); } } return Changed; } /// Computes loop safety information, checking the loop body and header /// for instructions that may throw an exception. /// void llvm::computeLoopSafetyInfo(LoopSafetyInfo *SafetyInfo, Loop *CurLoop) { assert(CurLoop != nullptr && "CurLoop can't be null"); BasicBlock *Header = CurLoop->getHeader(); // Setting default safety values. SafetyInfo->MayThrow = false; SafetyInfo->HeaderMayThrow = false; // Iterate over header and compute safety info. for (BasicBlock::iterator I = Header->begin(), E = Header->end(); (I != E) && !SafetyInfo->HeaderMayThrow; ++I) SafetyInfo->HeaderMayThrow |= !isGuaranteedToTransferExecutionToSuccessor(&*I); SafetyInfo->MayThrow = SafetyInfo->HeaderMayThrow; // Iterate over loop instructions and compute safety info. // Skip the header as it has been computed and stored in HeaderMayThrow. // The first block in loopinfo.Blocks is guaranteed to be the header. assert(Header == *CurLoop->getBlocks().begin() && "First block must be header"); for (Loop::block_iterator BB = std::next(CurLoop->block_begin()), BBE = CurLoop->block_end(); (BB != BBE) && !SafetyInfo->MayThrow; ++BB) for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); (I != E) && !SafetyInfo->MayThrow; ++I) SafetyInfo->MayThrow |= !isGuaranteedToTransferExecutionToSuccessor(&*I); // Compute funclet colors if we might sink/hoist in a function with a funclet // personality routine. Function *Fn = CurLoop->getHeader()->getParent(); if (Fn->hasPersonalityFn()) if (Constant *PersonalityFn = Fn->getPersonalityFn()) if (isFuncletEHPersonality(classifyEHPersonality(PersonalityFn))) SafetyInfo->BlockColors = colorEHFunclets(*Fn); } // Return true if LI is invariant within the scope of the loop. LI is invariant // if CurLoop is dominated by an invariant.start representing the same memory // location and size as the memory location LI loads from, and also the // invariant.start has no uses.
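// For example (illustrative IR only): // %i8p = bitcast i32* %p to i8* // %inv = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %i8p) // ... // %v = load i32, i32* %p ; inside the loop // The load can be treated as invariant when such an invariant.start dominates // the loop header, covers at least the bytes loaded, and itself has no uses // (i.e. no matching invariant.end).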
static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT, Loop *CurLoop) { Value *Addr = LI->getOperand(0); const DataLayout &DL = LI->getModule()->getDataLayout(); const uint32_t LocSizeInBits = DL.getTypeSizeInBits( cast<PointerType>(Addr->getType())->getElementType()); // If the type is i8 addrspace(x)*, we know this is the type of the // llvm.invariant.start operand. auto *PtrInt8Ty = PointerType::get(Type::getInt8Ty(LI->getContext()), LI->getPointerAddressSpace()); unsigned BitcastsVisited = 0; // Look through bitcasts until we reach the i8* type (this is the // invariant.start operand type). while (Addr->getType() != PtrInt8Ty) { auto *BC = dyn_cast<BitCastInst>(Addr); // Avoid traversing a high number of bitcast uses. if (++BitcastsVisited > MaxNumUsesTraversed || !BC) return false; Addr = BC->getOperand(0); } unsigned UsesVisited = 0; // Traverse all uses of the load operand value, to see if invariant.start is // one of the uses, and whether it dominates the load instruction. for (auto *U : Addr->users()) { // Avoid traversing a load operand with a high number of users. if (++UsesVisited > MaxNumUsesTraversed) return false; IntrinsicInst *II = dyn_cast<IntrinsicInst>(U); // If there are escaping uses of the invariant.start instruction, the load // may be non-invariant. if (!II || II->getIntrinsicID() != Intrinsic::invariant_start || !II->use_empty()) continue; unsigned InvariantSizeInBits = cast<ConstantInt>(II->getArgOperand(0))->getSExtValue() * 8; // Confirm the invariant.start location size contains the load operand size // in bits. Also, the invariant.start should dominate the load, and we // should not hoist the load out of a loop that contains this dominating // invariant.start. if (LocSizeInBits <= InvariantSizeInBits && DT->properlyDominates(II->getParent(), CurLoop->getHeader())) return true; } return false; } bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, Loop *CurLoop, AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { // SafetyInfo is nullptr if we are checking for sinking from the preheader to // the loop body. const bool SinkingToLoopBody = !SafetyInfo; // Loads have extra constraints we have to verify before we can hoist them. if (LoadInst *LI = dyn_cast<LoadInst>(&I)) { if (!LI->isUnordered()) return false; // Don't sink/hoist volatile or ordered atomic loads! // Loads from constant memory are always safe to move, even if they end up // in the same alias set as something that ends up being modified. if (AA->pointsToConstantMemory(LI->getOperand(0))) return true; if (LI->getMetadata(LLVMContext::MD_invariant_load)) return true; if (LI->isAtomic() && SinkingToLoopBody) return false; // Don't sink unordered atomic loads to the loop body. // This checks for an invariant.start dominating the load. if (isLoadInvariantInLoop(LI, DT, CurLoop)) return true; // Don't hoist loads which have may-aliased stores in the loop. uint64_t Size = 0; if (LI->getType()->isSized()) Size = I.getModule()->getDataLayout().getTypeStoreSize(LI->getType()); AAMDNodes AAInfo; LI->getAAMetadata(AAInfo); bool Invalidated = pointerInvalidatedByLoop(LI->getOperand(0), Size, AAInfo, CurAST); // Check the loop-invariant address because this may also be a sinkable load // whose address is not necessarily loop-invariant.
if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand())) ORE->emit([&]() { return OptimizationRemarkMissed( DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", LI) << "failed to move load with loop-invariant address " "because the loop may invalidate its value"; }); return !Invalidated; } else if (CallInst *CI = dyn_cast<CallInst>(&I)) { // Don't sink or hoist dbg info; it's legal, but not useful. if (isa<DbgInfoIntrinsic>(I)) return false; // Don't sink calls which can throw. if (CI->mayThrow()) return false; // Handle simple cases by querying alias analysis. FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI); if (Behavior == FMRB_DoesNotAccessMemory) return true; if (AliasAnalysis::onlyReadsMemory(Behavior)) { // A readonly argmemonly function only reads from memory pointed to by // its arguments with arbitrary offsets. If we can prove there are no // writes to this memory in the loop, we can hoist or sink. if (AliasAnalysis::onlyAccessesArgPointees(Behavior)) { for (Value *Op : CI->arg_operands()) if (Op->getType()->isPointerTy() && pointerInvalidatedByLoop(Op, MemoryLocation::UnknownSize, AAMDNodes(), CurAST)) return false; return true; } // If this call only reads from memory and there are no writes to memory // in the loop, we can hoist or sink the call as appropriate. bool FoundMod = false; for (AliasSet &AS : *CurAST) { if (!AS.isForwardingAliasSet() && AS.isMod()) { FoundMod = true; break; } } if (!FoundMod) return true; } // FIXME: This should use mod/ref information to see if we can hoist or // sink the call. return false; } // Only these instructions are hoistable/sinkable. if (!isa<BinaryOperator>(I) && !isa<CastInst>(I) && !isa<SelectInst>(I) && !isa<GetElementPtrInst>(I) && !isa<CmpInst>(I) && !isa<InsertElementInst>(I) && !isa<ExtractElementInst>(I) && !isa<ShuffleVectorInst>(I) && !isa<ExtractValueInst>(I) && !isa<InsertValueInst>(I)) return false; // If we are checking for sinking from the preheader to the loop body, it is // always safe as there is no speculative execution. if (SinkingToLoopBody) return true; // TODO: Plumb the context instruction through to make hoisting and sinking // more powerful. Hoisting of loads already works due to the special casing // above. return isSafeToExecuteUnconditionally(I, DT, CurLoop, SafetyInfo, nullptr); } /// Returns true if a PHINode is trivially replaceable with an /// Instruction. /// This is true when all incoming values are that instruction. /// This pattern occurs most often with LCSSA PHI nodes. /// static bool isTriviallyReplacablePHI(const PHINode &PN, const Instruction &I) { for (const Value *IncValue : PN.incoming_values()) if (IncValue != &I) return false; return true; } /// Return true if the instruction is free in the loop. static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop, const TargetTransformInfo *TTI) { if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) { if (TTI->getUserCost(GEP) != TargetTransformInfo::TCC_Free) return false; // For a GEP, we cannot simply use getUserCost because currently it // optimistically assumes that a GEP will fold into the addressing mode // regardless of its users. const BasicBlock *BB = GEP->getParent(); for (const User *U : GEP->users()) { const Instruction *UI = cast<Instruction>(U); if (CurLoop->contains(UI) && (BB != UI->getParent() || (!isa<LoadInst>(UI) && !isa<StoreInst>(UI)))) return false; } return true; } else return TTI->getUserCost(&I) == TargetTransformInfo::TCC_Free; } /// Return true if the only users of this instruction are outside of /// the loop. If this is true, we can sink the instruction to the exit /// blocks of the loop. /// /// We also return true if the instruction could be folded away in lowering.
/// (e.g., a GEP can be folded into a load as an addressing mode in the loop). static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, TargetTransformInfo *TTI, bool &FreeInLoop) { const auto &BlockColors = SafetyInfo->BlockColors; bool IsFree = isFreeInLoop(I, CurLoop, TTI); for (const User *U : I.users()) { const Instruction *UI = cast(U); if (const PHINode *PN = dyn_cast(UI)) { const BasicBlock *BB = PN->getParent(); // We cannot sink uses in catchswitches. if (isa(BB->getTerminator())) return false; // We need to sink a callsite to a unique funclet. Avoid sinking if the // phi use is too muddled. if (isa(I)) if (!BlockColors.empty() && BlockColors.find(const_cast(BB))->second.size() != 1) return false; } if (CurLoop->contains(UI)) { if (IsFree) { FreeInLoop = true; continue; } return false; } } return true; } static Instruction * CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI, const LoopSafetyInfo *SafetyInfo) { Instruction *New; if (auto *CI = dyn_cast(&I)) { const auto &BlockColors = SafetyInfo->BlockColors; // Sinking call-sites need to be handled differently from other // instructions. The cloned call-site needs a funclet bundle operand // appropriate for it's location in the CFG. SmallVector OpBundles; for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles(); BundleIdx != BundleEnd; ++BundleIdx) { OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx); if (Bundle.getTagID() == LLVMContext::OB_funclet) continue; OpBundles.emplace_back(Bundle); } if (!BlockColors.empty()) { const ColorVector &CV = BlockColors.find(&ExitBlock)->second; assert(CV.size() == 1 && "non-unique color for exit block!"); BasicBlock *BBColor = CV.front(); Instruction *EHPad = BBColor->getFirstNonPHI(); if (EHPad->isEHPad()) OpBundles.emplace_back("funclet", EHPad); } New = CallInst::Create(CI, OpBundles); } else { New = I.clone(); } ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New); if (!I.getName().empty()) New->setName(I.getName() + ".le"); // Build LCSSA PHI nodes for any in-loop operands. Note that this is // particularly cheap because we can rip off the PHI node that we're // replacing for the number and blocks of the predecessors. // OPT: If this shows up in a profile, we can instead finish sinking all // invariant instructions, and then walk their operands to re-establish // LCSSA. That will eliminate creating PHI nodes just to nuke them when // sinking bottom-up. 
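// For example (hypothetical names), if a clone sunk into exit block %exit // uses an operand %op defined in a loop that does not contain %exit, we // create // %op.lcssa = phi i32 [ %op, %pred1 ], [ %op, %pred2 ] // at the top of %exit and rewrite the clone to use %op.lcssa instead of %op.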
for (User::op_iterator OI = New->op_begin(), OE = New->op_end(); OI != OE; ++OI) if (Instruction *OInst = dyn_cast<Instruction>(*OI)) if (Loop *OLoop = LI->getLoopFor(OInst->getParent())) if (!OLoop->contains(&PN)) { PHINode *OpPN = PHINode::Create(OInst->getType(), PN.getNumIncomingValues(), OInst->getName() + ".lcssa", &ExitBlock.front()); for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) OpPN->addIncoming(OInst, PN.getIncomingBlock(i)); *OI = OpPN; } return New; } static Instruction *sinkThroughTriviallyReplacablePHI( PHINode *TPN, Instruction *I, LoopInfo *LI, SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies, const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop) { assert(isTriviallyReplacablePHI(*TPN, *I) && "Expect only trivially replaceable PHI"); BasicBlock *ExitBlock = TPN->getParent(); Instruction *New; auto It = SunkCopies.find(ExitBlock); if (It != SunkCopies.end()) New = It->second; else New = SunkCopies[ExitBlock] = CloneInstructionInExitBlock(*I, *ExitBlock, *TPN, LI, SafetyInfo); return New; } -static bool canSplitPredecessors(PHINode *PN) { +static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) { BasicBlock *BB = PN->getParent(); if (!BB->canSplitPredecessors()) return false; + // It's not impossible to split EHPad blocks, but if BlockColors already + // exists, it requires updating BlockColors for all offspring blocks + // accordingly. By skipping this corner case, we can keep the BlockColors + // update after splitting a predecessor fairly simple. + if (!SafetyInfo->BlockColors.empty() && BB->getFirstNonPHI()->isEHPad()) + return false; for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { BasicBlock *BBPred = *PI; if (isa<IndirectBrInst>(BBPred->getTerminator())) return false; } return true; } static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT, - LoopInfo *LI, const Loop *CurLoop) { + LoopInfo *LI, const Loop *CurLoop, + LoopSafetyInfo *SafetyInfo) { #ifndef NDEBUG SmallVector<BasicBlock *, 8> ExitBlocks; CurLoop->getUniqueExitBlocks(ExitBlocks); SmallPtrSet<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(), ExitBlocks.end()); #endif BasicBlock *ExitBB = PN->getParent(); assert(ExitBlockSet.count(ExitBB) && "Expect the PHI is in an exit block."); // Split predecessors of the loop exit so that instructions in the loop are // exposed to exit blocks through trivially replaceable PHIs, while keeping // the loop in the canonical form where each predecessor of each exit block // is contained within the loop.
// For example, this will convert the loop below // from // // LB1: // %v1 = // br %LE, %LB2 // LB2: // %v2 = // br %LE, %LB1 // LE: // %p = phi [%v1, %LB1], [%v2, %LB2] <-- non-trivially replaceable // // to // // LB1: // %v1 = // br %LE.split, %LB2 // LB2: // %v2 = // br %LE.split2, %LB1 // LE.split: // %p1 = phi [%v1, %LB1] <-- trivially replaceable // br %LE // LE.split2: // %p2 = phi [%v2, %LB2] <-- trivially replaceable // br %LE // LE: // %p = phi [%p1, %LE.split], [%p2, %LE.split2] // + auto &BlockColors = SafetyInfo->BlockColors; SmallSetVector<BasicBlock *, 8> PredBBs(pred_begin(ExitBB), pred_end(ExitBB)); while (!PredBBs.empty()) { BasicBlock *PredBB = *PredBBs.begin(); assert(CurLoop->contains(PredBB) && "Expect all predecessors are in the loop"); - if (PN->getBasicBlockIndex(PredBB) >= 0) - SplitBlockPredecessors(ExitBB, PredBB, ".split.loop.exit", DT, LI, true); + if (PN->getBasicBlockIndex(PredBB) >= 0) { + BasicBlock *NewPred = SplitBlockPredecessors( + ExitBB, PredBB, ".split.loop.exit", DT, LI, true); + // Since we do not allow splitting an EH block with BlockColors in + // canSplitPredecessors(), we can simply assign the predecessor's color + // to the new block. + if (!BlockColors.empty()) + BlockColors[NewPred] = BlockColors[PredBB]; + } PredBBs.remove(PredBB); } } /// When an instruction is found to only be used outside of the loop, this /// function moves it to the exit blocks and patches up SSA form as needed. /// This method is guaranteed to remove the original instruction from its /// position, and may either delete it or move it to outside of the loop. /// static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, - const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, + const Loop *CurLoop, LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE, bool FreeInLoop) { DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n"); ORE->emit([&]() { return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I) << "sinking " << ore::NV("Inst", &I); }); bool Changed = false; if (isa<LoadInst>(I)) ++NumMovedLoads; else if (isa<CallInst>(I)) ++NumMovedCalls; ++NumSunk; // Iterate over users to be ready for actual sinking. Replace users in // unreachable blocks with undef and make all user PHIs trivially replaceable. SmallPtrSet<Instruction *, 8> VisitedUsers; for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) { auto *User = cast<Instruction>(*UI); Use &U = UI.getUse(); ++UI; if (VisitedUsers.count(User) || CurLoop->contains(User)) continue; if (!DT->isReachableFromEntry(User->getParent())) { U = UndefValue::get(I.getType()); Changed = true; continue; } // The user must be a PHI node. PHINode *PN = cast<PHINode>(User); // Surprisingly, instructions can be used outside of loops without any // exits. This can only happen in PHI nodes if the incoming block is // unreachable. BasicBlock *BB = PN->getIncomingBlock(U); if (!DT->isReachableFromEntry(BB)) { U = UndefValue::get(I.getType()); Changed = true; continue; } VisitedUsers.insert(PN); if (isTriviallyReplacablePHI(*PN, I)) continue; - if (!canSplitPredecessors(PN)) + if (!canSplitPredecessors(PN, SafetyInfo)) return Changed; // Split predecessors of the PHI so that we can make users trivially // replaceable. - splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop); + splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo); // Should rebuild the iterators, as they may be invalidated by // splitPredecessorsOfLoopExit().
UI = I.user_begin(); UE = I.user_end(); } if (VisitedUsers.empty()) return Changed; #ifndef NDEBUG SmallVector<BasicBlock *, 32> ExitBlocks; CurLoop->getUniqueExitBlocks(ExitBlocks); SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(), ExitBlocks.end()); #endif // Clones of this instruction. Don't create more than one per exit block! SmallDenseMap<BasicBlock *, Instruction *, 32> SunkCopies; // If this instruction is only used outside of the loop, then all users are // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of // the instruction. SmallSetVector<User *, 8> Users(I.user_begin(), I.user_end()); for (auto *UI : Users) { auto *User = cast<Instruction>(UI); if (CurLoop->contains(User)) continue; PHINode *PN = cast<PHINode>(User); assert(ExitBlockSet.count(PN->getParent()) && "The LCSSA PHI is not in an exit block!"); // The PHI must be trivially replaceable. Instruction *New = sinkThroughTriviallyReplacablePHI(PN, &I, LI, SunkCopies, SafetyInfo, CurLoop); PN->replaceAllUsesWith(New); PN->eraseFromParent(); Changed = true; } return Changed; } /// When an instruction is found to only use loop-invariant operands and it /// is safe to hoist, this function is called to do the dirty work. /// static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { auto *Preheader = CurLoop->getLoopPreheader(); DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": " << I << "\n"); ORE->emit([&]() { return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting " << ore::NV("Inst", &I); }); // Metadata can be dependent on conditions we are hoisting above. // Conservatively strip all metadata on the instruction unless we were // guaranteed to execute I if we entered the loop, in which case the metadata // is valid in the loop preheader. if (I.hasMetadataOtherThanDebugLoc() && // The check on hasMetadataOtherThanDebugLoc is to prevent us from burning // time in isGuaranteedToExecute if we don't actually have anything to // drop. It is a compile time optimization, not required for correctness. !isGuaranteedToExecute(I, DT, CurLoop, SafetyInfo)) I.dropUnknownNonDebugMetadata(); // Move the new node to the Preheader, before its terminator. I.moveBefore(Preheader->getTerminator()); // Do not retain debug locations when we are moving instructions to different // basic blocks, because we want to avoid jumpy line tables. Calls, however, // need to retain their debug locs because they may be inlined. // FIXME: How do we retain source locations without causing poor debugging // behavior? if (!isa<CallInst>(I)) I.setDebugLoc(DebugLoc()); if (isa<LoadInst>(I)) ++NumMovedLoads; else if (isa<CallInst>(I)) ++NumMovedCalls; ++NumHoisted; return true; } /// Only sink or hoist an instruction if it is not a trapping instruction, /// if the instruction is known not to trap when moved to the preheader, or /// if it is a trapping instruction that is guaranteed to execute.
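/// For example (illustrative), %q = sdiv i32 %a, %b can trap when %b is 0, so /// it is only hoisted if it is safe to speculate at the preheader's terminator /// or is guaranteed to execute whenever the loop is entered.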
static bool isSafeToExecuteUnconditionally(Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE, const Instruction *CtxI) { if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT)) return true; bool GuaranteedToExecute = isGuaranteedToExecute(Inst, DT, CurLoop, SafetyInfo); if (!GuaranteedToExecute) { auto *LI = dyn_cast(&Inst); if (LI && CurLoop->isLoopInvariant(LI->getPointerOperand())) ORE->emit([&]() { return OptimizationRemarkMissed( DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI) << "failed to hoist load with loop-invariant address " "because load is conditionally executed"; }); } return GuaranteedToExecute; } namespace { class LoopPromoter : public LoadAndStorePromoter { Value *SomePtr; // Designated pointer to store to. const SmallSetVector &PointerMustAliases; SmallVectorImpl &LoopExitBlocks; SmallVectorImpl &LoopInsertPts; PredIteratorCache &PredCache; AliasSetTracker &AST; LoopInfo &LI; DebugLoc DL; int Alignment; bool UnorderedAtomic; AAMDNodes AATags; Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const { if (Instruction *I = dyn_cast(V)) if (Loop *L = LI.getLoopFor(I->getParent())) if (!L->contains(BB)) { // We need to create an LCSSA PHI node for the incoming value and // store that. PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB), I->getName() + ".lcssa", &BB->front()); for (BasicBlock *Pred : PredCache.get(BB)) PN->addIncoming(I, Pred); return PN; } return V; } public: LoopPromoter(Value *SP, ArrayRef Insts, SSAUpdater &S, const SmallSetVector &PMA, SmallVectorImpl &LEB, SmallVectorImpl &LIP, PredIteratorCache &PIC, AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment, bool UnorderedAtomic, const AAMDNodes &AATags) : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA), LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast), LI(li), DL(std::move(dl)), Alignment(alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags) {} bool isInstInList(Instruction *I, const SmallVectorImpl &) const override { Value *Ptr; if (LoadInst *LI = dyn_cast(I)) Ptr = LI->getOperand(0); else Ptr = cast(I)->getPointerOperand(); return PointerMustAliases.count(Ptr); } void doExtraRewritesBeforeFinalDeletion() const override { // Insert stores after in the loop exit blocks. Each exit block gets a // store of the live-out values that feed them. Since we've already told // the SSA updater about the defs in the loop and the preheader // definition, it is all set and we can start using it. for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) { BasicBlock *ExitBlock = LoopExitBlocks[i]; Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock); Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock); Instruction *InsertPos = LoopInsertPts[i]; StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos); if (UnorderedAtomic) NewSI->setOrdering(AtomicOrdering::Unordered); NewSI->setAlignment(Alignment); NewSI->setDebugLoc(DL); if (AATags) NewSI->setAAMetadata(AATags); } } void replaceLoadWithValue(LoadInst *LI, Value *V) const override { // Update alias analysis. AST.copyValue(LI, V); } void instructionDeleted(Instruction *I) const override { AST.deleteValue(I); } }; /// Return true iff we can prove that a caller of this function can not inspect /// the contents of the provided object in a well defined program. 
bool isKnownNonEscaping(Value *Object, const TargetLibraryInfo *TLI) { if (isa(Object)) // Since the alloca goes out of scope, we know the caller can't retain a // reference to it and be well defined. Thus, we don't need to check for // capture. return true; // For all other objects we need to know that the caller can't possibly // have gotten a reference to the object. There are two components of // that: // 1) Object can't be escaped by this function. This is what // PointerMayBeCaptured checks. // 2) Object can't have been captured at definition site. For this, we // need to know the return value is noalias. At the moment, we use a // weaker condition and handle only AllocLikeFunctions (which are // known to be noalias). TODO return isAllocLikeFn(Object, TLI) && !PointerMayBeCaptured(Object, true, true); } } // namespace /// Try to promote memory values to scalars by sinking stores out of the /// loop and moving loads to before the loop. We do this by looping over /// the stores in the loop, looking for stores to Must pointers which are /// loop invariant. /// bool llvm::promoteLoopAccessesToScalars( const SmallSetVector &PointerMustAliases, SmallVectorImpl &ExitBlocks, SmallVectorImpl &InsertPts, PredIteratorCache &PIC, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, Loop *CurLoop, AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { // Verify inputs. assert(LI != nullptr && DT != nullptr && CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr && "Unexpected Input to promoteLoopAccessesToScalars"); Value *SomePtr = *PointerMustAliases.begin(); BasicBlock *Preheader = CurLoop->getLoopPreheader(); // It isn't safe to promote a load/store from the loop if the load/store is // conditional. For example, turning: // // for () { if (c) *P += 1; } // // into: // // tmp = *P; for () { if (c) tmp +=1; } *P = tmp; // // is not safe, because *P may only be valid to access if 'c' is true. // // The safety property divides into two parts: // p1) The memory may not be dereferenceable on entry to the loop. In this // case, we can't insert the required load in the preheader. // p2) The memory model does not allow us to insert a store along any dynamic // path which did not originally have one. // // If at least one store is guaranteed to execute, both properties are // satisfied, and promotion is legal. // // This, however, is not a necessary condition. Even if no store/load is // guaranteed to execute, we can still establish these properties. // We can establish (p1) by proving that hoisting the load into the preheader // is safe (i.e. proving dereferenceability on all paths through the loop). We // can use any access within the alias set to prove dereferenceability, // since they're all must alias. // // There are two ways establish (p2): // a) Prove the location is thread-local. In this case the memory model // requirement does not apply, and stores are safe to insert. // b) Prove a store dominates every exit block. In this case, if an exit // blocks is reached, the original dynamic path would have taken us through // the store, so inserting a store into the exit block is safe. Note that this // is different from the store being guaranteed to execute. For instance, // if an exception is thrown on the first iteration of the loop, the original // store is never executed, but the exit blocks are not executed either. 
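// // For example (an illustrative sketch): // // for (...) { may_throw(); *P = 1; } // // The store is not guaranteed to execute, since the call may throw on the // first iteration, but it dominates the loop's exit block, so a store // inserted there runs only on paths that already stored to P.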
bool DereferenceableInPH = false; bool SafeToInsertStore = false; SmallVector<Instruction *, 64> LoopUses; // We start with an alignment of one and try to find instructions that allow // us to prove better alignment. unsigned Alignment = 1; // Keep track of which types of access we see. bool SawUnorderedAtomic = false; bool SawNotAtomic = false; AAMDNodes AATags; const DataLayout &MDL = Preheader->getModule()->getDataLayout(); bool IsKnownThreadLocalObject = false; if (SafetyInfo->MayThrow) { // If a loop can throw, we have to insert a store along each unwind edge. // That said, we can't actually make the unwind edge explicit. Therefore, // we have to prove that the store is dead along the unwind edge. We do // this by proving that the caller can't have a reference to the object // after return and thus can't possibly load from the object. Value *Object = GetUnderlyingObject(SomePtr, MDL); if (!isKnownNonEscaping(Object, TLI)) return false; // Subtlety: Allocas aren't visible to callers, but *are* potentially // visible to other threads if captured and used during their lifetimes. IsKnownThreadLocalObject = !isa<AllocaInst>(Object); } // Check that all of the pointers in the alias set have the same type. We // cannot (yet) promote a memory location that is loaded and stored in // different sizes. While we are at it, collect alignment and AA info. for (Value *ASIV : PointerMustAliases) { // Check that all of the pointers in the alias set have the same type. We // cannot (yet) promote a memory location that is loaded and stored in // different sizes. if (SomePtr->getType() != ASIV->getType()) return false; for (User *U : ASIV->users()) { // Ignore instructions that are outside the loop. Instruction *UI = dyn_cast<Instruction>(U); if (!UI || !CurLoop->contains(UI)) continue; // If there is a non-load/store instruction in the loop, we can't promote // it. if (LoadInst *Load = dyn_cast<LoadInst>(UI)) { assert(!Load->isVolatile() && "AST broken"); if (!Load->isUnordered()) return false; SawUnorderedAtomic |= Load->isAtomic(); SawNotAtomic |= !Load->isAtomic(); if (!DereferenceableInPH) DereferenceableInPH = isSafeToExecuteUnconditionally( *Load, DT, CurLoop, SafetyInfo, ORE, Preheader->getTerminator()); } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) { // Stores *of* the pointer are not interesting, only stores *to* the // pointer. if (UI->getOperand(1) != ASIV) continue; assert(!Store->isVolatile() && "AST broken"); if (!Store->isUnordered()) return false; SawUnorderedAtomic |= Store->isAtomic(); SawNotAtomic |= !Store->isAtomic(); // If the store is guaranteed to execute, both properties are satisfied. // We may want to check if a store is guaranteed to execute even if we // already know that promotion is safe, since it may have higher // alignment than any other guaranteed stores, in which case we can // raise the alignment on the promoted store. unsigned InstAlignment = Store->getAlignment(); if (!InstAlignment) InstAlignment = MDL.getABITypeAlignment(Store->getValueOperand()->getType()); if (!DereferenceableInPH || !SafeToInsertStore || (InstAlignment > Alignment)) { if (isGuaranteedToExecute(*UI, DT, CurLoop, SafetyInfo)) { DereferenceableInPH = true; SafeToInsertStore = true; Alignment = std::max(Alignment, InstAlignment); } } // If a store dominates all exit blocks, it is safe to sink. // As explained above, if an exit block was executed, a dominating // store must have been executed at least once, so we are not // introducing stores on paths that did not have them. // Note that this only looks at explicit exit blocks.
// If we ever start sinking stores into unwind edges (see above), this will // break. if (!SafeToInsertStore) SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) { return DT->dominates(Store->getParent(), Exit); }); // If the store is not guaranteed to execute, we may still get // deref info through it. if (!DereferenceableInPH) { DereferenceableInPH = isDereferenceableAndAlignedPointer( Store->getPointerOperand(), Store->getAlignment(), MDL, Preheader->getTerminator(), DT); } } else return false; // Not a load or store. // Merge the AA tags. if (LoopUses.empty()) { // On the first load/store, just take its AA tags. UI->getAAMetadata(AATags); } else if (AATags) { UI->getAAMetadata(AATags, /* Merge = */ true); } LoopUses.push_back(UI); } } // If we found both an unordered atomic instruction and a non-atomic memory // access, bail. We can't blindly promote non-atomic to atomic since we // might not be able to lower the result. We can't downgrade since that // would violate the memory model. Also, align 0 is an error for atomics. if (SawUnorderedAtomic && SawNotAtomic) return false; // If we couldn't prove we can hoist the load, bail. if (!DereferenceableInPH) return false; // We know we can hoist the load, but don't have a guaranteed store. // Check whether the location is thread-local. If it is, then we can insert // stores along paths which originally didn't have them without violating the // memory model. if (!SafeToInsertStore) { if (IsKnownThreadLocalObject) SafeToInsertStore = true; else { Value *Object = GetUnderlyingObject(SomePtr, MDL); SafeToInsertStore = (isAllocLikeFn(Object, TLI) || isa<AllocaInst>(Object)) && !PointerMayBeCaptured(Object, true, true); } } // If we've still failed to prove we can sink the store, give up. if (!SafeToInsertStore) return false; // Otherwise, this is safe to promote, let's do it! DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr << '\n'); ORE->emit([&]() { return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar", LoopUses[0]) << "Moving accesses to memory location out of the loop"; }); ++NumPromoted; // Grab a debug location for the inserted loads/stores; given that the // inserted loads/stores have little relation to the original loads/stores, // this code just arbitrarily picks a location from one, since any debug // location is better than none. DebugLoc DL = LoopUses[0]->getDebugLoc(); // We use the SSAUpdater interface to insert phi nodes as required. SmallVector<PHINode *, 16> NewPHIs; SSAUpdater SSA(&NewPHIs); LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, InsertPts, PIC, *CurAST, *LI, DL, Alignment, SawUnorderedAtomic, AATags); // Set up the preheader to have a definition of the value. It is the live-out // value from the preheader that uses in the loop will use. LoadInst *PreheaderLoad = new LoadInst( SomePtr, SomePtr->getName() + ".promoted", Preheader->getTerminator()); if (SawUnorderedAtomic) PreheaderLoad->setOrdering(AtomicOrdering::Unordered); PreheaderLoad->setAlignment(Alignment); PreheaderLoad->setDebugLoc(DL); if (AATags) PreheaderLoad->setAAMetadata(AATags); SSA.AddAvailableValue(Preheader, PreheaderLoad); // Rewrite all the loads in the loop and remember all the definitions from // stores in the loop. Promoter.run(LoopUses); // If the SSAUpdater didn't use the load in the preheader, just zap it now.
if (PreheaderLoad->use_empty()) PreheaderLoad->eraseFromParent(); return true; } /// Returns an owning pointer to an alias set which incorporates aliasing info /// from L and all subloops of L. /// FIXME: In the new pass manager, there is no helper function to handle loop /// analysis such as cloneBasicBlockAnalysis, so the AST needs to be recomputed /// from scratch for every loop. Hook up with the helper functions when /// available in the new pass manager to avoid redundant computation. AliasSetTracker * LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI, AliasAnalysis *AA) { AliasSetTracker *CurAST = nullptr; SmallVector<Loop *, 4> RecomputeLoops; for (Loop *InnerL : L->getSubLoops()) { auto MapI = LoopToAliasSetMap.find(InnerL); // If the AST for this inner loop is missing it may have been merged into // some other loop's AST and then that loop unrolled, and so we need to // recompute it. if (MapI == LoopToAliasSetMap.end()) { RecomputeLoops.push_back(InnerL); continue; } AliasSetTracker *InnerAST = MapI->second; if (CurAST != nullptr) { // What if InnerLoop was modified by other passes? CurAST->add(*InnerAST); // Once we've incorporated the inner loop's AST into ours, we don't need // the subloop's anymore. delete InnerAST; } else { CurAST = InnerAST; } LoopToAliasSetMap.erase(MapI); } if (CurAST == nullptr) CurAST = new AliasSetTracker(*AA); auto mergeLoop = [&](Loop *L) { // Loop over the body of this loop, looking for calls, invokes, and stores. for (BasicBlock *BB : L->blocks()) CurAST->add(*BB); // Incorporate the specified basic block }; // Add everything from the subloops that are no longer directly available. for (Loop *InnerL : RecomputeLoops) mergeLoop(InnerL); // And merge in this loop. mergeLoop(L); return CurAST; } /// Simple analysis hook. Clone alias set info. /// void LegacyLICMPass::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) { AliasSetTracker *AST = LICM.getLoopToAliasSetMap().lookup(L); if (!AST) return; AST->copyValue(From, To); } /// Simple Analysis hook. Delete value V from the alias set. /// void LegacyLICMPass::deleteAnalysisValue(Value *V, Loop *L) { AliasSetTracker *AST = LICM.getLoopToAliasSetMap().lookup(L); if (!AST) return; AST->deleteValue(V); } /// Simple Analysis hook. Delete loop L from the alias set map. /// void LegacyLICMPass::deleteAnalysisLoop(Loop *L) { AliasSetTracker *AST = LICM.getLoopToAliasSetMap().lookup(L); if (!AST) return; delete AST; LICM.getLoopToAliasSetMap().erase(L); } /// Return true if the body of this loop may store into the memory /// location pointed to by V. /// static bool pointerInvalidatedByLoop(Value *V, uint64_t Size, const AAMDNodes &AAInfo, AliasSetTracker *CurAST) { // Check to see if any of the basic blocks in CurLoop invalidate *V. return CurAST->getAliasSetForPointer(V, Size, AAInfo).isMod(); } /// Little predicate that returns true if the specified basic block is in /// a subloop of the current one, not the current one itself.
/// static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) { assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop"); return LI->getLoopFor(BB) != CurLoop; } Index: head/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp =================================================================== --- head/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp (revision 329982) +++ head/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp (revision 329983) @@ -1,1648 +1,1720 @@ //===-- LoopUtils.cpp - Loop Utility functions -------------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file defines common loop utility functions. // //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; using namespace llvm::PatternMatch; #define DEBUG_TYPE "loop-utils" bool RecurrenceDescriptor::areAllUsesIn(Instruction *I, SmallPtrSetImpl &Set) { for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use) if (!Set.count(dyn_cast(*Use))) return false; return true; } bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurrenceKind Kind) { switch (Kind) { default: break; case RK_IntegerAdd: case RK_IntegerMult: case RK_IntegerOr: case RK_IntegerAnd: case RK_IntegerXor: case RK_IntegerMinMax: return true; } return false; } bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurrenceKind Kind) { return (Kind != RK_NoRecurrence) && !isIntegerRecurrenceKind(Kind); } bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurrenceKind Kind) { switch (Kind) { default: break; case RK_IntegerAdd: case RK_IntegerMult: case RK_FloatAdd: case RK_FloatMult: return true; } return false; } -Instruction * -RecurrenceDescriptor::lookThroughAnd(PHINode *Phi, Type *&RT, - SmallPtrSetImpl &Visited, - SmallPtrSetImpl &CI) { +/// Determines if Phi may have been type-promoted. If Phi has a single user +/// that ANDs the Phi with a type mask, return the user. RT is updated to +/// account for the narrower bit width represented by the mask, and the AND +/// instruction is added to CI. +static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT, + SmallPtrSetImpl &Visited, + SmallPtrSetImpl &CI) { if (!Phi->hasOneUse()) return Phi; const APInt *M = nullptr; Instruction *I, *J = cast(Phi->use_begin()->getUser()); // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT // with a new integer type of the corresponding bit width. 
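// For example (hypothetical IR), given // %masked = and i32 %phi, 255 // the mask is 2^8-1, so RT becomes i8 and the 'and' is recorded in CI so that // the cost model can later ignore it.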
if (match(J, m_c_And(m_Instruction(I), m_APInt(M)))) { int32_t Bits = (*M + 1).exactLogBase2(); if (Bits > 0) { RT = IntegerType::get(Phi->getContext(), Bits); Visited.insert(Phi); CI.insert(J); return J; } } return Phi; } -bool RecurrenceDescriptor::getSourceExtensionKind( - Instruction *Start, Instruction *Exit, Type *RT, bool &IsSigned, - SmallPtrSetImpl &Visited, - SmallPtrSetImpl &CI) { +/// Compute the minimal bit width needed to represent a reduction whose exit +/// instruction is given by Exit. +static std::pair computeRecurrenceType(Instruction *Exit, + DemandedBits *DB, + AssumptionCache *AC, + DominatorTree *DT) { + bool IsSigned = false; + const DataLayout &DL = Exit->getModule()->getDataLayout(); + uint64_t MaxBitWidth = DL.getTypeSizeInBits(Exit->getType()); + if (DB) { + // Use the demanded bits analysis to determine the bits that are live out + // of the exit instruction, rounding up to the nearest power of two. If the + // use of demanded bits results in a smaller bit width, we know the value + // must be positive (i.e., IsSigned = false), because if this were not the + // case, the sign bit would have been demanded. + auto Mask = DB->getDemandedBits(Exit); + MaxBitWidth = Mask.getBitWidth() - Mask.countLeadingZeros(); + } + + if (MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) && AC && DT) { + // If demanded bits wasn't able to limit the bit width, we can try to use + // value tracking instead. This can be the case, for example, if the value + // may be negative. + auto NumSignBits = ComputeNumSignBits(Exit, DL, 0, AC, nullptr, DT); + auto NumTypeBits = DL.getTypeSizeInBits(Exit->getType()); + MaxBitWidth = NumTypeBits - NumSignBits; + KnownBits Bits = computeKnownBits(Exit, DL); + if (!Bits.isNonNegative()) { + // If the value is not known to be non-negative, we set IsSigned to true, + // meaning that we will use sext instructions instead of zext + // instructions to restore the original type. + IsSigned = true; + if (!Bits.isNegative()) + // If the value is not known to be negative, we don't known what the + // upper bit is, and therefore, we don't know what kind of extend we + // will need. In this case, just increase the bit width by one bit and + // use sext. + ++MaxBitWidth; + } + } + if (!isPowerOf2_64(MaxBitWidth)) + MaxBitWidth = NextPowerOf2(MaxBitWidth); + + return std::make_pair(Type::getIntNTy(Exit->getContext(), MaxBitWidth), + IsSigned); +} + +/// Collect cast instructions that can be ignored in the vectorizer's cost +/// model, given a reduction exit value and the minimal type in which the +/// reduction can be represented. +static void collectCastsToIgnore(Loop *TheLoop, Instruction *Exit, + Type *RecurrenceType, + SmallPtrSetImpl &Casts) { + SmallVector Worklist; - bool FoundOneOperand = false; - unsigned DstSize = RT->getPrimitiveSizeInBits(); + SmallPtrSet Visited; Worklist.push_back(Exit); - // Traverse the instructions in the reduction expression, beginning with the - // exit value. while (!Worklist.empty()) { - Instruction *I = Worklist.pop_back_val(); - for (Use &U : I->operands()) { - - // Terminate the traversal if the operand is not an instruction, or we - // reach the starting value. 
- Instruction *J = dyn_cast<Instruction>(U.get()); - if (!J || J == Start) + Instruction *Val = Worklist.pop_back_val(); + Visited.insert(Val); + if (auto *Cast = dyn_cast<CastInst>(Val)) + if (Cast->getSrcTy() == RecurrenceType) { + // If the source type of a cast instruction is equal to the recurrence + // type, it will be eliminated, and should be ignored in the vectorizer + // cost model. + Casts.insert(Cast); continue; - - // Otherwise, investigate the operation if it is also in the expression. - if (Visited.count(J)) { - Worklist.push_back(J); - continue; } - // If the operand is not in Visited, it is not a reduction operation, but - // it does feed into one. Make sure it is either a single-use sign- or - // zero-extend instruction. - CastInst *Cast = dyn_cast<CastInst>(J); - bool IsSExtInst = isa<SExtInst>(J); - if (!Cast || !Cast->hasOneUse() || !(isa<ZExtInst>(J) || IsSExtInst)) - return false; - - // Ensure the source type of the extend is no larger than the reduction - // type. It is not necessary for the types to be identical. - unsigned SrcSize = Cast->getSrcTy()->getPrimitiveSizeInBits(); - if (SrcSize > DstSize) - return false; - - // Furthermore, ensure that all such extends are of the same kind. - if (FoundOneOperand) { - if (IsSigned != IsSExtInst) - return false; - } else { - FoundOneOperand = true; - IsSigned = IsSExtInst; - } - - // Lastly, if the source type of the extend matches the reduction type, - // add the extend to CI so that we can avoid accounting for it in the - // cost model. - if (SrcSize == DstSize) - CI.insert(Cast); - } + // Add all operands to the work list if they are loop-varying values that + // we haven't yet visited. + for (Value *O : cast<User>(Val)->operands()) + if (auto *I = dyn_cast<Instruction>(O)) + if (TheLoop->contains(I) && !Visited.count(I)) + Worklist.push_back(I); } - return true; } bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, Loop *TheLoop, bool HasFunNoNaNAttr, - RecurrenceDescriptor &RedDes) { + RecurrenceDescriptor &RedDes, + DemandedBits *DB, + AssumptionCache *AC, + DominatorTree *DT) { if (Phi->getNumIncomingValues() != 2) return false; // Reduction variables are only found in the loop header block. if (Phi->getParent() != TheLoop->getHeader()) return false; // Obtain the reduction start value from the value that comes from the loop // preheader. Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader()); // ExitInstruction is the single value which is used outside the loop. // We only allow for a single reduction value to be used outside the loop. // This includes users of the reduction and variables which form a cycle // that ends in the phi node. Instruction *ExitInstruction = nullptr; // Indicates that we found a reduction operation in our scan. bool FoundReduxOp = false; // We start with the PHI node and scan for all of the users of this // instruction. All users must be instructions that can be used as reduction // variables (such as ADD). We must have a single out-of-block user. The cycle // must include the original PHI. bool FoundStartPHI = false; // To recognize min/max patterns formed by an icmp select sequence, we store // the number of instructions we saw from the recognized min/max pattern, // to make sure we only see exactly the two instructions. unsigned NumCmpSelectPatternInst = 0; InstDesc ReduxDesc(false, nullptr); // Data used for determining if the recurrence has been type-promoted.
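// ---------------------------------------------------------------------------
// Editorial aside (illustrative sketch, not part of this change):
// collectCastsToIgnore above is a plain worklist walk over the operands of
// loop-varying instructions. Its core shape in standard C++, with Node as a
// hypothetical stand-in for Instruction:
#include <unordered_set>
#include <vector>

struct Node {
  std::vector<Node *> Operands;
};

static void walkOperands(Node *Exit) {
  std::vector<Node *> Worklist{Exit};
  std::unordered_set<Node *> Visited;
  while (!Worklist.empty()) {
    Node *N = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(N).second)
      continue; // visit each node once
    for (Node *Op : N->Operands)
      Worklist.push_back(Op); // the real code also filters by TheLoop
  }
}
// ---------------------------------------------------------------------------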
Type *RecurrenceType = Phi->getType(); SmallPtrSet<Instruction *, 4> CastInsts; Instruction *Start = Phi; bool IsSigned = false; SmallPtrSet<Instruction *, 8> VisitedInsts; SmallVector<Instruction *, 8> Worklist; // Return early if the recurrence kind does not match the type of Phi. If the // recurrence kind is arithmetic, we attempt to look through AND operations // resulting from the type promotion performed by InstCombine. Vector // operations are not limited to the legal integer widths, so we may be able // to evaluate the reduction in the narrower width. if (RecurrenceType->isFloatingPointTy()) { if (!isFloatingPointRecurrenceKind(Kind)) return false; } else { if (!isIntegerRecurrenceKind(Kind)) return false; if (isArithmeticRecurrenceKind(Kind)) Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts); } Worklist.push_back(Start); VisitedInsts.insert(Start); // A value in the reduction can be used: // - By the reduction: // - Reduction operation: // - One use of reduction value (safe). // - Multiple use of reduction value (not safe). // - PHI: // - All uses of the PHI must be the reduction (safe). // - Otherwise, not safe. // - By instructions outside of the loop (safe). // * One value may have several outside users, but all outside // uses must be of the same value. // - By an instruction that is not part of the reduction (not safe). // This is either: // * An instruction type other than PHI or the reduction operation. // * A PHI in the header other than the initial PHI. while (!Worklist.empty()) { Instruction *Cur = Worklist.back(); Worklist.pop_back(); // No users. // If the instruction has no users then this is a broken chain and can't be // a reduction variable. if (Cur->use_empty()) return false; bool IsAPhi = isa<PHINode>(Cur); // A header PHI use other than the original PHI. if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent()) return false; // Reductions of instructions such as Div and Sub are only possible if the // LHS is the reduction variable. if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) && !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) && !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0)))) return false; // Any reduction instruction must be of one of the allowed kinds. We ignore // the starting value (the Phi or an AND instruction if the Phi has been // type-promoted). if (Cur != Start) { ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr); if (!ReduxDesc.isRecurrence()) return false; } // A reduction operation must only have one use of the reduction value. if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax && hasMultipleUsesOf(Cur, VisitedInsts)) return false; // All inputs to a PHI node must be a reduction value. if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts)) return false; if (Kind == RK_IntegerMinMax && (isa<ICmpInst>(Cur) || isa<SelectInst>(Cur))) ++NumCmpSelectPatternInst; if (Kind == RK_FloatMinMax && (isa<FCmpInst>(Cur) || isa<SelectInst>(Cur))) ++NumCmpSelectPatternInst; // Check whether we found a reduction operator. FoundReduxOp |= !IsAPhi && Cur != Start; // Process users of current instruction. Push non-PHI nodes after PHI nodes // onto the stack. This way we are going to have seen all inputs to PHI // nodes once we get to them. SmallVector<Instruction *, 8> NonPHIs; SmallVector<Instruction *, 8> PHIs; for (User *U : Cur->users()) { Instruction *UI = cast<Instruction>(U); // Check if we found the exit user. BasicBlock *Parent = UI->getParent(); if (!TheLoop->contains(Parent)) { // If we already know this instruction is used externally, move on to // the next user.
if (ExitInstruction == Cur) continue; // Exit if you find multiple values used outside or if the header phi // node is being used. In this case the user uses the value of the // previous iteration, in which case we would lose "VF-1" iterations of // the reduction operation if we vectorize. if (ExitInstruction != nullptr || Cur == Phi) return false; // The instruction used by an outside user must be the last instruction // before we feed back to the reduction phi. Otherwise, we lose VF-1 // operations on the value. if (!is_contained(Phi->operands(), Cur)) return false; ExitInstruction = Cur; continue; } // Process instructions only once (termination). Each reduction cycle // value must only be used once, except by phi nodes and min/max // reductions which are represented as a cmp followed by a select. InstDesc IgnoredVal(false, nullptr); if (VisitedInsts.insert(UI).second) { if (isa<PHINode>(UI)) PHIs.push_back(UI); else NonPHIs.push_back(UI); } else if (!isa<PHINode>(UI) && ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) && !isa<SelectInst>(UI)) || !isMinMaxSelectCmpPattern(UI, IgnoredVal).isRecurrence())) return false; // Remember that we completed the cycle. if (UI == Phi) FoundStartPHI = true; } Worklist.append(PHIs.begin(), PHIs.end()); Worklist.append(NonPHIs.begin(), NonPHIs.end()); } // This means we have seen one but not the other instruction of the // pattern or more than just a select and cmp. if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) && NumCmpSelectPatternInst != 2) return false; if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction) return false; - // If we think Phi may have been type-promoted, we also need to ensure that - // all source operands of the reduction are either SExtInsts or ZExtInsts. If - // so, we will be able to evaluate the reduction in the narrower bit width. - if (Start != Phi) - if (!getSourceExtensionKind(Start, ExitInstruction, RecurrenceType, - IsSigned, VisitedInsts, CastInsts)) + if (Start != Phi) { + // If the starting value is not the same as the phi node, we speculatively + // looked through an 'and' instruction when evaluating a potential + // arithmetic reduction to determine if it may have been type-promoted. + // + // We now compute the minimal bit width that is required to represent the + // reduction. If this is the same width that was indicated by the 'and', we + // can represent the reduction in the smaller type. The 'and' instruction + // will be eliminated since it will essentially be a cast instruction that + // can be ignored in the cost model. If we compute a different type than we + // did when evaluating the 'and', the 'and' will not be eliminated, and we + // will end up with different kinds of operations in the recurrence + // expression (e.g., RK_IntegerAND, RK_IntegerADD). We give up if this is + // the case. + // + // The vectorizer relies on InstCombine to perform the actual + // type-shrinking. It does this by inserting instructions to truncate the + // exit value of the reduction to the width indicated by RecurrenceType and + // then extend this value back to the original width. If IsSigned is false, + // a 'zext' instruction will be generated; otherwise, a 'sext' will be + // used. + // + // TODO: We should not rely on InstCombine to rewrite the reduction in the + // smaller type. We should just generate a correctly typed expression + // to begin with.
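// ---------------------------------------------------------------------------
// Editorial aside (illustrative sketch, not part of this change): the
// truncate-then-extend round trip described in the comment above, in scalar
// form. On a typical two's-complement target, narrowing a value to W bits
// and restoring it is a no-op exactly when the value fits in W bits, which
// is what computeRecurrenceType establishes (helper names hypothetical):
#include <cstdint>

// Narrow X to W bits and restore it with a zero-extension (IsSigned false).
static uint32_t zextRoundTrip(uint32_t X, unsigned W) {
  uint32_t Mask = (W >= 32) ? ~0u : ((1u << W) - 1);
  return X & Mask; // trunc to W bits, then zext back to 32
}

// Signed variant (IsSigned true): shift left, then arithmetic shift right.
static int32_t sextRoundTrip(int32_t X, unsigned W) {
  unsigned S = 32 - W;
  return (int32_t)((uint32_t)X << S) >> S; // trunc, then sext back to 32
}
// ---------------------------------------------------------------------------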
+ Type *ComputedType; + std::tie(ComputedType, IsSigned) = + computeRecurrenceType(ExitInstruction, DB, AC, DT); + if (ComputedType != RecurrenceType) return false; + // The recurrence expression will be represented in a narrower type. If + // there are any cast instructions that will be unnecessary, collect them + // in CastInsts. Note that the 'and' instruction was already included in + // this list. + // + // TODO: A better way to represent this may be to tag in some way all the + // instructions that are a part of the reduction. The vectorizer cost + // model could then apply the recurrence type to these instructions, + // without needing a white list of instructions to ignore. + collectCastsToIgnore(TheLoop, ExitInstruction, RecurrenceType, CastInsts); + } + // We found a reduction var if we have reached the original phi node and we // only have a single instruction with out-of-loop users. // The ExitInstruction(Instruction which is allowed to have out-of-loop users) // is saved as part of the RecurrenceDescriptor. // Save the description of this reduction variable. RecurrenceDescriptor RD( RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(), ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts); RedDes = RD; return true; } /// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction /// pattern corresponding to a min(X, Y) or max(X, Y). RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev) { assert((isa(I) || isa(I) || isa(I)) && "Expect a select instruction"); Instruction *Cmp = nullptr; SelectInst *Select = nullptr; // We must handle the select(cmp()) as a single instruction. Advance to the // select. if ((Cmp = dyn_cast(I)) || (Cmp = dyn_cast(I))) { if (!Cmp->hasOneUse() || !(Select = dyn_cast(*I->user_begin()))) return InstDesc(false, I); return InstDesc(Select, Prev.getMinMaxKind()); } // Only handle single use cases for now. if (!(Select = dyn_cast(I))) return InstDesc(false, I); if (!(Cmp = dyn_cast(I->getOperand(0))) && !(Cmp = dyn_cast(I->getOperand(0)))) return InstDesc(false, I); if (!Cmp->hasOneUse()) return InstDesc(false, I); Value *CmpLeft; Value *CmpRight; // Look for a min/max pattern. if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) return InstDesc(Select, MRK_UIntMin); else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) return InstDesc(Select, MRK_UIntMax); else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) return InstDesc(Select, MRK_SIntMax); else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) return InstDesc(Select, MRK_SIntMin); else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) return InstDesc(Select, MRK_FloatMin); else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) return InstDesc(Select, MRK_FloatMax); else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) return InstDesc(Select, MRK_FloatMin); else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) return InstDesc(Select, MRK_FloatMax); return InstDesc(false, I); } RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind, InstDesc &Prev, bool HasFunNoNaNAttr) { bool FP = I->getType()->isFloatingPointTy(); Instruction *UAI = Prev.getUnsafeAlgebraInst(); if (!UAI && FP && !I->isFast()) UAI = I; // Found an unsafe (unvectorizable) algebra instruction. 
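// ---------------------------------------------------------------------------
// Editorial aside (illustrative sketch, not part of this change): the
// icmp/select idiom that isMinMaxSelectCmpPattern recognizes, in scalar form
// (hypothetical helper; in IR the compare must have the select as its only
// use):
static int signedMax(int A, int B) {
  bool Cond = A > B;   // the ICmp (SGT) half of the pattern
  return Cond ? A : B; // the Select half; together they map to MRK_SIntMax
}
// ---------------------------------------------------------------------------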
switch (I->getOpcode()) { default: return InstDesc(false, I); case Instruction::PHI: return InstDesc(I, Prev.getMinMaxKind(), Prev.getUnsafeAlgebraInst()); case Instruction::Sub: case Instruction::Add: return InstDesc(Kind == RK_IntegerAdd, I); case Instruction::Mul: return InstDesc(Kind == RK_IntegerMult, I); case Instruction::And: return InstDesc(Kind == RK_IntegerAnd, I); case Instruction::Or: return InstDesc(Kind == RK_IntegerOr, I); case Instruction::Xor: return InstDesc(Kind == RK_IntegerXor, I); case Instruction::FMul: return InstDesc(Kind == RK_FloatMult, I, UAI); case Instruction::FSub: case Instruction::FAdd: return InstDesc(Kind == RK_FloatAdd, I, UAI); case Instruction::FCmp: case Instruction::ICmp: case Instruction::Select: if (Kind != RK_IntegerMinMax && (!HasFunNoNaNAttr || Kind != RK_FloatMinMax)) return InstDesc(false, I); return isMinMaxSelectCmpPattern(I, Prev); } } bool RecurrenceDescriptor::hasMultipleUsesOf( Instruction *I, SmallPtrSetImpl &Insts) { unsigned NumUses = 0; for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use) { if (Insts.count(dyn_cast(*Use))) ++NumUses; if (NumUses > 1) return true; } return false; } bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop, - RecurrenceDescriptor &RedDes) { + RecurrenceDescriptor &RedDes, + DemandedBits *DB, AssumptionCache *AC, + DominatorTree *DT) { BasicBlock *Header = TheLoop->getHeader(); Function &F = *Header->getParent(); bool HasFunNoNaNAttr = F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true"; - if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes)) { + if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB, + AC, DT)) { DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n"); return true; } - if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes)) { + if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes, DB, + AC, DT)) { DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n"); return true; } - if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes)) { + if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes, DB, + AC, DT)) { DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n"); return true; } - if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes)) { + if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes, DB, + AC, DT)) { DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n"); return true; } - if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes)) { + if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes, DB, + AC, DT)) { DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n"); return true; } - if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr, - RedDes)) { + if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr, RedDes, + DB, AC, DT)) { DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n"); return true; } - if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes)) { + if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes, DB, + AC, DT)) { DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n"); return true; } - if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes)) { + if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB, + AC, DT)) { DEBUG(dbgs() << "Found an FAdd reduction PHI." 
<< *Phi << "\n"); return true; } - if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes)) { + if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes, DB, + AC, DT)) { DEBUG(dbgs() << "Found an float MINMAX reduction PHI." << *Phi << "\n"); return true; } // Not a reduction of known type. return false; } bool RecurrenceDescriptor::isFirstOrderRecurrence( PHINode *Phi, Loop *TheLoop, DenseMap &SinkAfter, DominatorTree *DT) { // Ensure the phi node is in the loop header and has two incoming values. if (Phi->getParent() != TheLoop->getHeader() || Phi->getNumIncomingValues() != 2) return false; // Ensure the loop has a preheader and a single latch block. The loop // vectorizer will need the latch to set up the next iteration of the loop. auto *Preheader = TheLoop->getLoopPreheader(); auto *Latch = TheLoop->getLoopLatch(); if (!Preheader || !Latch) return false; // Ensure the phi node's incoming blocks are the loop preheader and latch. if (Phi->getBasicBlockIndex(Preheader) < 0 || Phi->getBasicBlockIndex(Latch) < 0) return false; // Get the previous value. The previous value comes from the latch edge while // the initial value comes form the preheader edge. auto *Previous = dyn_cast(Phi->getIncomingValueForBlock(Latch)); if (!Previous || !TheLoop->contains(Previous) || isa(Previous) || SinkAfter.count(Previous)) // Cannot rely on dominance due to motion. return false; // Ensure every user of the phi node is dominated by the previous value. // The dominance requirement ensures the loop vectorizer will not need to // vectorize the initial value prior to the first iteration of the loop. // TODO: Consider extending this sinking to handle other kinds of instructions // and expressions, beyond sinking a single cast past Previous. if (Phi->hasOneUse()) { auto *I = Phi->user_back(); if (I->isCast() && (I->getParent() == Phi->getParent()) && I->hasOneUse() && DT->dominates(Previous, I->user_back())) { if (!DT->dominates(Previous, I)) // Otherwise we're good w/o sinking. SinkAfter[I] = Previous; return true; } } for (User *U : Phi->users()) if (auto *I = dyn_cast(U)) { if (!DT->dominates(Previous, I)) return false; } return true; } /// This function returns the identity element (or neutral element) for /// the operation K. Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurrenceKind K, Type *Tp) { switch (K) { case RK_IntegerXor: case RK_IntegerAdd: case RK_IntegerOr: // Adding, Xoring, Oring zero to a number does not change it. return ConstantInt::get(Tp, 0); case RK_IntegerMult: // Multiplying a number by 1 does not change it. return ConstantInt::get(Tp, 1); case RK_IntegerAnd: // AND-ing a number with an all-1 value does not change it. return ConstantInt::get(Tp, -1, true); case RK_FloatMult: // Multiplying a number by 1 does not change it. return ConstantFP::get(Tp, 1.0L); case RK_FloatAdd: // Adding zero to a number does not change it. return ConstantFP::get(Tp, 0.0L); default: llvm_unreachable("Unknown recurrence kind"); } } /// This function translates the recurrence kind to an LLVM binary operator. 
unsigned RecurrenceDescriptor::getRecurrenceBinOp(RecurrenceKind Kind) { switch (Kind) { case RK_IntegerAdd: return Instruction::Add; case RK_IntegerMult: return Instruction::Mul; case RK_IntegerOr: return Instruction::Or; case RK_IntegerAnd: return Instruction::And; case RK_IntegerXor: return Instruction::Xor; case RK_FloatMult: return Instruction::FMul; case RK_FloatAdd: return Instruction::FAdd; case RK_IntegerMinMax: return Instruction::ICmp; case RK_FloatMinMax: return Instruction::FCmp; default: llvm_unreachable("Unknown recurrence operation"); } } Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder, MinMaxRecurrenceKind RK, Value *Left, Value *Right) { CmpInst::Predicate P = CmpInst::ICMP_NE; switch (RK) { default: llvm_unreachable("Unknown min/max recurrence kind"); case MRK_UIntMin: P = CmpInst::ICMP_ULT; break; case MRK_UIntMax: P = CmpInst::ICMP_UGT; break; case MRK_SIntMin: P = CmpInst::ICMP_SLT; break; case MRK_SIntMax: P = CmpInst::ICMP_SGT; break; case MRK_FloatMin: P = CmpInst::FCMP_OLT; break; case MRK_FloatMax: P = CmpInst::FCMP_OGT; break; } // We only match FP sequences that are 'fast', so we can unconditionally // set it on any generated instructions. IRBuilder<>::FastMathFlagGuard FMFG(Builder); FastMathFlags FMF; FMF.setFast(); Builder.setFastMathFlags(FMF); Value *Cmp; if (RK == MRK_FloatMin || RK == MRK_FloatMax) Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp"); else Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp"); Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select"); return Select; } InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K, const SCEV *Step, BinaryOperator *BOp, SmallVectorImpl *Casts) : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) { assert(IK != IK_NoInduction && "Not an induction"); // Start value type should match the induction kind and the value // itself should not be null. assert(StartValue && "StartValue is null"); assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) && "StartValue is not a pointer for pointer induction"); assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) && "StartValue is not an integer for integer induction"); // Check the Step Value. It should be non-zero integer value. 
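// ---------------------------------------------------------------------------
// Editorial aside (illustrative, not part of this change): schematically,
// createMinMaxOp above emits a compare followed by a select, e.g. for
// MRK_SIntMax on <4 x i32> (IR sketch; the %left/%right names are invented,
// the "rdx.minmax.*" names come from the calls above):
//   %rdx.minmax.cmp    = icmp sgt <4 x i32> %left, %right
//   %rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp,
//                               <4 x i32> %left, <4 x i32> %right
// For the float kinds the compare is an fcmp olt/ogt, carrying the fast-math
// flags set unconditionally above.
// ---------------------------------------------------------------------------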
assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) && "Step value is zero"); assert((IK != IK_PtrInduction || getConstIntStepValue()) && "Step value should be constant for pointer induction"); assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) && "StepValue is not an integer"); assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) && "StepValue is not FP for FpInduction"); assert((IK != IK_FpInduction || (InductionBinOp && (InductionBinOp->getOpcode() == Instruction::FAdd || InductionBinOp->getOpcode() == Instruction::FSub))) && "Binary opcode should be specified for FP induction"); if (Casts) { for (auto &Inst : *Casts) { RedundantCasts.push_back(Inst); } } } int InductionDescriptor::getConsecutiveDirection() const { ConstantInt *ConstStep = getConstIntStepValue(); if (ConstStep && (ConstStep->isOne() || ConstStep->isMinusOne())) return ConstStep->getSExtValue(); return 0; } ConstantInt *InductionDescriptor::getConstIntStepValue() const { if (isa<SCEVConstant>(Step)) return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue()); return nullptr; } Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index, ScalarEvolution *SE, const DataLayout& DL) const { SCEVExpander Exp(*SE, DL, "induction"); assert(Index->getType() == Step->getType() && "Index type does not match StepValue type"); switch (IK) { case IK_IntInduction: { assert(Index->getType() == StartValue->getType() && "Index type does not match StartValue type"); // FIXME: Theoretically, we can call getAddExpr() of ScalarEvolution // and calculate (Start + Index * Step) for all cases, without // special handling for "isOne" and "isMinusOne". // But in real life the resulting code gets worse: we would mix SCEV // expressions and ADD/SUB operations and end up with redundant // intermediate values computed in different ways, which InstCombine // is unable to reduce. if (getConstIntStepValue() && getConstIntStepValue()->isMinusOne()) return B.CreateSub(StartValue, Index); if (getConstIntStepValue() && getConstIntStepValue()->isOne()) return B.CreateAdd(StartValue, Index); const SCEV *S = SE->getAddExpr(SE->getSCEV(StartValue), SE->getMulExpr(Step, SE->getSCEV(Index))); return Exp.expandCodeFor(S, StartValue->getType(), &*B.GetInsertPoint()); } case IK_PtrInduction: { assert(isa<SCEVConstant>(Step) && "Expected constant step for pointer induction"); const SCEV *S = SE->getMulExpr(SE->getSCEV(Index), Step); Index = Exp.expandCodeFor(S, Index->getType(), &*B.GetInsertPoint()); return B.CreateGEP(nullptr, StartValue, Index); } case IK_FpInduction: { assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value"); assert(InductionBinOp && (InductionBinOp->getOpcode() == Instruction::FAdd || InductionBinOp->getOpcode() == Instruction::FSub) && "Original bin op should be defined for FP induction"); Value *StepValue = cast<SCEVConstant>(Step)->getValue(); // Floating point operations had to be 'fast' to enable the induction. FastMathFlags Flags; Flags.setFast(); Value *MulExp = B.CreateFMul(StepValue, Index); if (isa<Instruction>(MulExp)) // We have to check: MulExp may be a constant. cast<Instruction>(MulExp)->setFastMathFlags(Flags); Value *BOp = B.CreateBinOp(InductionBinOp->getOpcode(), StartValue, MulExp, "induction"); if (isa<Instruction>(BOp)) cast<Instruction>(BOp)->setFastMathFlags(Flags); return BOp; } case IK_NoInduction: return nullptr; } llvm_unreachable("invalid enum"); } bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE, InductionDescriptor &D) { // Here we only handle FP induction variables.
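// ---------------------------------------------------------------------------
// Editorial aside (illustrative sketch, not part of this change): a scalar
// analogue of the IK_IntInduction case of transform() above. The value of
// the induction at a given Index is Start + Index * Step, with the unit-step
// fast paths kept as a plain add/sub for the reason given in the FIXME
// (hypothetical helper, standard C++ only):
#include <cstdint>

static int64_t inductionValueAt(int64_t Start, int64_t Step, int64_t Index) {
  if (Step == -1)
    return Start - Index;        // isMinusOne fast path: plain sub
  if (Step == 1)
    return Start + Index;        // isOne fast path: plain add
  return Start + Index * Step;   // general case (SCEV expansion in the source)
}
// ---------------------------------------------------------------------------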
assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type"); if (TheLoop->getHeader() != Phi->getParent()) return false; // The loop may have multiple entrances or multiple exits; we can analyze // this phi if it has a unique entry value and a unique backedge value. if (Phi->getNumIncomingValues() != 2) return false; Value *BEValue = nullptr, *StartValue = nullptr; if (TheLoop->contains(Phi->getIncomingBlock(0))) { BEValue = Phi->getIncomingValue(0); StartValue = Phi->getIncomingValue(1); } else { assert(TheLoop->contains(Phi->getIncomingBlock(1)) && "Unexpected Phi node in the loop"); BEValue = Phi->getIncomingValue(1); StartValue = Phi->getIncomingValue(0); } BinaryOperator *BOp = dyn_cast<BinaryOperator>(BEValue); if (!BOp) return false; Value *Addend = nullptr; if (BOp->getOpcode() == Instruction::FAdd) { if (BOp->getOperand(0) == Phi) Addend = BOp->getOperand(1); else if (BOp->getOperand(1) == Phi) Addend = BOp->getOperand(0); } else if (BOp->getOpcode() == Instruction::FSub) if (BOp->getOperand(0) == Phi) Addend = BOp->getOperand(1); if (!Addend) return false; // The addend should be loop invariant. if (auto *I = dyn_cast<Instruction>(Addend)) if (TheLoop->contains(I)) return false; // FP Step has unknown SCEV. const SCEV *Step = SE->getUnknown(Addend); D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp); return true; } /// This function is called when we suspect that the update-chain of a phi node /// (whose symbolic SCEV expression is in \p PhiScev) contains redundant casts /// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime /// predicate P under which the SCEV expression for the phi can be the /// AddRecurrence \p AR; See createAddRecFromPHIWithCast). We want to find the /// cast instructions that are involved in the update-chain of this induction. /// A caller that adds the required runtime predicate can be free to drop these /// cast instructions, and compute the phi using \p AR (instead of some scev /// expression with casts). /// /// For example, without a predicate the scev expression can take the following /// form: /// (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy) /// /// It corresponds to the following IR sequence: /// %for.body: /// %x = phi i64 [ 0, %ph ], [ %add, %for.body ] /// %casted_phi = "ExtTrunc i64 %x" /// %add = add i64 %casted_phi, %step /// /// where %x is given in \p PN, /// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate, /// and the IR sequence that "ExtTrunc i64 %x" represents can take one of /// several forms, for example, such as: /// ExtTrunc1: %casted_phi = and %x, 2^n-1 /// or: /// ExtTrunc2: %t = shl %x, m /// %casted_phi = ashr %t, m /// /// If we are able to find such a sequence, we return the instructions /// we found, namely %casted_phi and the instructions on its use-def chain up /// to the phi (not including the phi). static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE, const SCEVUnknown *PhiScev, const SCEVAddRecExpr *AR, SmallVectorImpl<Instruction *> &CastInsts) { assert(CastInsts.empty() && "CastInsts is expected to be empty."); auto *PN = cast<PHINode>(PhiScev->getValue()); assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression"); const Loop *L = AR->getLoop(); // Find any cast instructions that participate in the def-use chain of // PhiScev in the loop. // FORNOW/TODO: We currently expect the def-use chain to include only // two-operand instructions, where one of the operands is an invariant.
// createAddRecFromPHIWithCasts() currently does not support anything more // involved than that, so we keep the search simple. This can be // extended/generalized as needed. auto getDef = [&](const Value *Val) -> Value * { const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val); if (!BinOp) return nullptr; Value *Op0 = BinOp->getOperand(0); Value *Op1 = BinOp->getOperand(1); Value *Def = nullptr; if (L->isLoopInvariant(Op0)) Def = Op1; else if (L->isLoopInvariant(Op1)) Def = Op0; return Def; }; // Look for the instruction that defines the induction via the // loop backedge. BasicBlock *Latch = L->getLoopLatch(); if (!Latch) return false; Value *Val = PN->getIncomingValueForBlock(Latch); if (!Val) return false; // Follow the def-use chain until the induction phi is reached. // If on the way we encounter a Value that has the same SCEV Expr as the // phi node, we can consider the instructions we visit from that point // as part of the cast-sequence that can be ignored. bool InCastSequence = false; auto *Inst = dyn_cast<Instruction>(Val); while (Val != PN) { // If we encountered a phi node other than PN, or if we left the loop, // we bail out. if (!Inst || !L->contains(Inst)) { return false; } auto *AddRec = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Val)); if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR)) InCastSequence = true; if (InCastSequence) { // Only the last instruction in the cast sequence is expected to have // uses outside the induction def-use chain. if (!CastInsts.empty()) if (!Inst->hasOneUse()) return false; CastInsts.push_back(Inst); } Val = getDef(Val); if (!Val) return false; Inst = dyn_cast<Instruction>(Val); } return InCastSequence; } bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop, PredicatedScalarEvolution &PSE, InductionDescriptor &D, bool Assume) { Type *PhiTy = Phi->getType(); // Handle integer and pointer induction variables. // We now also handle FP induction, but we do not try to make a // recurrent expression from the PHI node in-place. if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() && !PhiTy->isFloatTy() && !PhiTy->isDoubleTy() && !PhiTy->isHalfTy()) return false; if (PhiTy->isFloatingPointTy()) return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D); const SCEV *PhiScev = PSE.getSCEV(Phi); const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev); // We need this expression to be an AddRecExpr. if (Assume && !AR) AR = PSE.getAsAddRec(Phi); if (!AR) { DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); return false; } // Record any Cast instructions that participate in the induction update. const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(PhiScev); // If we started from an UnknownSCEV, and managed to build an addRecurrence // only after enabling Assume with PSCEV, this means we may have encountered // cast instructions that required adding a runtime check in order to // guarantee the correctness of the AddRecurrence representation of the // induction. if (PhiScev != AR && SymbolicPhi) { SmallVector<Instruction *, 2> Casts; if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts)) return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts); } return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR); } bool InductionDescriptor::isInductionPHI( PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr, SmallVectorImpl<Instruction *> *CastsToIgnore) { Type *PhiTy = Phi->getType(); // We only handle integer and pointer induction variables. if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy()) return false; // Check that the PHI is consecutive. const SCEV *PhiScev = Expr ?
Expr : SE->getSCEV(Phi); const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev); if (!AR) { DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); return false; } if (AR->getLoop() != TheLoop) { // FIXME: We should treat this as a uniform. Unfortunately, we // don't currently know how to handle uniform PHIs. DEBUG(dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n"); return false; } Value *StartValue = Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader()); const SCEV *Step = AR->getStepRecurrence(*SE); // Calculate the pointer stride and check if it is consecutive. // The stride may be a constant or a loop invariant integer value. const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step); if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop)) return false; if (PhiTy->isIntegerTy()) { D = InductionDescriptor(StartValue, IK_IntInduction, Step, /*BOp=*/ nullptr, CastsToIgnore); return true; } assert(PhiTy->isPointerTy() && "The PHI must be a pointer"); // Pointer induction should be a constant. if (!ConstStep) return false; ConstantInt *CV = ConstStep->getValue(); Type *PointerElementType = PhiTy->getPointerElementType(); // The pointer stride cannot be determined if the pointer element type is not // sized. if (!PointerElementType->isSized()) return false; const DataLayout &DL = Phi->getModule()->getDataLayout(); int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType)); if (!Size) return false; int64_t CVSize = CV->getSExtValue(); if (CVSize % Size) return false; auto *StepValue = SE->getConstant(CV->getType(), CVSize / Size, true /* signed */); D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue); return true; } bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA) { bool Changed = false; // We re-use a vector for the in-loop predecessors. SmallVector<BasicBlock *, 16> InLoopPredecessors; auto RewriteExit = [&](BasicBlock *BB) { assert(InLoopPredecessors.empty() && "Must start with an empty predecessors list!"); auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); }); // See if there are any non-loop predecessors of this exit block and // keep track of the in-loop predecessors. bool IsDedicatedExit = true; for (auto *PredBB : predecessors(BB)) if (L->contains(PredBB)) { if (isa<IndirectBrInst>(PredBB->getTerminator())) // We cannot rewrite exiting edges from an indirectbr. return false; InLoopPredecessors.push_back(PredBB); } else { IsDedicatedExit = false; } assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!"); // Nothing to do if this is already a dedicated exit. if (IsDedicatedExit) return false; auto *NewExitBB = SplitBlockPredecessors( BB, InLoopPredecessors, ".loopexit", DT, LI, PreserveLCSSA); if (!NewExitBB) DEBUG(dbgs() << "WARNING: Can't create a dedicated exit block for loop: " << *L << "\n"); else DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block " << NewExitBB->getName() << "\n"); return true; }; // Walk the exit blocks directly rather than building up a data structure for // them, but only visit each one once. SmallPtrSet<BasicBlock *, 4> Visited; for (auto *BB : L->blocks()) for (auto *SuccBB : successors(BB)) { // We're looking for exit blocks so skip in-loop successors. if (L->contains(SuccBB)) continue; // Visit each exit block exactly once. if (!Visited.insert(SuccBB).second) continue; Changed |= RewriteExit(SuccBB); } return Changed; } /// \brief Returns the instructions that use values defined in the loop.
SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) { SmallVector<Instruction *, 8> UsedOutside; for (auto *Block : L->getBlocks()) // FIXME: I believe that this could use copy_if if the Inst reference could // be adapted into a pointer. for (auto &Inst : *Block) { auto Users = Inst.users(); if (any_of(Users, [&](User *U) { auto *Use = cast<Instruction>(U); return !L->contains(Use->getParent()); })) UsedOutside.push_back(&Inst); } return UsedOutside; } void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) { // By definition, all loop passes need the LoopInfo analysis and the // Dominator tree it depends on. Because they all participate in the loop // pass manager, they must also preserve these. AU.addRequired<DominatorTreeWrapperPass>(); AU.addPreserved<DominatorTreeWrapperPass>(); AU.addRequired<LoopInfoWrapperPass>(); AU.addPreserved<LoopInfoWrapperPass>(); // We must also preserve LoopSimplify and LCSSA. We locally access their IDs // here because users shouldn't directly get them from this header. extern char &LoopSimplifyID; extern char &LCSSAID; AU.addRequiredID(LoopSimplifyID); AU.addPreservedID(LoopSimplifyID); AU.addRequiredID(LCSSAID); AU.addPreservedID(LCSSAID); // This is used in the LPPassManager to perform LCSSA verification on passes // which preserve LCSSA form. AU.addRequired<LCSSAVerificationPass>(); AU.addPreserved<LCSSAVerificationPass>(); // Loop passes are designed to run inside of a loop pass manager which means // that any function analyses they require must be required by the first loop // pass in the manager (so that it is computed before the loop pass manager // runs) and preserved by all loop passes in the manager. To make this // reasonably robust, the set needed for most loop passes is maintained here. // If your loop pass requires an analysis not listed here, you will need to // carefully audit the loop pass manager nesting structure that results. AU.addRequired<AAResultsWrapperPass>(); AU.addPreserved<AAResultsWrapperPass>(); AU.addPreserved<BasicAAWrapperPass>(); AU.addPreserved<GlobalsAAWrapperPass>(); AU.addPreserved<SCEVAAWrapperPass>(); AU.addRequired<ScalarEvolutionWrapperPass>(); AU.addPreserved<ScalarEvolutionWrapperPass>(); } /// Manually defined generic "LoopPass" dependency initialization. This is used /// to initialize the exact set of passes from above in \c /// getLoopAnalysisUsage. It can be used within a loop pass's initialization /// with: /// /// INITIALIZE_PASS_DEPENDENCY(LoopPass) /// /// As-if "LoopPass" were a pass. void llvm::initializeLoopPassPass(PassRegistry &Registry) { INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass) INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) } /// \brief Find string metadata for loop /// /// If it has a value (e.g. {"llvm.distribute", 1}) return the value as an /// operand, or null otherwise. If the string metadata is not found return /// Optional's not-a-value. Optional<const MDOperand *> llvm::findStringMetadataForLoop(Loop *TheLoop, StringRef Name) { MDNode *LoopID = TheLoop->getLoopID(); // Return None if LoopID is null. if (!LoopID) return None; // First operand should refer to the loop id itself.
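// ---------------------------------------------------------------------------
// Editorial aside (illustrative, not part of this change): the shape of the
// metadata walked below (IR sketch; the node numbers are invented). The
// first operand is the self-referential loop id, which is why the scan
// starts at operand 1:
//   !0 = distinct !{!0, !1, !2}                  ; loop id
//   !1 = !{!"llvm.loop.unroll.disable"}          ; name only  -> nullptr
//   !2 = !{!"llvm.loop.vectorize.width", i32 4}  ; name+value -> operand 1
// ---------------------------------------------------------------------------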
assert(LoopID->getNumOperands() > 0 && "requires at least one operand"); assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); // Iterate over LoopID operands and look for MDString metadata. for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) { MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); if (!MD) continue; MDString *S = dyn_cast<MDString>(MD->getOperand(0)); if (!S) continue; // Return the value operand if the MDString holds the expected metadata. if (Name.equals(S->getString())) switch (MD->getNumOperands()) { case 1: return nullptr; case 2: return &MD->getOperand(1); default: llvm_unreachable("loop metadata should have at most two operands"); } } return None; } /// Does a BFS from a given node to all of its children inside a given loop. /// The returned vector of nodes includes the starting point. SmallVector<DomTreeNode *, 16> llvm::collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop) { SmallVector<DomTreeNode *, 16> Worklist; auto AddRegionToWorklist = [&](DomTreeNode *DTN) { // Only include subregions in the top level loop. BasicBlock *BB = DTN->getBlock(); if (CurLoop->contains(BB)) Worklist.push_back(DTN); }; AddRegionToWorklist(N); for (size_t I = 0; I < Worklist.size(); I++) for (DomTreeNode *Child : Worklist[I]->getChildren()) AddRegionToWorklist(Child); return Worklist; } void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr, ScalarEvolution *SE = nullptr, LoopInfo *LI = nullptr) { assert((!DT || L->isLCSSAForm(*DT)) && "Expected LCSSA!"); auto *Preheader = L->getLoopPreheader(); assert(Preheader && "Preheader should exist!"); // Now that we know the removal is safe, remove the loop by changing the // branch from the preheader to go to the single exit block. // // Because we're deleting a large chunk of code at once, the sequence in which // we remove things is very important to avoid invalidation issues. // Tell ScalarEvolution that the loop is deleted. Do this before // deleting the loop so that ScalarEvolution can look at the loop // to determine what it needs to clean up. if (SE) SE->forgetLoop(L); auto *ExitBlock = L->getUniqueExitBlock(); assert(ExitBlock && "Should have a unique exit block!"); assert(L->hasDedicatedExits() && "Loop should have dedicated exits!"); auto *OldBr = dyn_cast<BranchInst>(Preheader->getTerminator()); assert(OldBr && "Preheader must end with a branch"); assert(OldBr->isUnconditional() && "Preheader must have a single successor"); // Connect the preheader to the exit block. Keep the old edge to the header // around to perform the dominator tree update in two separate steps // -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge // preheader -> header. // // // 0. Preheader 1. Preheader 2. Preheader // | | | | // V | V | // Header <--\ | Header <--\ | Header <--\ // | | | | | | | | | | | // | V | | | V | | | V | // | Body --/ | | Body --/ | | Body --/ // V V V V V // Exit Exit Exit // // By doing this in two separate steps we can perform the dominator tree // update without using the batch update API. // // Even when the loop is never executed, we cannot remove the edge from the // source block to the exit block. Consider the case where the unexecuted loop // branches back to an outer loop. If we deleted the loop and removed the edge // coming to this inner loop, this will break the outer loop structure (by // deleting the backedge of the outer loop). If the outer loop is indeed a // non-loop, it will be deleted in a future iteration of loop deletion pass. IRBuilder<> Builder(OldBr); Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock); // Remove the old branch.
The conditional branch becomes a new terminator. OldBr->eraseFromParent(); // Rewrite phis in the exit block to get their inputs from the Preheader // instead of the exiting block. for (PHINode &P : ExitBlock->phis()) { // Set the zero'th element of Phi to be from the preheader and remove all // other incoming values. Given the loop has dedicated exits, all other // incoming values must be from the exiting blocks. int PredIndex = 0; P.setIncomingBlock(PredIndex, Preheader); // Removes all incoming values from all other exiting blocks (including // duplicate values from an exiting block). // Nuke all entries except the zero'th entry which is the preheader entry. // NOTE! We need to remove Incoming Values in the reverse order as done // below, to keep the indices valid for deletion (removeIncomingValues // updates getNumIncomingValues and shifts all values down into the operand // being deleted). for (unsigned i = 0, e = P.getNumIncomingValues() - 1; i != e; ++i) P.removeIncomingValue(e - i, false); assert((P.getNumIncomingValues() == 1 && P.getIncomingBlock(PredIndex) == Preheader) && "Should have exactly one value and that's from the preheader!"); } // Disconnect the loop body by branching directly to its exit. Builder.SetInsertPoint(Preheader->getTerminator()); Builder.CreateBr(ExitBlock); // Remove the old branch. Preheader->getTerminator()->eraseFromParent(); if (DT) { // Update the dominator tree by informing it about the new edge from the // preheader to the exit. DT->insertEdge(Preheader, ExitBlock); // Inform the dominator tree about the removed edge. DT->deleteEdge(Preheader, L->getHeader()); } // Remove the block from the reference counting scheme, so that we can // delete it freely later. for (auto *Block : L->blocks()) Block->dropAllReferences(); if (LI) { // Erase the instructions and the blocks without having to worry // about ordering because we already dropped the references. // NOTE: This iteration is safe because erasing the block does not remove // its entry from the loop's block list. We do that in the next section. for (Loop::block_iterator LpI = L->block_begin(), LpE = L->block_end(); LpI != LpE; ++LpI) (*LpI)->eraseFromParent(); // Finally, the blocks from loopinfo. This has to happen late because // otherwise our loop iterators won't work. SmallPtrSet blocks; blocks.insert(L->block_begin(), L->block_end()); for (BasicBlock *BB : blocks) LI->removeBlock(BB); // The last step is to update LoopInfo now that we've eliminated this loop. LI->erase(L); } } /// Returns true if the instruction in a loop is guaranteed to execute at least /// once. bool llvm::isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo) { // We have to check to make sure that the instruction dominates all // of the exit blocks. If it doesn't, then there is a path out of the loop // which does not execute this instruction, so we can't hoist it. // If the instruction is in the header block for the loop (which is very // common), it is always guaranteed to dominate the exit blocks. Since this // is a common case, and can save some work, check it now. if (Inst.getParent() == CurLoop->getHeader()) // If there's a throw in the header block, we can't guarantee we'll reach // Inst. return !SafetyInfo->HeaderMayThrow; // Somewhere in this loop there is an instruction which may throw and make us // exit the loop. if (SafetyInfo->MayThrow) return false; // Get the exit blocks for the current loop. 
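// ---------------------------------------------------------------------------
// Editorial aside (illustrative sketch, not part of this change): why the
// phi-pruning loop above runs in reverse. Erasing entry k shifts every later
// entry down by one, so deleting back-to-front keeps the remaining indices
// valid. The same pattern with a std::vector (assumed non-empty, as a phi
// always has at least one incoming value):
#include <cstddef>
#include <vector>

static void keepOnlyFirst(std::vector<int> &V) {
  for (std::size_t E = V.size() - 1, I = 0; I != E; ++I)
    V.erase(V.begin() + (E - I)); // erase the last surviving entry first
}
// ---------------------------------------------------------------------------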
SmallVector<BasicBlock *, 8> ExitBlocks; CurLoop->getExitBlocks(ExitBlocks); // Verify that the block dominates each of the exit blocks of the loop. for (BasicBlock *ExitBlock : ExitBlocks) if (!DT->dominates(Inst.getParent(), ExitBlock)) return false; // As a degenerate case, if the loop is statically infinite then we haven't // proven anything since there are no exit blocks. if (ExitBlocks.empty()) return false; // FIXME: In general, we have to prove that the loop isn't an infinite loop. // See http://llvm.org/PR24078 . (The "ExitBlocks.empty()" check above is // just a special case of this.) return true; } Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) { // Only support loops with a unique exiting block, and a latch. if (!L->getExitingBlock()) return None; // Get the branch weights for the loop's backedge. BranchInst *LatchBR = dyn_cast<BranchInst>(L->getLoopLatch()->getTerminator()); if (!LatchBR || LatchBR->getNumSuccessors() != 2) return None; assert((LatchBR->getSuccessor(0) == L->getHeader() || LatchBR->getSuccessor(1) == L->getHeader()) && "At least one edge out of the latch must go to the header"); // To estimate the number of times the loop body was executed, we want to // know the number of times the backedge was taken, vs. the number of times // we exited the loop. uint64_t TrueVal, FalseVal; if (!LatchBR->extractProfMetadata(TrueVal, FalseVal)) return None; if (!TrueVal || !FalseVal) return 0; // Divide the count of the backedge by the count of the edge exiting the loop, // rounding to nearest. if (LatchBR->getSuccessor(0) == L->getHeader()) return (TrueVal + (FalseVal / 2)) / FalseVal; else return (FalseVal + (TrueVal / 2)) / TrueVal; } /// \brief Adds a 'fast' flag to floating point operations. static Value *addFastMathFlag(Value *V) { if (isa<FPMathOperator>(V)) { FastMathFlags Flags; Flags.setFast(); cast<Instruction>(V)->setFastMathFlags(Flags); } return V; } // Helper to generate a log2 shuffle reduction. Value * llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op, RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind, ArrayRef<Value *> RedOps) { unsigned VF = Src->getType()->getVectorNumElements(); // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles // and vector ops, reducing the set of values being computed by half each // round. assert(isPowerOf2_32(VF) && "Reduction emission only supported for pow2 vectors!"); Value *TmpVec = Src; SmallVector<Constant *, 32> ShuffleMask(VF, nullptr); for (unsigned i = VF; i != 1; i >>= 1) { // Move the upper half of the vector to the lower half. for (unsigned j = 0; j != i / 2; ++j) ShuffleMask[j] = Builder.getInt32(i / 2 + j); // Fill the rest of the mask with undef. std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), UndefValue::get(Builder.getInt32Ty())); Value *Shuf = Builder.CreateShuffleVector( TmpVec, UndefValue::get(TmpVec->getType()), ConstantVector::get(ShuffleMask), "rdx.shuf"); if (Op != Instruction::ICmp && Op != Instruction::FCmp) { // Floating point operations had to be 'fast' to enable the reduction. TmpVec = addFastMathFlag(Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf, "bin.rdx")); } else { assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid && "Invalid min/max"); TmpVec = RecurrenceDescriptor::createMinMaxOp(Builder, MinMaxKind, TmpVec, Shuf); } if (!RedOps.empty()) propagateIRFlags(TmpVec, RedOps); } // The result is in the first element of the vector.
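// ---------------------------------------------------------------------------
// Editorial aside (illustrative sketch, not part of this change): a scalar
// analogue of the log2 shuffle reduction above. Each round folds the upper
// half of the live values into the lower half, so a power-of-two VF takes
// log2(VF) rounds and the result lands in element 0 (hypothetical helper):
#include <cstddef>

static int reduceAdd(int *V, std::size_t VF) { // VF must be a power of two
  for (std::size_t I = VF; I != 1; I >>= 1)
    for (std::size_t J = 0; J != I / 2; ++J)
      V[J] += V[I / 2 + J]; // "move the upper half ... to the lower half"
  return V[0];
}
// ---------------------------------------------------------------------------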
return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0)); } /// Create a simple vector reduction specified by an opcode and some /// flags (if generating min/max reductions). Value *llvm::createSimpleTargetReduction( IRBuilder<> &Builder, const TargetTransformInfo *TTI, unsigned Opcode, Value *Src, TargetTransformInfo::ReductionFlags Flags, ArrayRef RedOps) { assert(isa(Src->getType()) && "Type must be a vector"); Value *ScalarUdf = UndefValue::get(Src->getType()->getVectorElementType()); std::function BuildFunc; using RD = RecurrenceDescriptor; RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid; // TODO: Support creating ordered reductions. FastMathFlags FMFFast; FMFFast.setFast(); switch (Opcode) { case Instruction::Add: BuildFunc = [&]() { return Builder.CreateAddReduce(Src); }; break; case Instruction::Mul: BuildFunc = [&]() { return Builder.CreateMulReduce(Src); }; break; case Instruction::And: BuildFunc = [&]() { return Builder.CreateAndReduce(Src); }; break; case Instruction::Or: BuildFunc = [&]() { return Builder.CreateOrReduce(Src); }; break; case Instruction::Xor: BuildFunc = [&]() { return Builder.CreateXorReduce(Src); }; break; case Instruction::FAdd: BuildFunc = [&]() { auto Rdx = Builder.CreateFAddReduce(ScalarUdf, Src); cast(Rdx)->setFastMathFlags(FMFFast); return Rdx; }; break; case Instruction::FMul: BuildFunc = [&]() { auto Rdx = Builder.CreateFMulReduce(ScalarUdf, Src); cast(Rdx)->setFastMathFlags(FMFFast); return Rdx; }; break; case Instruction::ICmp: if (Flags.IsMaxOp) { MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMax : RD::MRK_UIntMax; BuildFunc = [&]() { return Builder.CreateIntMaxReduce(Src, Flags.IsSigned); }; } else { MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMin : RD::MRK_UIntMin; BuildFunc = [&]() { return Builder.CreateIntMinReduce(Src, Flags.IsSigned); }; } break; case Instruction::FCmp: if (Flags.IsMaxOp) { MinMaxKind = RD::MRK_FloatMax; BuildFunc = [&]() { return Builder.CreateFPMaxReduce(Src, Flags.NoNaN); }; } else { MinMaxKind = RD::MRK_FloatMin; BuildFunc = [&]() { return Builder.CreateFPMinReduce(Src, Flags.NoNaN); }; } break; default: llvm_unreachable("Unhandled opcode"); break; } if (TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags)) return BuildFunc(); return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps); } /// Create a vector reduction using a given recurrence descriptor. Value *llvm::createTargetReduction(IRBuilder<> &B, const TargetTransformInfo *TTI, RecurrenceDescriptor &Desc, Value *Src, bool NoNaN) { // TODO: Support in-order reductions based on the recurrence descriptor. 
using RD = RecurrenceDescriptor; RD::RecurrenceKind RecKind = Desc.getRecurrenceKind(); TargetTransformInfo::ReductionFlags Flags; Flags.NoNaN = NoNaN; switch (RecKind) { case RD::RK_FloatAdd: return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags); case RD::RK_FloatMult: return createSimpleTargetReduction(B, TTI, Instruction::FMul, Src, Flags); case RD::RK_IntegerAdd: return createSimpleTargetReduction(B, TTI, Instruction::Add, Src, Flags); case RD::RK_IntegerMult: return createSimpleTargetReduction(B, TTI, Instruction::Mul, Src, Flags); case RD::RK_IntegerAnd: return createSimpleTargetReduction(B, TTI, Instruction::And, Src, Flags); case RD::RK_IntegerOr: return createSimpleTargetReduction(B, TTI, Instruction::Or, Src, Flags); case RD::RK_IntegerXor: return createSimpleTargetReduction(B, TTI, Instruction::Xor, Src, Flags); case RD::RK_IntegerMinMax: { RD::MinMaxRecurrenceKind MMKind = Desc.getMinMaxRecurrenceKind(); Flags.IsMaxOp = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_UIntMax); Flags.IsSigned = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_SIntMin); return createSimpleTargetReduction(B, TTI, Instruction::ICmp, Src, Flags); } case RD::RK_FloatMinMax: { Flags.IsMaxOp = Desc.getMinMaxRecurrenceKind() == RD::MRK_FloatMax; return createSimpleTargetReduction(B, TTI, Instruction::FCmp, Src, Flags); } default: llvm_unreachable("Unhandled RecKind"); } } void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) { auto *VecOp = dyn_cast<Instruction>(I); if (!VecOp) return; auto *Intersection = (OpValue == nullptr) ? dyn_cast<Instruction>(VL[0]) : dyn_cast<Instruction>(OpValue); if (!Intersection) return; const unsigned Opcode = Intersection->getOpcode(); VecOp->copyIRFlags(Intersection); for (auto *V : VL) { auto *Instr = dyn_cast<Instruction>(V); if (!Instr) continue; if (OpValue == nullptr || Opcode == Instr->getOpcode()) VecOp->andIRFlags(V); } } Index: head/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- head/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (revision 329982) +++ head/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (revision 329983) @@ -1,8826 +1,8836 @@ //===- LoopVectorize.cpp - A Loop Vectorizer ------------------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops // and generates target-independent LLVM-IR. // The vectorizer uses the TargetTransformInfo analysis to estimate the costs // of instructions in order to estimate the profitability of vectorization. // // The loop vectorizer combines consecutive loop iterations into a single // 'wide' iteration. After this transformation the index is incremented // by the SIMD vector width, and not by one. // // This pass has four parts: // 1. The main loop pass that drives the different parts. // 2. LoopVectorizationLegality - A unit that checks for the legality // of the vectorization. // 3. InnerLoopVectorizer - A unit that performs the actual // widening of instructions. // 4. LoopVectorizationCostModel - A unit that checks for the profitability // of vectorization. It decides on the optimal vector width, which // can be one, if vectorization is not profitable.
// //===----------------------------------------------------------------------===// // // The reduction-variable vectorization is based on the paper: // D. Nuzman and R. Henderson. Multi-platform Auto-vectorization. // // Variable uniformity checks are inspired by: // Karrenberg, R. and Hack, S. Whole Function Vectorization. // // The interleaved access vectorization is based on the paper: // Dorit Nuzman, Ira Rosen and Ayal Zaks. Auto-Vectorization of Interleaved // Data for SIMD // // Other ideas/concepts are from: // A. Zaks and D. Nuzman. Autovectorization in GCC-two years later. // // S. Maleki, Y. Gao, M. Garzaran, T. Wong and D. Padua. An Evaluation of // Vectorizing Compilers. // //===----------------------------------------------------------------------===// #include "llvm/Transforms/Vectorize/LoopVectorize.h" #include "VPlan.h" #include "VPlanBuilder.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/DemandedBits.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" #include "llvm/IR/Verifier.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/LoopVersioning.h" #include 
#include #include #include #include #include #include #include #include #include #include #include using namespace llvm; #define LV_NAME "loop-vectorize" #define DEBUG_TYPE LV_NAME STATISTIC(LoopsVectorized, "Number of loops vectorized"); STATISTIC(LoopsAnalyzed, "Number of loops analyzed for vectorization"); static cl::opt<bool> EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization.")); /// Loops with a known constant trip count below this number are vectorized only /// if no scalar iteration overheads are incurred. static cl::opt<unsigned> TinyTripCountVectorThreshold( "vectorizer-min-trip-count", cl::init(16), cl::Hidden, cl::desc("Loops with a constant trip count that is smaller than this " "value are vectorized only if no scalar iteration overheads " "are incurred.")); static cl::opt<bool> MaximizeBandwidth( "vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden, cl::desc("Maximize bandwidth when selecting vectorization factor which " "will be determined by the smallest type in loop.")); static cl::opt<bool> EnableInterleavedMemAccesses( "enable-interleaved-mem-accesses", cl::init(false), cl::Hidden, cl::desc("Enable vectorization on interleaved memory accesses in a loop")); /// Maximum factor for an interleaved memory access. static cl::opt<unsigned> MaxInterleaveGroupFactor( "max-interleave-group-factor", cl::Hidden, cl::desc("Maximum factor for an interleaved access group (default = 8)"), cl::init(8)); /// We don't interleave loops with a known constant trip count below this /// number. static const unsigned TinyTripCountInterleaveThreshold = 128; static cl::opt<unsigned> ForceTargetNumScalarRegs( "force-target-num-scalar-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of scalar registers.")); static cl::opt<unsigned> ForceTargetNumVectorRegs( "force-target-num-vector-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of vector registers.")); /// Maximum vectorization interleave count. static const unsigned MaxInterleaveFactor = 16; static cl::opt<unsigned> ForceTargetMaxScalarInterleaveFactor( "force-target-max-scalar-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for " "scalar loops.")); static cl::opt<unsigned> ForceTargetMaxVectorInterleaveFactor( "force-target-max-vector-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for " "vectorized loops.")); static cl::opt<unsigned> ForceTargetInstructionCost( "force-target-instruction-cost", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's expected cost for " "an instruction to a single constant value. Mostly " "useful for getting consistent testing.")); static cl::opt<unsigned> SmallLoopCost( "small-loop-cost", cl::init(20), cl::Hidden, cl::desc( "The cost of a loop that is considered 'small' by the interleaver.")); static cl::opt<bool> LoopVectorizeWithBlockFrequency( "loop-vectorize-with-block-frequency", cl::init(false), cl::Hidden, cl::desc("Enable the use of the block frequency analysis to access PGO " "heuristics minimizing code growth in cold regions and being more " "aggressive in hot regions.")); // Runtime interleave loops for load/store throughput. static cl::opt<bool> EnableLoadStoreRuntimeInterleave( "enable-loadstore-runtime-interleave", cl::init(true), cl::Hidden, cl::desc( "Enable runtime interleaving until load/store ports are saturated"));
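// Since the knobs above are ordinary cl::opt flags, they can be flipped when
// experimenting (the flag values below are illustrative, only the string
// literals in the declarations above are authoritative): directly with opt,
//   opt -loop-vectorize -vectorizer-min-trip-count=4 -small-loop-cost=30 in.ll
// or through the clang driver via -mllvm,
//   clang -O2 -mllvm -enable-interleaved-mem-accesses file.c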
/// The number of stores in a loop that are allowed to need predication. static cl::opt<unsigned> NumberOfStoresToPredicate( "vectorize-num-stores-pred", cl::init(1), cl::Hidden, cl::desc("Max number of stores to be predicated behind an if.")); static cl::opt<bool> EnableIndVarRegisterHeur( "enable-ind-var-reg-heur", cl::init(true), cl::Hidden, cl::desc("Count the induction variable only once when interleaving")); static cl::opt<bool> EnableCondStoresVectorization( "enable-cond-stores-vec", cl::init(true), cl::Hidden, cl::desc("Enable if predication of stores during vectorization.")); static cl::opt<unsigned> MaxNestedScalarReductionIC( "max-nested-scalar-reduction-interleave", cl::init(2), cl::Hidden, cl::desc("The maximum interleave count to use when interleaving a scalar " "reduction in a nested loop.")); static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold( "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum allowed number of runtime memory checks with a " "vectorize(enable) pragma.")); static cl::opt<unsigned> VectorizeSCEVCheckThreshold( "vectorize-scev-check-threshold", cl::init(16), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed.")); static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold( "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed with a " "vectorize(enable) pragma")); /// Create an analysis remark that explains why vectorization failed /// /// \p PassName is the name of the pass (e.g. can be AlwaysPrint). \p /// RemarkName is the identifier for the remark. If \p I is passed it is an /// instruction that prevents vectorization. Otherwise \p TheLoop is used for /// the location of the remark. \return the remark object that can be /// streamed to. static OptimizationRemarkAnalysis createMissedAnalysis(const char *PassName, StringRef RemarkName, Loop *TheLoop, Instruction *I = nullptr) { Value *CodeRegion = TheLoop->getHeader(); DebugLoc DL = TheLoop->getStartLoc(); if (I) { CodeRegion = I->getParent(); // If there is no debug location attached to the instruction, fall back to // using the loop's. if (I->getDebugLoc()) DL = I->getDebugLoc(); } OptimizationRemarkAnalysis R(PassName, RemarkName, DL, CodeRegion); R << "loop not vectorized: "; return R; } namespace { class LoopVectorizationLegality; class LoopVectorizationCostModel; class LoopVectorizationRequirements; } // end anonymous namespace /// Returns true if the given loop body has a cycle, excluding the loop /// itself. static bool hasCyclesInLoopBody(const Loop &L) { if (!L.empty()) return true; for (const auto &SCC : make_range(scc_iterator<Loop, LoopBodyTraits>::begin(L), scc_iterator<Loop, LoopBodyTraits>::end(L))) { if (SCC.size() > 1) { DEBUG(dbgs() << "LVL: Detected a cycle in the loop body:\n"); DEBUG(L.dump()); return true; } } return false; } /// A helper function for converting Scalar types to vector types. /// If the incoming type is void, we return void. If the VF is 1, we return /// the scalar type. static Type *ToVectorTy(Type *Scalar, unsigned VF) { if (Scalar->isVoidTy() || VF == 1) return Scalar; return VectorType::get(Scalar, VF); } // FIXME: The following helper functions have multiple implementations // in the project. They can be effectively organized in a common Load/Store // utilities unit. /// A helper function that returns the pointer operand of a load or store /// instruction. static Value *getPointerOperand(Value *I) { if (auto *LI = dyn_cast<LoadInst>(I)) return LI->getPointerOperand(); if (auto *SI = dyn_cast<StoreInst>(I)) return SI->getPointerOperand(); return nullptr; } /// A helper function that returns the type of loaded or stored value.
static Type *getMemInstValueType(Value *I) { assert((isa<LoadInst>(I) || isa<StoreInst>(I)) && "Expected Load or Store instruction"); if (auto *LI = dyn_cast<LoadInst>(I)) return LI->getType(); return cast<StoreInst>(I)->getValueOperand()->getType(); } /// A helper function that returns the alignment of load or store instruction. static unsigned getMemInstAlignment(Value *I) { assert((isa<LoadInst>(I) || isa<StoreInst>(I)) && "Expected Load or Store instruction"); if (auto *LI = dyn_cast<LoadInst>(I)) return LI->getAlignment(); return cast<StoreInst>(I)->getAlignment(); } /// A helper function that returns the address space of the pointer operand of /// load or store instruction. static unsigned getMemInstAddressSpace(Value *I) { assert((isa<LoadInst>(I) || isa<StoreInst>(I)) && "Expected Load or Store instruction"); if (auto *LI = dyn_cast<LoadInst>(I)) return LI->getPointerAddressSpace(); return cast<StoreInst>(I)->getPointerAddressSpace(); } /// A helper function that returns true if the given type is irregular. The /// type is irregular if its allocated size doesn't equal the store size of an /// element of the corresponding vector type at the given vectorization factor. /// For example, at VF = 4 an i1 has an alloc size of one byte, so an array of /// 4 x i1 occupies 4 bytes, while the store size of <4 x i1> is a single byte. static bool hasIrregularType(Type *Ty, const DataLayout &DL, unsigned VF) { // Determine if an array of VF elements of type Ty is "bitcast compatible" // with a vector. if (VF > 1) { auto *VectorTy = VectorType::get(Ty, VF); return VF * DL.getTypeAllocSize(Ty) != DL.getTypeStoreSize(VectorTy); } // If the vectorization factor is one, we just check if an array of type Ty // requires padding between elements. return DL.getTypeAllocSizeInBits(Ty) != DL.getTypeSizeInBits(Ty); } /// A helper function that returns the reciprocal of the block probability of /// predicated blocks. If we return X, we are assuming the predicated block /// will execute once for every X iterations of the loop header. /// /// TODO: We should use actual block probability here, if available. Currently, /// we always assume predicated blocks have a 50% chance of executing. static unsigned getReciprocalPredBlockProb() { return 2; } /// A helper function that adds a 'fast' flag to floating-point operations. static Value *addFastMathFlag(Value *V) { if (isa<FPMathOperator>(V)) { FastMathFlags Flags; Flags.setFast(); cast<Instruction>(V)->setFastMathFlags(Flags); } return V; } /// A helper function that returns an integer or floating-point constant with /// value C. static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) { return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C) : ConstantFP::get(Ty, C); }
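// Illustrative sketch of the structure produced for a loop vectorized with
// VF = 4 and UF = 1 (see createVectorizedLoopSkeleton below): the trip count
// is rounded down to a multiple of VF * UF for the vector body, and the
// remaining iterations run in a scalar epilogue:
//
//   for (i = 0; i < n - n % 4; i += 4) // vector.body: 4-lane wide IR
//     A[i:i+3] = B[i:i+3] + C[i:i+3];
//   for (; i < n; ++i)                 // scalar epilogue (remainder)
//     A[i] = B[i] + C[i];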
namespace llvm { /// InnerLoopVectorizer vectorizes loops which contain only one basic /// block to a specified vectorization factor (VF). /// This class performs the widening of scalars into vectors, or multiple /// scalars. This class also implements the following features: /// * It inserts an epilogue loop for handling loops that don't have iteration /// counts that are known to be a multiple of the vectorization factor. /// * It handles the code generation for reduction variables. /// * Scalarization (implementation using scalars) of un-vectorizable /// instructions. /// InnerLoopVectorizer does not perform any vectorization-legality /// checks, and relies on the caller to check for the different legality /// aspects. The InnerLoopVectorizer relies on the /// LoopVectorizationLegality class to provide information about the induction /// and reduction variables that were found to a given vectorization factor. class InnerLoopVectorizer { public: InnerLoopVectorizer(Loop *OrigLoop, PredicatedScalarEvolution &PSE, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, unsigned VecWidth, unsigned UnrollFactor, LoopVectorizationLegality *LVL, LoopVectorizationCostModel *CM) : OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TLI(TLI), TTI(TTI), AC(AC), ORE(ORE), VF(VecWidth), UF(UnrollFactor), Builder(PSE.getSE()->getContext()), VectorLoopValueMap(UnrollFactor, VecWidth), Legal(LVL), Cost(CM) {} virtual ~InnerLoopVectorizer() = default; /// Create a new empty loop. Unlink the old loop and connect the new one. /// Return the pre-header block of the new loop. BasicBlock *createVectorizedLoopSkeleton(); /// Widen a single instruction within the innermost loop. void widenInstruction(Instruction &I); /// Fix the vectorized code, taking care of header phis, live-outs, and more. void fixVectorizedLoop(); // Return true if any runtime check is added. bool areSafetyChecksAdded() { return AddedSafetyChecks; } /// A type for vectorized values in the new loop. Each value from the /// original loop, when vectorized, is represented by UF vector values in the /// new unrolled loop, where UF is the unroll factor. using VectorParts = SmallVector<Value *, 2>; /// Vectorize a single PHINode in a block. This method handles the induction /// variable canonicalization. It supports both VF = 1 for unrolled loops and /// arbitrary length vectors. void widenPHIInstruction(Instruction *PN, unsigned UF, unsigned VF); /// A helper function to scalarize a single Instruction in the innermost loop. /// Generates a sequence of scalar instances for each lane between \p MinLane /// and \p MaxLane, times each part between \p MinPart and \p MaxPart, /// inclusive. void scalarizeInstruction(Instruction *Instr, const VPIteration &Instance, bool IfPredicateInstr); /// Widen an integer or floating-point induction variable \p IV. If \p Trunc /// is provided, the integer induction variable will first be truncated to /// the corresponding type. void widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc = nullptr); /// getOrCreateVectorValue and getOrCreateScalarValue coordinate to generate a /// vector or scalar value on-demand if one is not yet available. When /// vectorizing a loop, we visit the definition of an instruction before its /// uses. When visiting the definition, we either vectorize or scalarize the /// instruction, creating an entry for it in the corresponding map. (In some /// cases, such as induction variables, we will create both vector and scalar /// entries.) Then, as we encounter uses of the definition, we derive values /// for each scalar or vector use unless such a value is already available. /// For example, if we scalarize a definition and one of its uses is vector, /// we build the required vector on-demand with an insertelement sequence /// when visiting the use. Otherwise, if the use is scalar, we can use the /// existing scalar definition. /// /// Return a value in the new loop corresponding to \p V from the original /// loop at unroll index \p Part. If the value has already been vectorized, /// the corresponding vector entry in VectorLoopValueMap is returned. If, /// however, the value has a scalar entry in VectorLoopValueMap, we construct /// a new vector value on-demand by inserting the scalar values into a vector /// with an insertelement sequence.
If the value has been neither vectorized /// nor scalarized, it must be loop invariant, so we simply broadcast the /// value into a vector. Value *getOrCreateVectorValue(Value *V, unsigned Part); /// Return a value in the new loop corresponding to \p V from the original /// loop at unroll and vector indices \p Instance. If the value has been /// vectorized but not scalarized, the necessary extractelement instruction /// will be generated. Value *getOrCreateScalarValue(Value *V, const VPIteration &Instance); /// Construct the vector value of a scalarized value \p V one lane at a time. void packScalarIntoVectorValue(Value *V, const VPIteration &Instance); /// Try to vectorize the interleaved access group that \p Instr belongs to. void vectorizeInterleaveGroup(Instruction *Instr); /// Vectorize Load and Store instructions, optionally masking the vector /// operations if \p BlockInMask is non-null. void vectorizeMemoryInstruction(Instruction *Instr, VectorParts *BlockInMask = nullptr); /// \brief Set the debug location in the builder using the debug location in /// the instruction. void setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr); protected: friend class LoopVectorizationPlanner; /// A small list of PHINodes. using PhiVector = SmallVector<PHINode *, 4>; /// A type for scalarized values in the new loop. Each value from the /// original loop, when scalarized, is represented by UF x VF scalar values /// in the new unrolled loop, where UF is the unroll factor and VF is the /// vectorization factor. using ScalarParts = SmallVector<SmallVector<Value *, 4>, 2>; /// Set up the values of the IVs correctly when exiting the vector loop. void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II, Value *CountRoundDown, Value *EndValue, BasicBlock *MiddleBlock); /// Create a new induction variable inside L. PHINode *createInductionVariable(Loop *L, Value *Start, Value *End, Value *Step, Instruction *DL); /// Handle all cross-iteration phis in the header. void fixCrossIterationPHIs(); /// Fix a first-order recurrence. This is the second phase of vectorizing /// this phi node. void fixFirstOrderRecurrence(PHINode *Phi); /// Fix a reduction cross-iteration phi. This is the second phase of /// vectorizing this phi node. void fixReduction(PHINode *Phi); /// \brief The Loop exit block may have single value PHI nodes with some /// incoming value. While vectorizing we only handled real values /// that were defined inside the loop and we should have one value for /// each predecessor of its parent basic block. See PR14725. void fixLCSSAPHIs(); /// Iteratively sink the scalarized operands of a predicated instruction into /// the block that was created for it. void sinkScalarOperands(Instruction *PredInst); /// Shrinks vector element sizes to the smallest bitwidth they can be legally /// represented as. void truncateToMinimalBitwidths(); /// Insert the new loop to the loop hierarchy and pass manager /// and update the analysis passes. void updateAnalysis(); /// Create a broadcast instruction. This method generates a broadcast /// instruction (shuffle) for loop invariant values and for the induction /// value. If this is the induction variable then we extend it to N, N+1, ... /// this is needed because each iteration in the loop corresponds to a SIMD /// element. virtual Value *getBroadcastInstrs(Value *V); /// This function adds (StartIdx, StartIdx + Step, StartIdx + 2*Step, ...) /// to each vector element of Val. The sequence starts at StartIndex. /// \p Opcode is relevant for FP induction variable.
virtual Value *getStepVector(Value *Val, int StartIdx, Value *Step, Instruction::BinaryOps Opcode = Instruction::BinaryOpsEnd); /// Compute scalar induction steps. \p ScalarIV is the scalar induction /// variable on which to base the steps, \p Step is the size of the step, and /// \p EntryVal is the value from the original loop that maps to the steps. /// Note that \p EntryVal doesn't have to be an induction variable (e.g., it /// can be a truncate instruction). void buildScalarSteps(Value *ScalarIV, Value *Step, Value *EntryVal, const InductionDescriptor &ID); /// Create a vector induction phi node based on an existing scalar one. \p /// EntryVal is the value from the original loop that maps to the vector phi /// node, and \p Step is the loop-invariant step. If \p EntryVal is a /// truncate instruction, instead of widening the original IV, we widen a /// version of the IV truncated to \p EntryVal's type. void createVectorIntOrFpInductionPHI(const InductionDescriptor &II, Value *Step, Instruction *EntryVal); /// Returns true if an instruction \p I should be scalarized instead of /// vectorized for the chosen vectorization factor. bool shouldScalarizeInstruction(Instruction *I) const; /// Returns true if we should generate a scalar version of \p IV. bool needsScalarInduction(Instruction *IV) const; /// If there is a cast involved in the induction variable \p ID, which should /// be ignored in the vectorized loop body, this function records the /// VectorLoopValue of the respective Phi also as the VectorLoopValue of the /// cast. We had already proved that the casted Phi is equal to the uncasted /// Phi in the vectorized loop (under a runtime guard), and therefore /// there is no need to vectorize the cast - the same value can be used in the /// vector loop for both the Phi and the cast. /// If \p VectorLoopValue is a scalarized value, \p Lane is also specified, /// Otherwise, \p VectorLoopValue is a widened/vectorized value. void recordVectorLoopValueForInductionCast (const InductionDescriptor &ID, Value *VectorLoopValue, unsigned Part, unsigned Lane = UINT_MAX); /// Generate a shuffle sequence that will reverse the vector Vec. virtual Value *reverseVector(Value *Vec); /// Returns (and creates if needed) the original loop trip count. Value *getOrCreateTripCount(Loop *NewLoop); /// Returns (and creates if needed) the trip count of the widened loop. Value *getOrCreateVectorTripCount(Loop *NewLoop); /// Returns a bitcasted value to the requested vector type. /// Also handles bitcasts of vector <-> vector types. Value *createBitOrPointerCast(Value *V, VectorType *DstVTy, const DataLayout &DL); /// Emit a bypass check to see if the vector trip count is zero, including if /// it overflows. void emitMinimumIterationCountCheck(Loop *L, BasicBlock *Bypass); /// Emit a bypass check to see if all of the SCEV assumptions we've /// had to make are correct. void emitSCEVChecks(Loop *L, BasicBlock *Bypass); /// Emit bypass checks to check any memory assumptions we may have made. void emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass); /// Add additional metadata to \p To that was not present on \p Orig. /// /// Currently this is used to add the noalias annotations based on the /// inserted memchecks. Use this for instructions that are *cloned* into the /// vector loop. void addNewMetadata(Instruction *To, const Instruction *Orig); /// Add metadata from one instruction to another. /// /// This includes both the original MDs from \p From and additional ones (\see /// addNewMetadata). 
Use this for *newly created* instructions in the vector /// loop. void addMetadata(Instruction *To, Instruction *From); /// \brief Similar to the previous function but it adds the metadata to a /// vector of instructions. void addMetadata(ArrayRef<Value *> To, Instruction *From); /// The original loop. Loop *OrigLoop; /// A wrapper around ScalarEvolution used to add runtime SCEV checks. Applies /// dynamic knowledge to simplify SCEV expressions and converts them to a /// more usable form. PredicatedScalarEvolution &PSE; /// Loop Info. LoopInfo *LI; /// Dominator Tree. DominatorTree *DT; /// Alias Analysis. AliasAnalysis *AA; /// Target Library Info. const TargetLibraryInfo *TLI; /// Target Transform Info. const TargetTransformInfo *TTI; /// Assumption Cache. AssumptionCache *AC; /// Interface to emit optimization remarks. OptimizationRemarkEmitter *ORE; /// \brief LoopVersioning. It's only set up (non-null) if memchecks were /// used. /// /// This is currently only used to add no-alias metadata based on the /// memchecks. The actual versioning is performed manually. std::unique_ptr<LoopVersioning> LVer; /// The vectorization SIMD factor to use. Each vector will have this many /// vector elements. unsigned VF; /// The vectorization unroll factor to use. Each scalar is vectorized to this /// many different vector instructions. unsigned UF; /// The builder that we use. IRBuilder<> Builder; // --- Vectorization state --- /// The vector-loop preheader. BasicBlock *LoopVectorPreHeader; /// The scalar-loop preheader. BasicBlock *LoopScalarPreHeader; /// Middle Block between the vector and the scalar. BasicBlock *LoopMiddleBlock; /// The ExitBlock of the scalar loop. BasicBlock *LoopExitBlock; /// The vector loop body. BasicBlock *LoopVectorBody; /// The scalar loop body. BasicBlock *LoopScalarBody; /// A list of all bypass blocks. The first block is the entry of the loop. SmallVector<BasicBlock *, 4> LoopBypassBlocks; /// The new Induction variable which was added to the new block. PHINode *Induction = nullptr; /// The induction variable of the old basic block. PHINode *OldInduction = nullptr; /// Maps values from the original loop to their corresponding values in the /// vectorized loop. A key value can map to either vector values, scalar /// values or both kinds of values, depending on whether the key was /// vectorized and scalarized. VectorizerValueMap VectorLoopValueMap; /// Store instructions that were predicated. SmallVector<Instruction *, 4> PredicatedInstructions; /// Trip count of the original loop. Value *TripCount = nullptr; /// Trip count of the widened loop (TripCount - TripCount % (VF*UF)) Value *VectorTripCount = nullptr; /// The legality analysis. LoopVectorizationLegality *Legal; /// The profitability analysis. LoopVectorizationCostModel *Cost; // Record whether runtime checks are added. bool AddedSafetyChecks = false; // Holds the end values for each induction variable. We save the end values // so we can later fix-up the external users of the induction variables.
DenseMap<PHINode *, Value *> IVEndValues; }; class InnerLoopUnroller : public InnerLoopVectorizer { public: InnerLoopUnroller(Loop *OrigLoop, PredicatedScalarEvolution &PSE, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, unsigned UnrollFactor, LoopVectorizationLegality *LVL, LoopVectorizationCostModel *CM) : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE, 1, UnrollFactor, LVL, CM) {} private: Value *getBroadcastInstrs(Value *V) override; Value *getStepVector(Value *Val, int StartIdx, Value *Step, Instruction::BinaryOps Opcode = Instruction::BinaryOpsEnd) override; Value *reverseVector(Value *Vec) override; }; } // end namespace llvm /// \brief Look for a meaningful debug location on the instruction or its /// operands. static Instruction *getDebugLocFromInstOrOperands(Instruction *I) { if (!I) return I; DebugLoc Empty; if (I->getDebugLoc() != Empty) return I; for (User::op_iterator OI = I->op_begin(), OE = I->op_end(); OI != OE; ++OI) { if (Instruction *OpInst = dyn_cast<Instruction>(*OI)) if (OpInst->getDebugLoc() != Empty) return OpInst; } return I; } void InnerLoopVectorizer::setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr) { if (const Instruction *Inst = dyn_cast_or_null<Instruction>(Ptr)) { const DILocation *DIL = Inst->getDebugLoc(); if (DIL && Inst->getFunction()->isDebugInfoForProfiling() && !isa<DbgInfoIntrinsic>(Inst)) B.SetCurrentDebugLocation(DIL->cloneWithDuplicationFactor(UF * VF)); else B.SetCurrentDebugLocation(DIL); } else B.SetCurrentDebugLocation(DebugLoc()); } #ifndef NDEBUG /// \return string containing a file name and a line # for the given loop. static std::string getDebugLocString(const Loop *L) { std::string Result; if (L) { raw_string_ostream OS(Result); if (const DebugLoc LoopDbgLoc = L->getStartLoc()) LoopDbgLoc.print(OS); else // Just print the module name. OS << L->getHeader()->getParent()->getParent()->getModuleIdentifier(); OS.flush(); } return Result; } #endif void InnerLoopVectorizer::addNewMetadata(Instruction *To, const Instruction *Orig) { // If the loop was versioned with memchecks, add the corresponding no-alias // metadata. if (LVer && (isa<LoadInst>(Orig) || isa<StoreInst>(Orig))) LVer->annotateInstWithNoAlias(To, Orig); } void InnerLoopVectorizer::addMetadata(Instruction *To, Instruction *From) { propagateMetadata(To, From); addNewMetadata(To, From); } void InnerLoopVectorizer::addMetadata(ArrayRef<Value *> To, Instruction *From) { for (Value *V : To) { if (Instruction *I = dyn_cast<Instruction>(V)) addMetadata(I, From); } } namespace llvm { /// \brief The group of interleaved loads/stores sharing the same stride and /// close to each other. /// /// Each member in this group has an index starting from 0, and the largest /// index should be less than the interleave factor, which is equal to the /// absolute value of the access's stride. /// /// E.g. An interleaved load group of factor 4: /// for (unsigned i = 0; i < 1024; i+=4) { /// a = A[i]; // Member of index 0 /// b = A[i+1]; // Member of index 1 /// d = A[i+3]; // Member of index 3 /// ... /// } /// /// An interleaved store group of factor 4: /// for (unsigned i = 0; i < 1024; i+=4) { /// ... /// A[i] = a; // Member of index 0 /// A[i+1] = b; // Member of index 1 /// A[i+2] = c; // Member of index 2 /// A[i+3] = d; // Member of index 3 /// } /// /// Note: the interleaved load group could have gaps (missing members), but /// the interleaved store group doesn't allow gaps.
class InterleaveGroup { public: InterleaveGroup(Instruction *Instr, int Stride, unsigned Align) : Align(Align), InsertPos(Instr) { assert(Align && "The alignment should be non-zero"); Factor = std::abs(Stride); assert(Factor > 1 && "Invalid interleave factor"); Reverse = Stride < 0; Members[0] = Instr; } bool isReverse() const { return Reverse; } unsigned getFactor() const { return Factor; } unsigned getAlignment() const { return Align; } unsigned getNumMembers() const { return Members.size(); } /// \brief Try to insert a new member \p Instr with index \p Index and /// alignment \p NewAlign. The index is related to the leader and it could be /// negative if it is the new leader. /// /// \returns false if the instruction doesn't belong to the group. bool insertMember(Instruction *Instr, int Index, unsigned NewAlign) { assert(NewAlign && "The new member's alignment should be non-zero"); int Key = Index + SmallestKey; // Skip if there is already a member with the same index. if (Members.count(Key)) return false; if (Key > LargestKey) { // The largest index is always less than the interleave factor. if (Index >= static_cast<int>(Factor)) return false; LargestKey = Key; } else if (Key < SmallestKey) { // The distance between the largest and smallest keys must stay within // the interleave factor. if (LargestKey - Key >= static_cast<int>(Factor)) return false; SmallestKey = Key; } // It's always safe to select the minimum alignment. Align = std::min(Align, NewAlign); Members[Key] = Instr; return true; } /// \brief Get the member with the given index \p Index /// /// \returns nullptr if it contains no such member. Instruction *getMember(unsigned Index) const { int Key = SmallestKey + Index; if (!Members.count(Key)) return nullptr; return Members.find(Key)->second; } /// \brief Get the index for the given member. Unlike the key in the member /// map, the index starts from 0. unsigned getIndex(Instruction *Instr) const { for (auto I : Members) if (I.second == Instr) return I.first - SmallestKey; llvm_unreachable("InterleaveGroup contains no such member"); } Instruction *getInsertPos() const { return InsertPos; } void setInsertPos(Instruction *Inst) { InsertPos = Inst; } /// Add metadata (e.g. alias info) from the instructions in this group to \p /// NewInst. /// /// FIXME: this function currently does not add noalias metadata the way /// addNewMetadata does. To do that we need to compute the intersection of the /// noalias info from all members. void addMetadata(Instruction *NewInst) const { SmallVector<Value *, 4> VL; std::transform(Members.begin(), Members.end(), std::back_inserter(VL), [](std::pair<int, Instruction *> p) { return p.second; }); propagateMetadata(NewInst, VL); } private: unsigned Factor; // Interleave Factor. bool Reverse; unsigned Align; DenseMap<int, Instruction *> Members; int SmallestKey = 0; int LargestKey = 0; // To avoid breaking dependences, vectorized instructions of an interleave // group should be inserted at either the first load or the last store in // program order. // // E.g. %even = load i32 // Insert Position // %add = add i32 %even // Use of %even // %odd = load i32 // // store i32 %even // %odd = add i32 // Def of %odd // store i32 %odd // Insert Position Instruction *InsertPos; }; } // end namespace llvm
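// For illustration, a factor-2 load group such as {even = A[i]; odd = A[i+1];}
// vectorized at VF = 4 becomes one wide load followed by two shufflevectors
// that de-interleave the lanes (IR sketched with abbreviated mask syntax):
//   %wide = load <8 x i32>, <8 x i32>* %ptr
//   %even = shufflevector <8 x i32> %wide, <8 x i32> undef, <0, 2, 4, 6>
//   %odd  = shufflevector <8 x i32> %wide, <8 x i32> undef, <1, 3, 5, 7>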
namespace { /// \brief Drive the analysis of interleaved memory accesses in the loop. /// /// Use this class to analyze interleaved accesses only when we can vectorize /// a loop. Otherwise it's meaningless to do analysis as the vectorization /// on interleaved accesses is unsafe. /// /// The analysis collects interleave groups and records the relationships /// between the members and their groups in a map. class InterleavedAccessInfo { public: InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L, DominatorTree *DT, LoopInfo *LI) : PSE(PSE), TheLoop(L), DT(DT), LI(LI) {} ~InterleavedAccessInfo() { SmallSet<InterleaveGroup *, 4> DelSet; // Avoid releasing a pointer twice. for (auto &I : InterleaveGroupMap) DelSet.insert(I.second); for (auto *Ptr : DelSet) delete Ptr; } /// \brief Analyze the interleaved accesses and collect them in interleave /// groups. Substitute symbolic strides using \p Strides. void analyzeInterleaving(const ValueToValueMap &Strides); /// \brief Check if \p Instr belongs to any interleave group. bool isInterleaved(Instruction *Instr) const { return InterleaveGroupMap.count(Instr); } /// \brief Get the interleave group that \p Instr belongs to. /// /// \returns nullptr if \p Instr does not belong to any group. InterleaveGroup *getInterleaveGroup(Instruction *Instr) const { if (InterleaveGroupMap.count(Instr)) return InterleaveGroupMap.find(Instr)->second; return nullptr; } /// \brief Returns true if an interleaved group that may access memory /// out-of-bounds requires a scalar epilogue iteration for correctness. bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; } /// \brief Initialize the LoopAccessInfo used for dependence checking. void setLAI(const LoopAccessInfo *Info) { LAI = Info; } private: /// A wrapper around ScalarEvolution, used to add runtime SCEV checks. /// Simplifies SCEV expressions in the context of existing SCEV assumptions. /// The interleaved access analysis can also add new predicates (for example /// by versioning strides of pointers). PredicatedScalarEvolution &PSE; Loop *TheLoop; DominatorTree *DT; LoopInfo *LI; const LoopAccessInfo *LAI = nullptr; /// True if the loop may contain non-reversed interleaved groups with /// out-of-bounds accesses. We ensure we don't speculatively access memory /// out-of-bounds by executing at least one scalar epilogue iteration. bool RequiresScalarEpilogue = false; /// Holds the relationships between the members and the interleave group. DenseMap<Instruction *, InterleaveGroup *> InterleaveGroupMap; /// Holds dependences among the memory accesses in the loop. It maps a source /// access to a set of dependent sink accesses. DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences; /// \brief The descriptor for a strided memory access. struct StrideDescriptor { StrideDescriptor() = default; StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size, unsigned Align) : Stride(Stride), Scev(Scev), Size(Size), Align(Align) {} // The access's stride. It is negative for a reverse access. int64_t Stride = 0; // The scalar expression of this access. const SCEV *Scev = nullptr; // The size of the memory object. uint64_t Size = 0; // The alignment of this access. unsigned Align = 0; }; /// \brief A type for holding instructions and their stride descriptors. using StrideEntry = std::pair<Instruction *, StrideDescriptor>; /// \brief Create a new interleave group with the given instruction \p Instr, /// stride \p Stride and alignment \p Align. /// /// \returns the newly created interleave group. InterleaveGroup *createInterleaveGroup(Instruction *Instr, int Stride, unsigned Align) { assert(!InterleaveGroupMap.count(Instr) && "Already in an interleaved access group"); InterleaveGroupMap[Instr] = new InterleaveGroup(Instr, Stride, Align); return InterleaveGroupMap[Instr]; } /// \brief Release the group and remove all the relationships.
void releaseGroup(InterleaveGroup *Group) { for (unsigned i = 0; i < Group->getFactor(); i++) if (Instruction *Member = Group->getMember(i)) InterleaveGroupMap.erase(Member); delete Group; } /// \brief Collect all the accesses with a constant stride in program order. void collectConstStrideAccesses( MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo, const ValueToValueMap &Strides); /// \brief Returns true if \p Stride is allowed in an interleaved group. static bool isStrided(int Stride) { unsigned Factor = std::abs(Stride); return Factor >= 2 && Factor <= MaxInterleaveGroupFactor; } /// \brief Returns true if \p BB is a predicated block. bool isPredicated(BasicBlock *BB) const { return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT); } /// \brief Returns true if LoopAccessInfo can be used for dependence queries. bool areDependencesValid() const { return LAI && LAI->getDepChecker().getDependences(); } /// \brief Returns true if memory accesses \p A and \p B can be reordered, if /// necessary, when constructing interleaved groups. /// /// \p A must precede \p B in program order. We return false if reordering is /// not necessary or is prevented because \p A and \p B may be dependent. bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A, StrideEntry *B) const { // Code motion for interleaved accesses can potentially hoist strided loads // and sink strided stores. The code below checks the legality of the // following two conditions: // // 1. Potentially moving a strided load (B) before any store (A) that // precedes B, or // // 2. Potentially moving a strided store (A) after any load or store (B) // that A precedes. // // It's legal to reorder A and B if we know there isn't a dependence from A // to B. Note that this determination is conservative since some // dependences could potentially be reordered safely. // A is potentially the source of a dependence. auto *Src = A->first; auto SrcDes = A->second; // B is potentially the sink of a dependence. auto *Sink = B->first; auto SinkDes = B->second; // Code motion for interleaved accesses can't violate WAR dependences. // Thus, reordering is legal if the source isn't a write. if (!Src->mayWriteToMemory()) return true; // At least one of the accesses must be strided. if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride)) return true; // If dependence information is not available from LoopAccessInfo, // conservatively assume the instructions can't be reordered. if (!areDependencesValid()) return false; // If we know there is a dependence from source to sink, assume the // instructions can't be reordered. Otherwise, reordering is legal. return !Dependences.count(Src) || !Dependences.lookup(Src).count(Sink); } /// \brief Collect the dependences from LoopAccessInfo. /// /// We process the dependences once during the interleaved access analysis to /// enable constant-time dependence queries. void collectDependences() { if (!areDependencesValid()) return; auto *Deps = LAI->getDepChecker().getDependences(); for (auto Dep : *Deps) Dependences[Dep.getSource(*LAI)].insert(Dep.getDestination(*LAI)); } }; /// Utility class for getting and setting loop vectorizer hints in the form /// of loop metadata. /// This class keeps a number of loop annotations locally (as member variables) /// and can, upon request, write them back as metadata on the loop. It will /// initially scan the loop for existing metadata, and will update the local /// values based on information in the loop.
/// We cannot write all values to metadata, as the mere presence of some info, /// for example 'force', means a decision has been made. So, we need to be /// careful NOT to add them if the user hasn't specifically asked so. class LoopVectorizeHints { enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE, HK_ISVECTORIZED }; /// Hint - associates name and validation with the hint value. struct Hint { const char *Name; unsigned Value; // This may have to change for non-numeric values. HintKind Kind; Hint(const char *Name, unsigned Value, HintKind Kind) : Name(Name), Value(Value), Kind(Kind) {} bool validate(unsigned Val) { switch (Kind) { case HK_WIDTH: return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth; case HK_UNROLL: return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor; case HK_FORCE: return (Val <= 1); case HK_ISVECTORIZED: return (Val==0 || Val==1); } return false; } }; /// Vectorization width. Hint Width; /// Vectorization interleave factor. Hint Interleave; /// Vectorization forced Hint Force; /// Already Vectorized Hint IsVectorized; /// Return the loop metadata prefix. static StringRef Prefix() { return "llvm.loop."; } /// True if there is any unsafe math in the loop. bool PotentiallyUnsafe = false; public: enum ForceKind { FK_Undefined = -1, ///< Not selected. FK_Disabled = 0, ///< Forcing disabled. FK_Enabled = 1, ///< Forcing enabled. }; LoopVectorizeHints(const Loop *L, bool DisableInterleaving, OptimizationRemarkEmitter &ORE) : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH), Interleave("interleave.count", DisableInterleaving, HK_UNROLL), Force("vectorize.enable", FK_Undefined, HK_FORCE), IsVectorized("isvectorized", 0, HK_ISVECTORIZED), TheLoop(L), ORE(ORE) { // Populate values with existing loop metadata. getHintsFromMetadata(); // force-vector-interleave overrides DisableInterleaving. if (VectorizerParams::isInterleaveForced()) Interleave.Value = VectorizerParams::VectorizationInterleave; if (IsVectorized.Value != 1) // If the vectorization width and interleaving count are both 1 then // consider the loop to have been already vectorized because there's // nothing more that we can do. IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1; DEBUG(if (DisableInterleaving && Interleave.Value == 1) dbgs() << "LV: Interleaving disabled by the pass manager\n"); } /// Mark the loop L as already vectorized by setting the width to 1. void setAlreadyVectorized() { IsVectorized.Value = 1; Hint Hints[] = {IsVectorized}; writeHintsToMetadata(Hints); } bool allowVectorization(Function *F, Loop *L, bool AlwaysVectorize) const { if (getForce() == LoopVectorizeHints::FK_Disabled) { DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n"); emitRemarkWithHints(); return false; } if (!AlwaysVectorize && getForce() != LoopVectorizeHints::FK_Enabled) { DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n"); emitRemarkWithHints(); return false; } if (getIsVectorized() == 1) { DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n"); // FIXME: Add interleave.disable metadata. This will allow // vectorize.disable to be used without disabling the pass and errors // to differentiate between disabled vectorization and a width of 1. 
ORE.emit([&]() { return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(), "AllDisabled", L->getStartLoc(), L->getHeader()) << "loop not vectorized: vectorization and interleaving are " "explicitly disabled, or the loop has already been " "vectorized"; }); return false; } return true; } /// Dumps all the hint information. void emitRemarkWithHints() const { using namespace ore; ORE.emit([&]() { if (Force.Value == LoopVectorizeHints::FK_Disabled) return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled", TheLoop->getStartLoc(), TheLoop->getHeader()) << "loop not vectorized: vectorization is explicitly disabled"; else { OptimizationRemarkMissed R(LV_NAME, "MissedDetails", TheLoop->getStartLoc(), TheLoop->getHeader()); R << "loop not vectorized"; if (Force.Value == LoopVectorizeHints::FK_Enabled) { R << " (Force=" << NV("Force", true); if (Width.Value != 0) R << ", Vector Width=" << NV("VectorWidth", Width.Value); if (Interleave.Value != 0) R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value); R << ")"; } return R; } }); } unsigned getWidth() const { return Width.Value; } unsigned getInterleave() const { return Interleave.Value; } unsigned getIsVectorized() const { return IsVectorized.Value; } enum ForceKind getForce() const { return (ForceKind)Force.Value; } /// \brief If hints are provided that force vectorization, use the AlwaysPrint /// pass name to force the frontend to print the diagnostic. const char *vectorizeAnalysisPassName() const { if (getWidth() == 1) return LV_NAME; if (getForce() == LoopVectorizeHints::FK_Disabled) return LV_NAME; if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0) return LV_NAME; return OptimizationRemarkAnalysis::AlwaysPrint; } bool allowReordering() const { // When loop hints that enable vectorization are provided, we allow the // vectorizer to change the order of operations given by the scalar loop. // This is not enabled by default because it can be unsafe or inefficient. // For example, reordering floating-point operations will change the way // round-off error accumulates in the loop. return getForce() == LoopVectorizeHints::FK_Enabled || getWidth() > 1; } bool isPotentiallyUnsafe() const { // Avoid FP vectorization if the target is unsure about proper support. // This may be related to the SIMD unit in the target not handling // IEEE 754 FP ops properly, or bad single-to-double promotions. // Otherwise, a sequence of vectorized loops, even without reduction, // could lead to different end results on the destination vectors. return getForce() != LoopVectorizeHints::FK_Enabled && PotentiallyUnsafe; } void setPotentiallyUnsafe() { PotentiallyUnsafe = true; } private: /// Find hints specified in the loop metadata and update local values. void getHintsFromMetadata() { MDNode *LoopID = TheLoop->getLoopID(); if (!LoopID) return; // First operand should refer to the loop id itself. assert(LoopID->getNumOperands() > 0 && "requires at least one operand"); assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { const MDString *S = nullptr; SmallVector<Metadata *, 4> Args;
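// For reference, the loop metadata walked here typically has the following
// shape in the IR (illustrative):
//   br i1 %exitcond, label %exit, label %loop, !llvm.loop !0
//   !0 = distinct !{!0, !1, !2}
//   !1 = !{!"llvm.loop.vectorize.width", i32 4}
//   !2 = !{!"llvm.loop.vectorize.enable", i1 1}
// Operand 0 of !0 is the self-reference asserted on above; each remaining
// operand is either an MDString or an MDNode whose first operand names the
// hint.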
// The expected hint is either a MDString or a MDNode with the first // operand a MDString. if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) { if (!MD || MD->getNumOperands() == 0) continue; S = dyn_cast<MDString>(MD->getOperand(0)); for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i) Args.push_back(MD->getOperand(i)); } else { S = dyn_cast<MDString>(LoopID->getOperand(i)); assert(Args.size() == 0 && "too many arguments for MDString"); } if (!S) continue; // Check if the hint starts with the loop metadata prefix. StringRef Name = S->getString(); if (Args.size() == 1) setHint(Name, Args[0]); } } /// Check a string hint with one operand and set the value if valid. void setHint(StringRef Name, Metadata *Arg) { if (!Name.startswith(Prefix())) return; Name = Name.substr(Prefix().size(), StringRef::npos); const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg); if (!C) return; unsigned Val = C->getZExtValue(); Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized}; for (auto H : Hints) { if (Name == H->Name) { if (H->validate(Val)) H->Value = Val; else DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n"); break; } } } /// Create a new hint from name / value pair. MDNode *createHintMetadata(StringRef Name, unsigned V) const { LLVMContext &Context = TheLoop->getHeader()->getContext(); Metadata *MDs[] = {MDString::get(Context, Name), ConstantAsMetadata::get( ConstantInt::get(Type::getInt32Ty(Context), V))}; return MDNode::get(Context, MDs); } /// Matches metadata with hint name. bool matchesHintMetadataName(MDNode *Node, ArrayRef<Hint> HintTypes) { MDString *Name = dyn_cast<MDString>(Node->getOperand(0)); if (!Name) return false; for (auto H : HintTypes) if (Name->getString().endswith(H.Name)) return true; return false; } /// Sets current hints into loop metadata, keeping other values intact. void writeHintsToMetadata(ArrayRef<Hint> HintTypes) { if (HintTypes.empty()) return; // Reserve the first element to LoopID (see below). SmallVector<Metadata *, 4> MDs(1); // If the loop already has metadata, then ignore the existing operands. MDNode *LoopID = TheLoop->getLoopID(); if (LoopID) { for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { MDNode *Node = cast<MDNode>(LoopID->getOperand(i)); // If node in update list, ignore old value. if (!matchesHintMetadataName(Node, HintTypes)) MDs.push_back(Node); } } // Now, add the missing hints. for (auto H : HintTypes) MDs.push_back(createHintMetadata(Twine(Prefix(), H.Name).str(), H.Value)); // Replace current metadata node with new one. LLVMContext &Context = TheLoop->getHeader()->getContext(); MDNode *NewLoopID = MDNode::get(Context, MDs); // Set operand 0 to refer to the loop id itself. NewLoopID->replaceOperandWith(0, NewLoopID); TheLoop->setLoopID(NewLoopID); } /// The loop these hints belong to. const Loop *TheLoop; /// Interface to emit optimization remarks.
OptimizationRemarkEmitter &ORE; }; } // end anonymous namespace static void emitMissedWarning(Function *F, Loop *L, const LoopVectorizeHints &LH, OptimizationRemarkEmitter *ORE) { LH.emitRemarkWithHints(); if (LH.getForce() == LoopVectorizeHints::FK_Enabled) { if (LH.getWidth() != 1) ORE->emit(DiagnosticInfoOptimizationFailure( DEBUG_TYPE, "FailedRequestedVectorization", L->getStartLoc(), L->getHeader()) << "loop not vectorized: " << "failed explicitly specified loop vectorization"); else if (LH.getInterleave() != 1) ORE->emit(DiagnosticInfoOptimizationFailure( DEBUG_TYPE, "FailedRequestedInterleaving", L->getStartLoc(), L->getHeader()) << "loop not interleaved: " << "failed explicitly specified loop interleaving"); } } namespace { /// LoopVectorizationLegality checks if it is legal to vectorize a loop, and /// to what vectorization factor. /// This class does not look at the profitability of vectorization, only the /// legality. This class has two main kinds of checks: /// * Memory checks - The code in canVectorizeMemory checks if vectorization /// will change the order of memory accesses in a way that will change the /// correctness of the program. /// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory /// checks for a number of different conditions, such as the availability of a /// single induction variable, that all types are supported and vectorizable, /// etc. This code reflects the capabilities of InnerLoopVectorizer. /// This class is also used by InnerLoopVectorizer for identifying /// induction variables and the different reduction variables. class LoopVectorizationLegality { public: LoopVectorizationLegality( Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT, TargetLibraryInfo *TLI, AliasAnalysis *AA, Function *F, const TargetTransformInfo *TTI, std::function<const LoopAccessInfo &(Loop &)> *GetLAA, LoopInfo *LI, OptimizationRemarkEmitter *ORE, LoopVectorizationRequirements *R, - LoopVectorizeHints *H) + LoopVectorizeHints *H, DemandedBits *DB, AssumptionCache *AC) : TheLoop(L), PSE(PSE), TLI(TLI), TTI(TTI), DT(DT), GetLAA(GetLAA), - ORE(ORE), InterleaveInfo(PSE, L, DT, LI), Requirements(R), Hints(H) {} + ORE(ORE), InterleaveInfo(PSE, L, DT, LI), Requirements(R), Hints(H), + DB(DB), AC(AC) {} /// ReductionList contains the reduction descriptors for all /// of the reductions that were found in the loop. using ReductionList = DenseMap<PHINode *, RecurrenceDescriptor>; /// InductionList saves induction variables and maps them to the /// induction descriptor. using InductionList = MapVector<PHINode *, InductionDescriptor>; /// RecurrenceSet contains the phi nodes that are recurrences other than /// inductions and reductions. using RecurrenceSet = SmallPtrSet<const PHINode *, 8>; /// Returns true if it is legal to vectorize this loop. /// This does not mean that it is profitable to vectorize this /// loop, only that it is legal to do so. bool canVectorize(); /// Returns the primary induction variable. PHINode *getPrimaryInduction() { return PrimaryInduction; } /// Returns the reduction variables found in the loop. ReductionList *getReductionVars() { return &Reductions; } /// Returns the induction variables found in the loop. InductionList *getInductionVars() { return &Inductions; } /// Return the first-order recurrences found in the loop. RecurrenceSet *getFirstOrderRecurrences() { return &FirstOrderRecurrences; } /// Return the set of instructions to sink to handle first-order recurrences. DenseMap<Instruction *, Instruction *> &getSinkAfter() { return SinkAfter; }
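// Illustrative example of a first-order recurrence: in
//   t = 0; for (i = 0; i < n; ++i) { b[i] = t + a[i]; t = a[i]; }
// the use of t in iteration i reads the a[i] produced in iteration i-1, so
// the corresponding phi is recorded in FirstOrderRecurrences, and some users
// may have to be sunk past the producer (tracked by the SinkAfter map) to
// keep the vectorized def-use order legal.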
/// Returns the widest induction type. Type *getWidestInductionType() { return WidestIndTy; } /// Returns True if V is a Phi node of an induction variable in this loop. bool isInductionPhi(const Value *V); /// Returns True if V is a cast that is part of an induction def-use chain, /// and had been proven to be redundant under a runtime guard (in other /// words, the cast has the same SCEV expression as the induction phi). bool isCastedInductionVariable(const Value *V); /// Returns True if V can be considered as an induction variable in this /// loop. V can be the induction phi, or some redundant cast in the def-use /// chain of the induction phi. bool isInductionVariable(const Value *V); /// Returns True if PN is a reduction variable in this loop. bool isReductionVariable(PHINode *PN) { return Reductions.count(PN); } /// Returns True if Phi is a first-order recurrence in this loop. bool isFirstOrderRecurrence(const PHINode *Phi); /// Return true if the block BB needs to be predicated in order for the loop /// to be vectorized. bool blockNeedsPredication(BasicBlock *BB); /// Check if this pointer is consecutive when vectorizing. This happens /// when the last index of the GEP is the induction variable, or that the /// pointer itself is an induction variable. /// This check allows us to vectorize A[idx] into a wide load/store. /// Returns: /// 0 - Stride is unknown or non-consecutive. /// 1 - Address is consecutive. /// -1 - Address is consecutive, and decreasing. /// NOTE: This method must only be used before modifying the original scalar /// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965). int isConsecutivePtr(Value *Ptr); /// Returns true if the value V is uniform within the loop. bool isUniform(Value *V); /// Returns the information that we collected about runtime memory check. const RuntimePointerChecking *getRuntimePointerChecking() const { return LAI->getRuntimePointerChecking(); } const LoopAccessInfo *getLAI() const { return LAI; } /// \brief Check if \p Instr belongs to any interleaved access group. bool isAccessInterleaved(Instruction *Instr) { return InterleaveInfo.isInterleaved(Instr); } /// \brief Get the interleaved access group that \p Instr belongs to. const InterleaveGroup *getInterleavedAccessGroup(Instruction *Instr) { return InterleaveInfo.getInterleaveGroup(Instr); } /// \brief Returns true if an interleaved group requires a scalar iteration /// to handle accesses with gaps. bool requiresScalarEpilogue() const { return InterleaveInfo.requiresScalarEpilogue(); } unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); } uint64_t getMaxSafeRegisterWidth() const { return LAI->getDepChecker().getMaxSafeRegisterWidth(); } bool hasStride(Value *V) { return LAI->hasStride(V); } /// Returns true if the target machine supports masked store operation /// for the given \p DataType and kind of access to \p Ptr. bool isLegalMaskedStore(Type *DataType, Value *Ptr) { return isConsecutivePtr(Ptr) && TTI->isLegalMaskedStore(DataType); } /// Returns true if the target machine supports masked load operation /// for the given \p DataType and kind of access to \p Ptr. bool isLegalMaskedLoad(Type *DataType, Value *Ptr) { return isConsecutivePtr(Ptr) && TTI->isLegalMaskedLoad(DataType); } /// Returns true if the target machine supports masked scatter operation /// for the given \p DataType. bool isLegalMaskedScatter(Type *DataType) { return TTI->isLegalMaskedScatter(DataType); } /// Returns true if the target machine supports masked gather operation /// for the given \p DataType.
bool isLegalMaskedGather(Type *DataType) { return TTI->isLegalMaskedGather(DataType); } /// Returns true if the target machine can represent \p V as a masked gather /// or scatter operation. bool isLegalGatherOrScatter(Value *V) { auto *LI = dyn_cast<LoadInst>(V); auto *SI = dyn_cast<StoreInst>(V); if (!LI && !SI) return false; auto *Ptr = getPointerOperand(V); auto *Ty = cast<PointerType>(Ptr->getType())->getElementType(); return (LI && isLegalMaskedGather(Ty)) || (SI && isLegalMaskedScatter(Ty)); } /// Returns true if vector representation of the instruction \p I /// requires mask. bool isMaskRequired(const Instruction *I) { return (MaskedOp.count(I) != 0); } unsigned getNumStores() const { return LAI->getNumStores(); } unsigned getNumLoads() const { return LAI->getNumLoads(); } unsigned getNumPredStores() const { return NumPredStores; } /// Returns true if \p I is an instruction that will be scalarized with /// predication. Such instructions include conditional stores and /// instructions that may divide by zero. bool isScalarWithPredication(Instruction *I); /// Returns true if \p I is a memory instruction with consecutive memory /// access that can be widened. bool memoryInstructionCanBeWidened(Instruction *I, unsigned VF = 1); // Returns true if the NoNaN attribute is set on the function. bool hasFunNoNaNAttr() const { return HasFunNoNaNAttr; } private: /// Check if a single basic block loop is vectorizable. /// At this point we know that this is a loop with a constant trip count /// and we only need to check individual instructions. bool canVectorizeInstrs(); /// When we vectorize loops we may change the order in which /// we read and write from memory. This method checks if it is /// legal to vectorize the code, considering only memory constraints. /// Returns true if the loop is vectorizable. bool canVectorizeMemory(); /// Return true if we can vectorize this loop using the IF-conversion /// transformation. bool canVectorizeWithIfConvert(); /// Return true if all of the instructions in the block can be speculatively /// executed. \p SafePtrs is a list of addresses that are known to be legal /// and we know that we can read from them without segfault. bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs); /// Updates the vectorization state by adding \p Phi to the inductions list. /// This can set \p Phi as the main induction of the loop if \p Phi is a /// better choice for the main induction than the existing one. void addInductionPhi(PHINode *Phi, const InductionDescriptor &ID, SmallPtrSetImpl<Value *> &AllowedExit); /// Create an analysis remark that explains why vectorization failed /// /// \p RemarkName is the identifier for the remark. If \p I is passed it is /// an instruction that prevents vectorization. Otherwise the loop is used /// for the location of the remark. \return the remark object that can be /// streamed to. OptimizationRemarkAnalysis createMissedAnalysis(StringRef RemarkName, Instruction *I = nullptr) const { return ::createMissedAnalysis(Hints->vectorizeAnalysisPassName(), RemarkName, TheLoop, I); } /// \brief If an access has a symbolic stride, this maps the pointer value to /// the stride symbol. const ValueToValueMap *getSymbolicStrides() { // FIXME: Currently, the set of symbolic strides is sometimes queried before // it's collected. This happens from canVectorizeWithIfConvert, when the // pointer is checked to reference consecutive elements suitable for a // masked access. return LAI ? &LAI->getSymbolicStrides() : nullptr; } unsigned NumPredStores = 0;
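// Illustrative example of a predicated store: in
//   for (i = 0; i < n; ++i) if (c[i]) a[i] = 0;
// the store executes conditionally, so it is counted in NumPredStores above
// and must either be emitted as a masked store or be scalarized and placed
// behind a branch; see also the vectorize-num-stores-pred flag earlier in
// this file.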
  /// The loop that we evaluate.
  Loop *TheLoop;

  /// A wrapper around ScalarEvolution used to add runtime SCEV checks.
  /// Applies dynamic knowledge to simplify SCEV expressions in the context
  /// of existing SCEV assumptions. The analysis will also add a minimal set
  /// of new predicates if this is required to enable vectorization and
  /// unrolling.
  PredicatedScalarEvolution &PSE;

  /// Target Library Info.
  TargetLibraryInfo *TLI;

  /// Target Transform Info.
  const TargetTransformInfo *TTI;

  /// Dominator Tree.
  DominatorTree *DT;

  // LoopAccess analysis.
  std::function<const LoopAccessInfo &(Loop &)> *GetLAA;

  // And the loop-accesses info corresponding to this loop. This pointer is
  // null until canVectorizeMemory sets it up.
  const LoopAccessInfo *LAI = nullptr;

  /// Interface to emit optimization remarks.
  OptimizationRemarkEmitter *ORE;

  /// The interleave access information contains groups of interleaved accesses
  /// with the same stride and close to each other.
  InterleavedAccessInfo InterleaveInfo;

  // --- vectorization state --- //

  /// Holds the primary induction variable. This is the counter of the
  /// loop.
  PHINode *PrimaryInduction = nullptr;

  /// Holds the reduction variables.
  ReductionList Reductions;

  /// Holds all of the induction variables that we found in the loop.
  /// Notice that inductions don't need to start at zero and that induction
  /// variables can be pointers.
  InductionList Inductions;

  /// Holds all the casts that participate in the update chain of the induction
  /// variables, and that have been proven to be redundant (possibly under a
  /// runtime guard). These casts can be ignored when creating the vectorized
  /// loop body.
  SmallPtrSet<Instruction *, 4> InductionCastsToIgnore;

  /// Holds the phi nodes that are first-order recurrences.
  RecurrenceSet FirstOrderRecurrences;

  /// Holds instructions that need to sink past other instructions to handle
  /// first-order recurrences.
  DenseMap<Instruction *, Instruction *> SinkAfter;

  /// Holds the widest induction type encountered.
  Type *WidestIndTy = nullptr;

  /// Allowed outside users. This holds the induction and reduction
  /// vars which can be accessed from outside the loop.
  SmallPtrSet<Value *, 4> AllowedExit;

  /// Can we assume the absence of NaNs.
  bool HasFunNoNaNAttr = false;

  /// Vectorization requirements that will go through late-evaluation.
  LoopVectorizationRequirements *Requirements;

  /// Used to emit an analysis of any legality issues.
  LoopVectorizeHints *Hints;

+  /// The demanded bits analysis is used to compute the minimum type size in
+  /// which a reduction can be computed.
+  DemandedBits *DB;
+
+  /// The assumption cache analysis is used to compute the minimum type size in
+  /// which a reduction can be computed.
+  AssumptionCache *AC;
+
  /// While vectorizing these instructions we have to generate a
  /// call to the appropriate masked intrinsic
  SmallPtrSet<const Instruction *, 8> MaskedOp;
};
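// Illustrative note (assumed example, not from this change): a first-order
// recurrence, as recognized by isFirstOrderRecurrence() above, is a phi whose
// value in iteration i depends on a value computed in iteration i-1, e.g.:
//   t = init;
//   for (i = 0; i < n; ++i) { out[i] = t; t = in[i]; }
// Vectorizing it requires combining each vector of 't' values with the last
// element of the previous iteration's vector, which is why such phis are
// tracked separately from inductions and reductions.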
/// LoopVectorizationCostModel - estimates the expected speedups due to
/// vectorization.
/// In many cases vectorization is not profitable. This can happen because of
/// a number of reasons. In this class we mainly attempt to predict the
/// expected speedup/slowdowns due to the supported instruction set. We use the
/// TargetTransformInfo to query the different backends for the cost of
/// different operations.
class LoopVectorizationCostModel {
public:
  LoopVectorizationCostModel(Loop *L, PredicatedScalarEvolution &PSE,
                             LoopInfo *LI, LoopVectorizationLegality *Legal,
                             const TargetTransformInfo &TTI,
                             const TargetLibraryInfo *TLI, DemandedBits *DB,
                             AssumptionCache *AC,
                             OptimizationRemarkEmitter *ORE, const Function *F,
                             const LoopVectorizeHints *Hints)
      : TheLoop(L), PSE(PSE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB),
        AC(AC), ORE(ORE), TheFunction(F), Hints(Hints) {}

  /// \return An upper bound for the vectorization factor, or None if
  /// vectorization should be avoided up front.
  Optional<unsigned> computeMaxVF(bool OptForSize);

  /// Information about vectorization costs.
  struct VectorizationFactor {
    // Vector width with best cost
    unsigned Width;
    // Cost of the loop with that width
    unsigned Cost;
  };

  /// \return The most profitable vectorization factor and the cost of that VF.
  /// This method checks every power of two up to MaxVF. If UserVF is not ZERO
  /// then this vectorization factor will be selected if vectorization is
  /// possible.
  VectorizationFactor selectVectorizationFactor(unsigned MaxVF);

  /// Setup cost-based decisions for user vectorization factor.
  void selectUserVectorizationFactor(unsigned UserVF) {
    collectUniformsAndScalars(UserVF);
    collectInstsToScalarize(UserVF);
  }

  /// \return The size (in bits) of the smallest and widest types in the code
  /// that needs to be vectorized. We ignore values that remain scalar such as
  /// 64 bit loop indices.
  std::pair<unsigned, unsigned> getSmallestAndWidestTypes();

  /// \return The desired interleave count.
  /// If interleave count has been specified by metadata it will be returned.
  /// Otherwise, the interleave count is computed and returned. VF and LoopCost
  /// are the selected vectorization factor and the cost of the selected VF.
  unsigned selectInterleaveCount(bool OptForSize, unsigned VF,
                                 unsigned LoopCost);

  /// Memory access instruction may be vectorized in more than one way.
  /// Form of instruction after vectorization depends on cost.
  /// This function takes cost-based decisions for Load/Store instructions
  /// and collects them in a map. This decisions map is used for building
  /// the lists of loop-uniform and loop-scalar instructions.
  /// The calculated cost is saved with widening decision in order to
  /// avoid redundant calculations.
  void setCostBasedWideningDecision(unsigned VF);

  /// \brief A struct that represents some properties of the register usage
  /// of a loop.
  struct RegisterUsage {
    /// Holds the number of loop invariant values that are used in the loop.
    unsigned LoopInvariantRegs;
    /// Holds the maximum number of concurrent live intervals in the loop.
    unsigned MaxLocalUsers;
    /// Holds the number of instructions in the loop.
    unsigned NumInstructions;
  };

  /// \return Returns information about the register usages of the loop for the
  /// given vectorization factors.
  SmallVector<RegisterUsage, 8> calculateRegisterUsage(ArrayRef<unsigned> VFs);

  /// Collect values we want to ignore in the cost model.
  void collectValuesToIgnore();

  /// \returns The smallest bitwidth each instruction can be represented with.
  /// The vector equivalents of these instructions should be truncated to this
  /// type.
  const MapVector<Instruction *, uint64_t> &getMinimalBitwidths() const {
    return MinBWs;
  }
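  // Illustrative note (assumed example, not from this change): the minimal
  // bitwidth map lets narrow computations stay narrow. If an i8 value is
  // zero-extended to i32, combined with other i8-derived values, and only the
  // low 8 bits of the result are ever used, MinBWs can record 8 for those
  // instructions so the vector code operates on <N x i8> instead of <N x i32>.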
  /// \returns True if it is more profitable to scalarize instruction \p I for
  /// vectorization factor \p VF.
  bool isProfitableToScalarize(Instruction *I, unsigned VF) const {
    assert(VF > 1 && "Profitable to scalarize relevant only for VF > 1.");
    auto Scalars = InstsToScalarize.find(VF);
    assert(Scalars != InstsToScalarize.end() &&
           "VF not yet analyzed for scalarization profitability");
    return Scalars->second.count(I);
  }

  /// Returns true if \p I is known to be uniform after vectorization.
  bool isUniformAfterVectorization(Instruction *I, unsigned VF) const {
    if (VF == 1)
      return true;
    assert(Uniforms.count(VF) && "VF not yet analyzed for uniformity");
    auto UniformsPerVF = Uniforms.find(VF);
    return UniformsPerVF->second.count(I);
  }

  /// Returns true if \p I is known to be scalar after vectorization.
  bool isScalarAfterVectorization(Instruction *I, unsigned VF) const {
    if (VF == 1)
      return true;
    assert(Scalars.count(VF) && "Scalar values are not calculated for VF");
    auto ScalarsPerVF = Scalars.find(VF);
    return ScalarsPerVF->second.count(I);
  }

  /// \returns True if instruction \p I can be truncated to a smaller bitwidth
  /// for vectorization factor \p VF.
  bool canTruncateToMinimalBitwidth(Instruction *I, unsigned VF) const {
    return VF > 1 && MinBWs.count(I) && !isProfitableToScalarize(I, VF) &&
           !isScalarAfterVectorization(I, VF);
  }

  /// Decision that was taken during cost calculation for memory instruction.
  enum InstWidening {
    CM_Unknown,
    CM_Widen,         // For consecutive accesses with stride +1.
    CM_Widen_Reverse, // For consecutive accesses with stride -1.
    CM_Interleave,
    CM_GatherScatter,
    CM_Scalarize
  };

  /// Save vectorization decision \p W and \p Cost taken by the cost model for
  /// instruction \p I and vector width \p VF.
  void setWideningDecision(Instruction *I, unsigned VF, InstWidening W,
                           unsigned Cost) {
    assert(VF >= 2 && "Expected VF >=2");
    WideningDecisions[std::make_pair(I, VF)] = std::make_pair(W, Cost);
  }

  /// Save vectorization decision \p W and \p Cost taken by the cost model for
  /// interleaving group \p Grp and vector width \p VF.
  void setWideningDecision(const InterleaveGroup *Grp, unsigned VF,
                           InstWidening W, unsigned Cost) {
    assert(VF >= 2 && "Expected VF >=2");
    /// Broadcast this decision to all instructions inside the group.
    /// But the cost will be assigned to one instruction only.
    for (unsigned i = 0; i < Grp->getFactor(); ++i) {
      if (auto *I = Grp->getMember(i)) {
        if (Grp->getInsertPos() == I)
          WideningDecisions[std::make_pair(I, VF)] = std::make_pair(W, Cost);
        else
          WideningDecisions[std::make_pair(I, VF)] = std::make_pair(W, 0);
      }
    }
  }

  /// Return the cost model decision for the given instruction \p I and vector
  /// width \p VF. Return CM_Unknown if this instruction did not pass
  /// through the cost modeling.
  InstWidening getWideningDecision(Instruction *I, unsigned VF) {
    assert(VF >= 2 && "Expected VF >=2");
    std::pair<Instruction *, unsigned> InstOnVF = std::make_pair(I, VF);
    auto Itr = WideningDecisions.find(InstOnVF);
    if (Itr == WideningDecisions.end())
      return CM_Unknown;
    return Itr->second.first;
  }

  /// Return the vectorization cost for the given instruction \p I and vector
  /// width \p VF.
  unsigned getWideningCost(Instruction *I, unsigned VF) {
    assert(VF >= 2 && "Expected VF >=2");
    std::pair<Instruction *, unsigned> InstOnVF = std::make_pair(I, VF);
    assert(WideningDecisions.count(InstOnVF) && "The cost is not calculated");
    return WideningDecisions[InstOnVF].second;
  }
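  // Illustrative note (assumed examples, not from this change): for a loop
  // over i, an access A[i] typically gets CM_Widen (stride +1), A[n - i]
  // gets CM_Widen_Reverse (stride -1), A[B[i]] may get CM_GatherScatter on
  // targets with gather support, and a store behind a varying predicate may
  // get CM_Scalarize.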
  /// Return True if instruction \p I is an optimizable truncate whose operand
  /// is an induction variable. Such a truncate will be removed by adding a new
  /// induction variable with the destination type.
  bool isOptimizableIVTruncate(Instruction *I, unsigned VF) {
    // If the instruction is not a truncate, return false.
    auto *Trunc = dyn_cast<TruncInst>(I);
    if (!Trunc)
      return false;

    // Get the source and destination types of the truncate.
    Type *SrcTy = ToVectorTy(cast<CastInst>(I)->getSrcTy(), VF);
    Type *DestTy = ToVectorTy(cast<CastInst>(I)->getDestTy(), VF);

    // If the truncate is free for the given types, return false. Replacing a
    // free truncate with an induction variable would add an induction variable
    // update instruction to each iteration of the loop. We exclude from this
    // check the primary induction variable since it will need an update
    // instruction regardless.
    Value *Op = Trunc->getOperand(0);
    if (Op != Legal->getPrimaryInduction() && TTI.isTruncateFree(SrcTy, DestTy))
      return false;

    // If the truncated value is not an induction variable, return false.
    return Legal->isInductionPhi(Op);
  }

  /// Collects the instructions to scalarize for each predicated instruction in
  /// the loop.
  void collectInstsToScalarize(unsigned VF);

  /// Collect Uniform and Scalar values for the given \p VF.
  /// The sets depend on CM decision for Load/Store instructions
  /// that may be vectorized as interleave, gather-scatter or scalarized.
  void collectUniformsAndScalars(unsigned VF) {
    // Do the analysis once.
    if (VF == 1 || Uniforms.count(VF))
      return;
    setCostBasedWideningDecision(VF);
    collectLoopUniforms(VF);
    collectLoopScalars(VF);
  }

private:
  /// \return An upper bound for the vectorization factor, larger than zero.
  /// One is returned if vectorization should best be avoided due to cost.
  unsigned computeFeasibleMaxVF(bool OptForSize, unsigned ConstTripCount);

  /// The vectorization cost is a combination of the cost itself and a boolean
  /// indicating whether any of the contributing operations will actually
  /// operate on vector values after type legalization in the backend. If this
  /// latter value is false, then all operations will be scalarized (i.e. no
  /// vectorization has actually taken place).
  using VectorizationCostTy = std::pair<unsigned, bool>;

  /// Returns the expected execution cost. The unit of the cost does
  /// not matter because we use the 'cost' units to compare different
  /// vector widths. The cost that is returned is *not* normalized by
  /// the factor width.
  VectorizationCostTy expectedCost(unsigned VF);

  /// Returns the execution time cost of an instruction for a given vector
  /// width. Vector width of one means scalar.
  VectorizationCostTy getInstructionCost(Instruction *I, unsigned VF);

  /// The cost-computation logic from getInstructionCost which provides
  /// the vector type as an output parameter.
  unsigned getInstructionCost(Instruction *I, unsigned VF, Type *&VectorTy);

  /// Calculate vectorization cost of memory instruction \p I.
  unsigned getMemoryInstructionCost(Instruction *I, unsigned VF);

  /// The cost computation for scalarized memory instruction.
  unsigned getMemInstScalarizationCost(Instruction *I, unsigned VF);

  /// The cost computation for interleaving group of memory instructions.
  unsigned getInterleaveGroupCost(Instruction *I, unsigned VF);

  /// The cost computation for Gather/Scatter instruction.
  unsigned getGatherScatterCost(Instruction *I, unsigned VF);

  /// The cost computation for widening instruction \p I with consecutive
  /// memory access.
  unsigned getConsecutiveMemOpCost(Instruction *I, unsigned VF);
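  // Illustrative note (assumed example, not from this change): a single
  // strided access such as A[2*i] can be priced along several of the paths
  // above -- as part of an interleave group (load a wide vector, shuffle out
  // the even lanes), as a gather, or scalarized into VF scalar loads -- and
  // setCostBasedWideningDecision records whichever alternative is cheapest.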
  /// The cost calculation for Load instruction \p I with uniform pointer -
  /// scalar load + broadcast.
  unsigned getUniformMemOpCost(Instruction *I, unsigned VF);

  /// Returns whether the instruction is a load or store and will be emitted
  /// as a vector operation.
  bool isConsecutiveLoadOrStore(Instruction *I);

  /// Create an analysis remark that explains why vectorization failed
  ///
  /// \p RemarkName is the identifier for the remark. \return the remark object
  /// that can be streamed to.
  OptimizationRemarkAnalysis createMissedAnalysis(StringRef RemarkName) {
    return ::createMissedAnalysis(Hints->vectorizeAnalysisPassName(),
                                  RemarkName, TheLoop);
  }

  /// Map of scalar integer values to the smallest bitwidth they can be legally
  /// represented as. The vector equivalents of these values should be truncated
  /// to this type.
  MapVector<Instruction *, uint64_t> MinBWs;

  /// A type representing the costs for instructions if they were to be
  /// scalarized rather than vectorized. The entries are Instruction-Cost
  /// pairs.
  using ScalarCostsTy = DenseMap<Instruction *, unsigned>;

  /// A set containing all BasicBlocks that are known to be present after
  /// vectorization as a predicated block.
  SmallPtrSet<BasicBlock *, 4> PredicatedBBsAfterVectorization;

  /// A map holding scalar costs for different vectorization factors. The
  /// presence of a cost for an instruction in the mapping indicates that the
  /// instruction will be scalarized when vectorizing with the associated
  /// vectorization factor. The entries are VF-ScalarCostTy pairs.
  DenseMap<unsigned, ScalarCostsTy> InstsToScalarize;

  /// Holds the instructions known to be uniform after vectorization.
  /// The data is collected per VF.
  DenseMap<unsigned, SmallPtrSet<Instruction *, 4>> Uniforms;

  /// Holds the instructions known to be scalar after vectorization.
  /// The data is collected per VF.
  DenseMap<unsigned, SmallPtrSet<Instruction *, 4>> Scalars;

  /// Holds the instructions (address computations) that are forced to be
  /// scalarized.
  DenseMap<unsigned, SmallPtrSet<Instruction *, 4>> ForcedScalars;

  /// Returns the expected difference in cost from scalarizing the expression
  /// feeding a predicated instruction \p PredInst. The instructions to
  /// scalarize and their scalar costs are collected in \p ScalarCosts. A
  /// non-negative return value implies the expression will be scalarized.
  /// Currently, only single-use chains are considered for scalarization.
  int computePredInstDiscount(Instruction *PredInst, ScalarCostsTy &ScalarCosts,
                              unsigned VF);

  /// Collect the instructions that are uniform after vectorization. An
  /// instruction is uniform if we represent it with a single scalar value in
  /// the vectorized loop corresponding to each vector iteration. Examples of
  /// uniform instructions include pointer operands of consecutive or
  /// interleaved memory accesses. Note that although uniformity implies an
  /// instruction will be scalar, the reverse is not true. In general, a
  /// scalarized instruction will be represented by VF scalar values in the
  /// vectorized loop, each corresponding to an iteration of the original
  /// scalar loop.
  void collectLoopUniforms(unsigned VF);

  /// Collect the instructions that are scalar after vectorization. An
  /// instruction is scalar if it is known to be uniform or will be scalarized
  /// during vectorization. Non-uniform scalarized instructions will be
  /// represented by VF values in the vectorized loop, each corresponding to an
  /// iteration of the original scalar loop.
  void collectLoopScalars(unsigned VF);

  /// Keeps cost model vectorization decision and cost for instructions.
  /// Right now it is used for memory instructions only.
  using DecisionList = DenseMap<std::pair<Instruction *, unsigned>,
                                std::pair<InstWidening, unsigned>>;

  DecisionList WideningDecisions;

public:
  /// The loop that we evaluate.
  Loop *TheLoop;
  /// Predicated scalar evolution analysis.
  PredicatedScalarEvolution &PSE;

  /// Loop Info analysis.
  LoopInfo *LI;

  /// Vectorization legality.
  LoopVectorizationLegality *Legal;

  /// Vector target information.
  const TargetTransformInfo &TTI;

  /// Target Library Info.
  const TargetLibraryInfo *TLI;

  /// Demanded bits analysis.
  DemandedBits *DB;

  /// Assumption cache.
  AssumptionCache *AC;

  /// Interface to emit optimization remarks.
  OptimizationRemarkEmitter *ORE;

  const Function *TheFunction;

  /// Loop Vectorize Hint.
  const LoopVectorizeHints *Hints;

  /// Values to ignore in the cost model.
  SmallPtrSet<const Value *, 16> ValuesToIgnore;

  /// Values to ignore in the cost model when VF > 1.
  SmallPtrSet<const Value *, 16> VecValuesToIgnore;
};

} // end anonymous namespace

namespace llvm {

/// LoopVectorizationPlanner - drives the vectorization process after having
/// passed Legality checks.
/// The planner builds and optimizes the Vectorization Plans which record the
/// decisions on how to vectorize the given loop. In particular, they represent
/// the control-flow of the vectorized version, the replication of instructions
/// that are to be scalarized, and interleave access groups.
class LoopVectorizationPlanner {
  /// The loop that we evaluate.
  Loop *OrigLoop;

  /// Loop Info analysis.
  LoopInfo *LI;

  /// Target Library Info.
  const TargetLibraryInfo *TLI;

  /// Target Transform Info.
  const TargetTransformInfo *TTI;

  /// The legality analysis.
  LoopVectorizationLegality *Legal;

  /// The profitability analysis.
  LoopVectorizationCostModel &CM;

  using VPlanPtr = std::unique_ptr<VPlan>;

  SmallVector<VPlanPtr, 4> VPlans;

  /// This class is used to enable the VPlan to invoke a method of ILV. This is
  /// needed until the method is refactored out of ILV and becomes reusable.
  struct VPCallbackILV : public VPCallback {
    InnerLoopVectorizer &ILV;

    VPCallbackILV(InnerLoopVectorizer &ILV) : ILV(ILV) {}

    Value *getOrCreateVectorValues(Value *V, unsigned Part) override {
      return ILV.getOrCreateVectorValue(V, Part);
    }
  };

  /// A builder used to construct the current plan.
  VPBuilder Builder;

  /// When we if-convert we need to create edge masks. We have to cache values
  /// so that we don't end up with exponential recursion/IR. Note that
  /// if-conversion currently takes place during VPlan-construction, so these
  /// caches are only used at that stage.
  using EdgeMaskCacheTy =
      DenseMap<std::pair<BasicBlock *, BasicBlock *>, VPValue *>;
  using BlockMaskCacheTy = DenseMap<BasicBlock *, VPValue *>;
  EdgeMaskCacheTy EdgeMaskCache;
  BlockMaskCacheTy BlockMaskCache;

  unsigned BestVF = 0;
  unsigned BestUF = 0;

public:
  LoopVectorizationPlanner(Loop *L, LoopInfo *LI, const TargetLibraryInfo *TLI,
                           const TargetTransformInfo *TTI,
                           LoopVectorizationLegality *Legal,
                           LoopVectorizationCostModel &CM)
      : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM) {}

  /// Plan how to best vectorize, return the best VF and its cost.
  LoopVectorizationCostModel::VectorizationFactor plan(bool OptForSize,
                                                       unsigned UserVF);

  /// Finalize the best decision and dispose of all other VPlans.
  void setBestPlan(unsigned VF, unsigned UF);

  /// Generate the IR code for the body of the vectorized loop according to the
  /// best selected VPlan.
  void executePlan(InnerLoopVectorizer &LB, DominatorTree *DT);

  void printPlans(raw_ostream &O) {
    for (const auto &Plan : VPlans)
      O << *Plan;
  }

protected:
  /// Collect the instructions from the original loop that would be trivially
  /// dead in the vectorized loop if generated.
  void collectTriviallyDeadInstructions(
      SmallPtrSetImpl<Instruction *> &DeadInstructions);
  /// A range of powers-of-2 vectorization factors with fixed start and
  /// adjustable end. The range includes start and excludes end, e.g.,:
  /// [1, 9) = {1, 2, 4, 8}
  struct VFRange {
    // A power of 2.
    const unsigned Start;

    // Need not be a power of 2. If End <= Start range is empty.
    unsigned End;
  };

  /// Test a \p Predicate on a \p Range of VF's. Return the value of applying
  /// \p Predicate on Range.Start, possibly decreasing Range.End such that the
  /// returned value holds for the entire \p Range.
  bool getDecisionAndClampRange(const std::function<bool(unsigned)> &Predicate,
                                VFRange &Range);

  /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
  /// according to the information gathered by Legal when it checked if it is
  /// legal to vectorize the loop.
  void buildVPlans(unsigned MinVF, unsigned MaxVF);

private:
  /// A helper function that computes the predicate of the block BB, assuming
  /// that the header block of the loop is set to True. It returns the *entry*
  /// mask for the block BB.
  VPValue *createBlockInMask(BasicBlock *BB, VPlanPtr &Plan);

  /// A helper function that computes the predicate of the edge between SRC
  /// and DST.
  VPValue *createEdgeMask(BasicBlock *Src, BasicBlock *Dst, VPlanPtr &Plan);

  /// Check if \I belongs to an Interleave Group within the given VF \p Range,
  /// \return true in the first returned value if so and false otherwise.
  /// Build a new VPInterleaveGroup Recipe if \I is the primary member of an IG
  /// for \p Range.Start, and provide it as the second returned value.
  /// Note that if \I is an adjunct member of an IG for \p Range.Start, the
  /// \return value is <true, nullptr>, as it is handled by another recipe.
  /// \p Range.End may be decreased to ensure same decision from \p Range.Start
  /// to \p Range.End.
  VPInterleaveRecipe *tryToInterleaveMemory(Instruction *I, VFRange &Range);

  // Check if \I is a memory instruction to be widened for \p Range.Start and
  // potentially masked. Such instructions are handled by a recipe that takes an
  // additional VPInstruction for the mask.
  VPWidenMemoryInstructionRecipe *tryToWidenMemory(Instruction *I,
                                                   VFRange &Range,
                                                   VPlanPtr &Plan);

  /// Check if an induction recipe should be constructed for \I within the given
  /// VF \p Range. If so build and return it. If not, return null. \p Range.End
  /// may be decreased to ensure same decision from \p Range.Start to
  /// \p Range.End.
  VPWidenIntOrFpInductionRecipe *tryToOptimizeInduction(Instruction *I,
                                                        VFRange &Range);

  /// Handle non-loop phi nodes. Currently all such phi nodes are turned into
  /// a sequence of select instructions as the vectorizer currently performs
  /// full if-conversion.
  VPBlendRecipe *tryToBlend(Instruction *I, VPlanPtr &Plan);

  /// Check if \p I can be widened within the given VF \p Range. If \p I can be
  /// widened for \p Range.Start, check if the last recipe of \p VPBB can be
  /// extended to include \p I or else build a new VPWidenRecipe for it and
  /// append it to \p VPBB. Return true if \p I can be widened for Range.Start,
  /// false otherwise. Range.End may be decreased to ensure same decision from
  /// \p Range.Start to \p Range.End.
  bool tryToWiden(Instruction *I, VPBasicBlock *VPBB, VFRange &Range);
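  // Illustrative note (assumed example, not from this change): when
  // buildVPlans(1, 8) considers the range [1, 9) and some per-VF predicate
  // (say, "this truncate is an optimizable IV truncate") holds for VF = 1 and
  // 2 but not for VF = 4, getDecisionAndClampRange clamps the range to [1, 4)
  // so one VPlan covers {1, 2}, and a fresh range starting at 4 is planned
  // separately.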
  /// Build a VPReplicationRecipe for \p I and enclose it within a Region if it
  /// is predicated. \return \p VPBB augmented with this new recipe if \p I is
  /// not predicated, otherwise \return a new VPBasicBlock that succeeds the new
  /// Region. Update the packing decision of predicated instructions if they
  /// feed \p I. Range.End may be decreased to ensure same recipe behavior from
  /// \p Range.Start to \p Range.End.
  VPBasicBlock *handleReplication(
      Instruction *I, VFRange &Range, VPBasicBlock *VPBB,
      DenseMap<Instruction *, VPRecipeBase *> &PredInst2Recipe,
      VPlanPtr &Plan);

  /// Create a replicating region for instruction \p I that requires
  /// predication. \p PredRecipe is a VPReplicateRecipe holding \p I.
  VPRegionBlock *createReplicateRegion(Instruction *I, VPRecipeBase *PredRecipe,
                                       VPlanPtr &Plan);

  /// Build a VPlan according to the information gathered by Legal. \return a
  /// VPlan for vectorization factors \p Range.Start and up to \p Range.End
  /// exclusive, possibly decreasing \p Range.End.
  VPlanPtr buildVPlan(VFRange &Range,
                      const SmallPtrSetImpl<Value *> &NeedDef);
};

} // end namespace llvm

namespace {

/// \brief This holds vectorization requirements that must be verified late in
/// the process. The requirements are set by legalize and costmodel. Once
/// vectorization has been determined to be possible and profitable the
/// requirements can be verified by looking for metadata or compiler options.
/// For example, some loops require FP commutativity which is only allowed if
/// vectorization is explicitly specified or if the fast-math compiler option
/// has been provided.
/// Late evaluation of these requirements allows helpful diagnostics to be
/// composed that tell the user what needs to be done to vectorize the loop.
/// For example, by specifying #pragma clang loop vectorize or -ffast-math.
/// Late evaluation should be used only when diagnostics can be generated that
/// can be followed by a non-expert user.
class LoopVectorizationRequirements {
public:
  LoopVectorizationRequirements(OptimizationRemarkEmitter &ORE) : ORE(ORE) {}

  void addUnsafeAlgebraInst(Instruction *I) {
    // First unsafe algebra instruction.
    if (!UnsafeAlgebraInst)
      UnsafeAlgebraInst = I;
  }

  void addRuntimePointerChecks(unsigned Num) { NumRuntimePointerChecks = Num; }

  bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints) {
    const char *PassName = Hints.vectorizeAnalysisPassName();
    bool Failed = false;
    if (UnsafeAlgebraInst && !Hints.allowReordering()) {
      ORE.emit([&]() {
        return OptimizationRemarkAnalysisFPCommute(
                   PassName, "CantReorderFPOps",
                   UnsafeAlgebraInst->getDebugLoc(),
                   UnsafeAlgebraInst->getParent())
               << "loop not vectorized: cannot prove it is safe to reorder "
                  "floating-point operations";
      });
      Failed = true;
    }

    // Test if runtime memcheck thresholds are exceeded.
    bool PragmaThresholdReached =
        NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
    bool ThresholdReached =
        NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
    if ((ThresholdReached && !Hints.allowReordering()) ||
        PragmaThresholdReached) {
      ORE.emit([&]() {
        return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps",
                                                  L->getStartLoc(),
                                                  L->getHeader())
               << "loop not vectorized: cannot prove it is safe to reorder "
                  "memory operations";
      });
      DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
      Failed = true;
    }

    return Failed;
  }

private:
  unsigned NumRuntimePointerChecks = 0;
  Instruction *UnsafeAlgebraInst = nullptr;

  /// Interface to emit optimization remarks.
  OptimizationRemarkEmitter &ORE;
};

} // end anonymous namespace

static void addAcyclicInnerLoop(Loop &L, SmallVectorImpl<Loop *> &V) {
  if (L.empty()) {
    if (!hasCyclesInLoopBody(L))
      V.push_back(&L);
    return;
  }
  for (Loop *InnerL : L)
    addAcyclicInnerLoop(*InnerL, V);
}

namespace {
/// The LoopVectorize Pass.
struct LoopVectorize : public FunctionPass {
  /// Pass identification, replacement for typeid
  static char ID;

  LoopVectorizePass Impl;

  explicit LoopVectorize(bool NoUnrolling = false, bool AlwaysVectorize = true)
      : FunctionPass(ID) {
    Impl.DisableUnrolling = NoUnrolling;
    Impl.AlwaysVectorize = AlwaysVectorize;
    initializeLoopVectorizePass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;

    auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto *BFI = &getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
    auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
    auto *TLI = TLIP ? &TLIP->getTLI() : nullptr;
    auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
    auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
    auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
    auto *DB = &getAnalysis<DemandedBitsWrapperPass>().getDemandedBits();
    auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();

    std::function<const LoopAccessInfo &(Loop &)> GetLAA =
        [&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); };

    return Impl.runImpl(F, *SE, *LI, *TTI, *DT, *BFI, TLI, *DB, *AA, *AC,
                        GetLAA, *ORE);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<BlockFrequencyInfoWrapperPass>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
    AU.addRequired<ScalarEvolutionWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<LoopAccessLegacyAnalysis>();
    AU.addRequired<DemandedBitsWrapperPass>();
    AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
    AU.addPreserved<LoopInfoWrapperPass>();
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addPreserved<BasicAAWrapperPass>();
    AU.addPreserved<GlobalsAAWrapperPass>();
  }
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// Implementation of LoopVectorizationLegality, InnerLoopVectorizer and
// LoopVectorizationCostModel and LoopVectorizationPlanner.
//===----------------------------------------------------------------------===//

Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
  // We need to place the broadcast of invariant variables outside the loop.
  Instruction *Instr = dyn_cast<Instruction>(V);
  bool NewInstr = (Instr && Instr->getParent() == LoopVectorBody);
  bool Invariant = OrigLoop->isLoopInvariant(V) && !NewInstr;

  // Place the code for broadcasting invariant variables in the new preheader.
  IRBuilder<>::InsertPointGuard Guard(Builder);
  if (Invariant)
    Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());

  // Broadcast the scalar into all locations in the vector.
  Value *Shuf = Builder.CreateVectorSplat(VF, V, "broadcast");

  return Shuf;
}

void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
    const InductionDescriptor &II, Value *Step, Instruction *EntryVal) {
  Value *Start = II.getStartValue();

  // Construct the initial value of the vector IV in the vector loop preheader
  auto CurrIP = Builder.saveIP();
  Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
  if (isa<TruncInst>(EntryVal)) {
    assert(Start->getType()->isIntegerTy() &&
           "Truncation requires an integer type");
    auto *TruncType = cast<IntegerType>(EntryVal->getType());
    Step = Builder.CreateTrunc(Step, TruncType);
    Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
  }
  Value *SplatStart = Builder.CreateVectorSplat(VF, Start);
  Value *SteppedStart =
      getStepVector(SplatStart, 0, Step, II.getInductionOpcode());

  // We create vector phi nodes for both integer and floating-point induction
  // variables. Here, we determine the kind of arithmetic we will perform.
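  // For example (illustrative, not from this change): an integer IV "i += S"
  // uses Add/Mul below, while an FP induction such as "f -= S" uses the
  // descriptor's own opcode (here FSub) for the update and FMul for scaling
  // the step by the vectorization factor.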
  Instruction::BinaryOps AddOp;
  Instruction::BinaryOps MulOp;
  if (Step->getType()->isIntegerTy()) {
    AddOp = Instruction::Add;
    MulOp = Instruction::Mul;
  } else {
    AddOp = II.getInductionOpcode();
    MulOp = Instruction::FMul;
  }

  // Multiply the vectorization factor by the step using integer or
  // floating-point arithmetic as appropriate.
  Value *ConstVF = getSignedIntOrFpConstant(Step->getType(), VF);
  Value *Mul = addFastMathFlag(Builder.CreateBinOp(MulOp, Step, ConstVF));

  // Create a vector splat to use in the induction update.
  //
  // FIXME: If the step is non-constant, we create the vector splat with
  //        IRBuilder. IRBuilder can constant-fold the multiply, but it doesn't
  //        handle a constant vector splat.
  Value *SplatVF = isa<Constant>(Mul)
                       ? ConstantVector::getSplat(VF, cast<Constant>(Mul))
                       : Builder.CreateVectorSplat(VF, Mul);
  Builder.restoreIP(CurrIP);

  // We may need to add the step a number of times, depending on the unroll
  // factor. The last of those goes into the PHI.
  PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind",
                                    &*LoopVectorBody->getFirstInsertionPt());
  Instruction *LastInduction = VecInd;
  for (unsigned Part = 0; Part < UF; ++Part) {
    VectorLoopValueMap.setVectorValue(EntryVal, Part, LastInduction);

    if (isa<TruncInst>(EntryVal))
      addMetadata(LastInduction, EntryVal);
    else
      recordVectorLoopValueForInductionCast(II, LastInduction, Part);

    LastInduction = cast<Instruction>(addFastMathFlag(
        Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add")));
  }

  // Move the last step to the end of the latch block. This ensures consistent
  // placement of all induction updates.
  auto *LoopVectorLatch = LI->getLoopFor(LoopVectorBody)->getLoopLatch();
  auto *Br = cast<BranchInst>(LoopVectorLatch->getTerminator());
  auto *ICmp = cast<Instruction>(Br->getCondition());
  LastInduction->moveBefore(ICmp);
  LastInduction->setName("vec.ind.next");

  VecInd->addIncoming(SteppedStart, LoopVectorPreHeader);
  VecInd->addIncoming(LastInduction, LoopVectorLatch);
}

bool InnerLoopVectorizer::shouldScalarizeInstruction(Instruction *I) const {
  return Cost->isScalarAfterVectorization(I, VF) ||
         Cost->isProfitableToScalarize(I, VF);
}

bool InnerLoopVectorizer::needsScalarInduction(Instruction *IV) const {
  if (shouldScalarizeInstruction(IV))
    return true;
  auto isScalarInst = [&](User *U) -> bool {
    auto *I = cast<Instruction>(U);
    return (OrigLoop->contains(I) && shouldScalarizeInstruction(I));
  };
  return llvm::any_of(IV->users(), isScalarInst);
}

void InnerLoopVectorizer::recordVectorLoopValueForInductionCast(
    const InductionDescriptor &ID, Value *VectorLoopVal, unsigned Part,
    unsigned Lane) {
  const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
  if (Casts.empty())
    return;
  // Only the first Cast instruction in the Casts vector is of interest.
  // The rest of the Casts (if exist) have no uses outside the
  // induction update chain itself.
  Instruction *CastInst = *Casts.begin();
  if (Lane < UINT_MAX)
    VectorLoopValueMap.setScalarValue(CastInst, {Part, Lane}, VectorLoopVal);
  else
    VectorLoopValueMap.setVectorValue(CastInst, Part, VectorLoopVal);
}

void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc) {
  assert((IV->getType()->isIntegerTy() || IV != OldInduction) &&
         "Primary induction variable must have an integer type");

  auto II = Legal->getInductionVars()->find(IV);
  assert(II != Legal->getInductionVars()->end() && "IV is not an induction");

  auto ID = II->second;
  assert(IV->getType() == ID.getStartValue()->getType() && "Types must match");

  // The scalar value to broadcast. This will be derived from the canonical
  // induction variable.
  Value *ScalarIV = nullptr;

  // The value from the original loop to which we are mapping the new induction
  // variable.
  Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : IV;

  // True if we have vectorized the induction variable.
  auto VectorizedIV = false;

  // Determine if we want a scalar version of the induction variable. This is
  // true if the induction variable itself is not widened, or if it has at
  // least one user in the loop that is not widened.
  auto NeedsScalarIV = VF > 1 && needsScalarInduction(EntryVal);

  // Generate code for the induction step. Note that induction steps are
  // required to be loop-invariant.
  assert(PSE.getSE()->isLoopInvariant(ID.getStep(), OrigLoop) &&
         "Induction step should be loop invariant");
  auto &DL = OrigLoop->getHeader()->getModule()->getDataLayout();
  Value *Step = nullptr;
  if (PSE.getSE()->isSCEVable(IV->getType())) {
    SCEVExpander Exp(*PSE.getSE(), DL, "induction");
    Step = Exp.expandCodeFor(ID.getStep(), ID.getStep()->getType(),
                             LoopVectorPreHeader->getTerminator());
  } else {
    Step = cast<SCEVUnknown>(ID.getStep())->getValue();
  }

  // Try to create a new independent vector induction variable. If we can't
  // create the phi node, we will splat the scalar induction variable in each
  // loop iteration.
  if (VF > 1 && !shouldScalarizeInstruction(EntryVal)) {
    createVectorIntOrFpInductionPHI(ID, Step, EntryVal);
    VectorizedIV = true;
  }

  // If we haven't yet vectorized the induction variable, or if we will create
  // a scalar one, we need to define the scalar induction variable and step
  // values. If we were given a truncation type, truncate the canonical
  // induction variable and step. Otherwise, derive these values from the
  // induction descriptor.
  if (!VectorizedIV || NeedsScalarIV) {
    ScalarIV = Induction;
    if (IV != OldInduction) {
      ScalarIV = IV->getType()->isIntegerTy()
                     ? Builder.CreateSExtOrTrunc(Induction, IV->getType())
                     : Builder.CreateCast(Instruction::SIToFP, Induction,
                                          IV->getType());
      ScalarIV = ID.transform(Builder, ScalarIV, PSE.getSE(), DL);
      ScalarIV->setName("offset.idx");
    }
    if (Trunc) {
      auto *TruncType = cast<IntegerType>(Trunc->getType());
      assert(Step->getType()->isIntegerTy() &&
             "Truncation requires an integer step");
      ScalarIV = Builder.CreateTrunc(ScalarIV, TruncType);
      Step = Builder.CreateTrunc(Step, TruncType);
    }
  }

  // If we haven't yet vectorized the induction variable, splat the scalar
  // induction variable, and build the necessary step vectors.
  // TODO: Don't do it unless the vectorized IV is really required.
  if (!VectorizedIV) {
    Value *Broadcasted = getBroadcastInstrs(ScalarIV);
    for (unsigned Part = 0; Part < UF; ++Part) {
      Value *EntryPart =
          getStepVector(Broadcasted, VF * Part, Step, ID.getInductionOpcode());
      VectorLoopValueMap.setVectorValue(EntryVal, Part, EntryPart);
      if (Trunc)
        addMetadata(EntryPart, Trunc);
      else
        recordVectorLoopValueForInductionCast(ID, EntryPart, Part);
    }
  }

  // If an induction variable is only used for counting loop iterations or
  // calculating addresses, it doesn't need to be widened. Create scalar steps
  // that can be used by instructions we will later scalarize. Note that the
  // addition of the scalar steps will not increase the number of instructions
  // in the loop in the common case prior to InstCombine. We will be trading
  // one vector extract for each scalar step.
  if (NeedsScalarIV)
    buildScalarSteps(ScalarIV, Step, EntryVal, ID);
}

Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx, Value *Step,
                                          Instruction::BinaryOps BinOp) {
  // Create and check the types.
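  // Illustrative example (assumed, not from this change): for VF = 4, an i32
  // step %s, StartIdx = 0, and Val = a splat of %start, this produces:
  //   %offsets   = mul <4 x i32> <i32 0, i32 1, i32 2, i32 3>, %s.splat
  //   %induction = add <4 x i32> %start.splat, %offsets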
  assert(Val->getType()->isVectorTy() && "Must be a vector");
  int VLen = Val->getType()->getVectorNumElements();

  Type *STy = Val->getType()->getScalarType();
  assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&
         "Induction Step must be an integer or FP");
  assert(Step->getType() == STy && "Step has wrong type");

  SmallVector<Constant *, 8> Indices;

  if (STy->isIntegerTy()) {
    // Create a vector of consecutive numbers from zero to VF.
    for (int i = 0; i < VLen; ++i)
      Indices.push_back(ConstantInt::get(STy, StartIdx + i));

    // Add the consecutive indices to the vector value.
    Constant *Cv = ConstantVector::get(Indices);
    assert(Cv->getType() == Val->getType() && "Invalid consecutive vec");
    Step = Builder.CreateVectorSplat(VLen, Step);
    assert(Step->getType() == Val->getType() && "Invalid step vec");
    // FIXME: The newly created binary instructions should contain nsw/nuw
    // flags, which can be found from the original scalar operations.
    Step = Builder.CreateMul(Cv, Step);
    return Builder.CreateAdd(Val, Step, "induction");
  }

  // Floating point induction.
  assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
         "Binary Opcode should be specified for FP induction");
  // Create a vector of consecutive numbers from zero to VF.
  for (int i = 0; i < VLen; ++i)
    Indices.push_back(ConstantFP::get(STy, (double)(StartIdx + i)));

  // Add the consecutive indices to the vector value.
  Constant *Cv = ConstantVector::get(Indices);

  Step = Builder.CreateVectorSplat(VLen, Step);

  // Floating point operations had to be 'fast' to enable the induction.
  FastMathFlags Flags;
  Flags.setFast();

  Value *MulOp = Builder.CreateFMul(Cv, Step);
  if (isa<Instruction>(MulOp))
    // Have to check, MulOp may be a constant
    cast<Instruction>(MulOp)->setFastMathFlags(Flags);

  Value *BOp = Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
  if (isa<Instruction>(BOp))
    cast<Instruction>(BOp)->setFastMathFlags(Flags);
  return BOp;
}

void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
                                           Value *EntryVal,
                                           const InductionDescriptor &ID) {
  // We shouldn't have to build scalar steps if we aren't vectorizing.
  assert(VF > 1 && "VF should be greater than one");

  // Get the value type and ensure it and the step have the same integer type.
  Type *ScalarIVTy = ScalarIV->getType()->getScalarType();
  assert(ScalarIVTy == Step->getType() &&
         "Val and Step should have the same type");

  // We build scalar steps for both integer and floating-point induction
  // variables. Here, we determine the kind of arithmetic we will perform.
  Instruction::BinaryOps AddOp;
  Instruction::BinaryOps MulOp;
  if (ScalarIVTy->isIntegerTy()) {
    AddOp = Instruction::Add;
    MulOp = Instruction::Mul;
  } else {
    AddOp = ID.getInductionOpcode();
    MulOp = Instruction::FMul;
  }

  // Determine the number of scalars we need to generate for each unroll
  // iteration. If EntryVal is uniform, we only need to generate the first
  // lane. Otherwise, we generate all VF values.
  unsigned Lanes =
      Cost->isUniformAfterVectorization(cast<Instruction>(EntryVal), VF) ? 1
                                                                         : VF;
  // Compute the scalar steps and save the results in VectorLoopValueMap.
  for (unsigned Part = 0; Part < UF; ++Part) {
    for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
      auto *StartIdx = getSignedIntOrFpConstant(ScalarIVTy, VF * Part + Lane);
      auto *Mul = addFastMathFlag(Builder.CreateBinOp(MulOp, StartIdx, Step));
      auto *Add = addFastMathFlag(Builder.CreateBinOp(AddOp, ScalarIV, Mul));
      VectorLoopValueMap.setScalarValue(EntryVal, {Part, Lane}, Add);
      recordVectorLoopValueForInductionCast(ID, Add, Part, Lane);
    }
  }
}

int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
  const ValueToValueMap &Strides =
      getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();

  int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false);
  if (Stride == 1 || Stride == -1)
    return Stride;
  return 0;
}

bool LoopVectorizationLegality::isUniform(Value *V) {
  return LAI->isUniform(V);
}

Value *InnerLoopVectorizer::getOrCreateVectorValue(Value *V, unsigned Part) {
  assert(V != Induction && "The new induction variable should not be used.");
  assert(!V->getType()->isVectorTy() && "Can't widen a vector");
  assert(!V->getType()->isVoidTy() && "Type does not produce a value");

  // If we have a stride that is replaced by one, do it here.
  if (Legal->hasStride(V))
    V = ConstantInt::get(V->getType(), 1);

  // If we have a vector mapped to this value, return it.
  if (VectorLoopValueMap.hasVectorValue(V, Part))
    return VectorLoopValueMap.getVectorValue(V, Part);

  // If the value has not been vectorized, check if it has been scalarized
  // instead. If it has been scalarized, and we actually need the value in
  // vector form, we will construct the vector values on demand.
  if (VectorLoopValueMap.hasAnyScalarValue(V)) {
    Value *ScalarValue = VectorLoopValueMap.getScalarValue(V, {Part, 0});

    // If we've scalarized a value, that value should be an instruction.
    auto *I = cast<Instruction>(V);

    // If we aren't vectorizing, we can just copy the scalar map values over to
    // the vector map.
    if (VF == 1) {
      VectorLoopValueMap.setVectorValue(V, Part, ScalarValue);
      return ScalarValue;
    }

    // Get the last scalar instruction we generated for V and Part. If the
    // value is known to be uniform after vectorization, this corresponds to
    // lane zero of the Part unroll iteration. Otherwise, the last instruction
    // is the one we created for the last vector lane of the Part unroll
    // iteration.
    unsigned LastLane = Cost->isUniformAfterVectorization(I, VF) ? 0 : VF - 1;
    auto *LastInst = cast<Instruction>(
        VectorLoopValueMap.getScalarValue(V, {Part, LastLane}));

    // Set the insert point after the last scalarized instruction. This ensures
    // the insertelement sequence will directly follow the scalar definitions.
    auto OldIP = Builder.saveIP();
    auto NewIP = std::next(BasicBlock::iterator(LastInst));
    Builder.SetInsertPoint(&*NewIP);

    // However, if we are vectorizing, we need to construct the vector values.
    // If the value is known to be uniform after vectorization, we can just
    // broadcast the scalar value corresponding to lane zero for each unroll
    // iteration. Otherwise, we construct the vector values using insertelement
    // instructions. Since the resulting vectors are stored in
    // VectorLoopValueMap, we will only generate the insertelements once.
    Value *VectorValue = nullptr;
    if (Cost->isUniformAfterVectorization(I, VF)) {
      VectorValue = getBroadcastInstrs(ScalarValue);
      VectorLoopValueMap.setVectorValue(V, Part, VectorValue);
    } else {
      // Initialize packing with insertelements to start from undef.
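      // Illustrative example (assumed, not from this change): for VF = 4 the
      // packing below emits a chain like
      //   %p0 = insertelement <4 x i32> undef, i32 %s0, i32 0
      //   %p1 = insertelement <4 x i32> %p0,   i32 %s1, i32 1
      // and so on up to lane 3.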
      Value *Undef = UndefValue::get(VectorType::get(V->getType(), VF));
      VectorLoopValueMap.setVectorValue(V, Part, Undef);
      for (unsigned Lane = 0; Lane < VF; ++Lane)
        packScalarIntoVectorValue(V, {Part, Lane});
      VectorValue = VectorLoopValueMap.getVectorValue(V, Part);
    }
    Builder.restoreIP(OldIP);
    return VectorValue;
  }

  // If this scalar is unknown, assume that it is a constant or that it is
  // loop invariant. Broadcast V and save the value for future uses.
  Value *B = getBroadcastInstrs(V);
  VectorLoopValueMap.setVectorValue(V, Part, B);
  return B;
}

Value *
InnerLoopVectorizer::getOrCreateScalarValue(Value *V,
                                            const VPIteration &Instance) {
  // If the value is not an instruction contained in the loop, it should
  // already be scalar.
  if (OrigLoop->isLoopInvariant(V))
    return V;

  assert(Instance.Lane > 0
             ? !Cost->isUniformAfterVectorization(cast<Instruction>(V), VF)
             : true && "Uniform values only have lane zero");

  // If the value from the original loop has not been vectorized, it is
  // represented by UF x VF scalar values in the new loop. Return the requested
  // scalar value.
  if (VectorLoopValueMap.hasScalarValue(V, Instance))
    return VectorLoopValueMap.getScalarValue(V, Instance);

  // If the value has not been scalarized, get its entry in VectorLoopValueMap
  // for the given unroll part. If this entry is not a vector type (i.e., the
  // vectorization factor is one), there is no need to generate an
  // extractelement instruction.
  auto *U = getOrCreateVectorValue(V, Instance.Part);
  if (!U->getType()->isVectorTy()) {
    assert(VF == 1 && "Value not scalarized has non-vector type");
    return U;
  }

  // Otherwise, the value from the original loop has been vectorized and is
  // represented by UF vector values. Extract and return the requested scalar
  // value from the appropriate vector lane.
  return Builder.CreateExtractElement(U, Builder.getInt32(Instance.Lane));
}

void InnerLoopVectorizer::packScalarIntoVectorValue(
    Value *V, const VPIteration &Instance) {
  assert(V != Induction && "The new induction variable should not be used.");
  assert(!V->getType()->isVectorTy() && "Can't pack a vector");
  assert(!V->getType()->isVoidTy() && "Type does not produce a value");

  Value *ScalarInst = VectorLoopValueMap.getScalarValue(V, Instance);
  Value *VectorValue = VectorLoopValueMap.getVectorValue(V, Instance.Part);
  VectorValue = Builder.CreateInsertElement(VectorValue, ScalarInst,
                                            Builder.getInt32(Instance.Lane));
  VectorLoopValueMap.resetVectorValue(V, Instance.Part, VectorValue);
}

Value *InnerLoopVectorizer::reverseVector(Value *Vec) {
  assert(Vec->getType()->isVectorTy() && "Invalid type");
  SmallVector<Constant *, 8> ShuffleMask;
  for (unsigned i = 0; i < VF; ++i)
    ShuffleMask.push_back(Builder.getInt32(VF - i - 1));

  return Builder.CreateShuffleVector(Vec, UndefValue::get(Vec->getType()),
                                     ConstantVector::get(ShuffleMask),
                                     "reverse");
}

// Try to vectorize the interleave group that \p Instr belongs to.
//
// E.g. Translate following interleaved load group (factor = 3):
//   for (i = 0; i < N; i+=3) {
//     R = Pic[i];             // Member of index 0
//     G = Pic[i+1];           // Member of index 1
//     B = Pic[i+2];           // Member of index 2
//     ... // do something to R, G, B
//   }
// To:
//   %wide.vec = load <12 x i32>                       ; Read 4 tuples of R,G,B
//   %R.vec = shuffle %wide.vec, undef, <0, 3, 6, 9>   ; R elements
//   %G.vec = shuffle %wide.vec, undef, <1, 4, 7, 10>  ; G elements
//   %B.vec = shuffle %wide.vec, undef, <2, 5, 8, 11>  ; B elements
//
// Or translate following interleaved store group (factor = 3):
//   for (i = 0; i < N; i+=3) {
//     ...
//     do something to R, G, B
//     Pic[i]   = R;           // Member of index 0
//     Pic[i+1] = G;           // Member of index 1
//     Pic[i+2] = B;           // Member of index 2
//   }
// To:
//   %R_G.vec = shuffle %R.vec, %G.vec, <0, 1, 2, ..., 7>
//   %B_U.vec = shuffle %B.vec, undef, <0, 1, 2, 3, u, u, u, u>
//   %interleaved.vec = shuffle %R_G.vec, %B_U.vec,
//        <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>    ; Interleave R,G,B elements
//   store <12 x i32> %interleaved.vec              ; Write 4 tuples of R,G,B
void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
  const InterleaveGroup *Group = Legal->getInterleavedAccessGroup(Instr);
  assert(Group && "Fail to get an interleaved access group.");

  // Skip if current instruction is not the insert position.
  if (Instr != Group->getInsertPos())
    return;

  const DataLayout &DL = Instr->getModule()->getDataLayout();
  Value *Ptr = getPointerOperand(Instr);

  // Prepare for the vector type of the interleaved load/store.
  Type *ScalarTy = getMemInstValueType(Instr);
  unsigned InterleaveFactor = Group->getFactor();
  Type *VecTy = VectorType::get(ScalarTy, InterleaveFactor * VF);
  Type *PtrTy = VecTy->getPointerTo(getMemInstAddressSpace(Instr));

  // Prepare for the new pointers.
  setDebugLocFromInst(Builder, Ptr);
  SmallVector<Value *, 2> NewPtrs;
  unsigned Index = Group->getIndex(Instr);

  // If the group is reverse, adjust the index to refer to the last vector lane
  // instead of the first. We adjust the index from the first vector lane,
  // rather than directly getting the pointer for lane VF - 1, because the
  // pointer operand of the interleaved access is supposed to be uniform. For
  // uniform instructions, we're only required to generate a value for the
  // first vector lane in each unroll iteration.
  if (Group->isReverse())
    Index += (VF - 1) * Group->getFactor();

  for (unsigned Part = 0; Part < UF; Part++) {
    Value *NewPtr = getOrCreateScalarValue(Ptr, {Part, 0});

    // Notice current instruction could be any index. Need to adjust the
    // address to the member of index 0.
    //
    // E.g.  a = A[i+1];     // Member of index 1 (Current instruction)
    //       b = A[i];       // Member of index 0
    // Current pointer is pointed to A[i+1], adjust it to A[i].
    //
    // E.g.  A[i+1] = a;     // Member of index 1
    //       A[i]   = b;     // Member of index 0
    //       A[i+2] = c;     // Member of index 2 (Current instruction)
    // Current pointer is pointed to A[i+2], adjust it to A[i].
    NewPtr = Builder.CreateGEP(NewPtr, Builder.getInt32(-Index));

    // Cast to the vector pointer type.
    NewPtrs.push_back(Builder.CreateBitCast(NewPtr, PtrTy));
  }

  setDebugLocFromInst(Builder, Instr);
  Value *UndefVec = UndefValue::get(VecTy);

  // Vectorize the interleaved load group.
  if (isa<LoadInst>(Instr)) {
    // For each unroll part, create a wide load for the group.
    SmallVector<Value *, 2> NewLoads;
    for (unsigned Part = 0; Part < UF; Part++) {
      auto *NewLoad = Builder.CreateAlignedLoad(
          NewPtrs[Part], Group->getAlignment(), "wide.vec");
      Group->addMetadata(NewLoad);
      NewLoads.push_back(NewLoad);
    }

    // For each member in the group, shuffle out the appropriate data from the
    // wide loads.
    for (unsigned I = 0; I < InterleaveFactor; ++I) {
      Instruction *Member = Group->getMember(I);

      // Skip the gaps in the group.
      if (!Member)
        continue;

      Constant *StrideMask = createStrideMask(Builder, I, InterleaveFactor, VF);
      for (unsigned Part = 0; Part < UF; Part++) {
        Value *StridedVec = Builder.CreateShuffleVector(
            NewLoads[Part], UndefVec, StrideMask, "strided.vec");

        // If this member has different type, cast the result type.
        if (Member->getType() != ScalarTy) {
          VectorType *OtherVTy = VectorType::get(Member->getType(), VF);
          StridedVec = createBitOrPointerCast(StridedVec, OtherVTy, DL);
        }

        if (Group->isReverse())
          StridedVec = reverseVector(StridedVec);

        VectorLoopValueMap.setVectorValue(Member, Part, StridedVec);
      }
    }
    return;
  }

  // The sub vector type for current instruction.
  VectorType *SubVT = VectorType::get(ScalarTy, VF);

  // Vectorize the interleaved store group.
  for (unsigned Part = 0; Part < UF; Part++) {
    // Collect the stored vector from each member.
    SmallVector<Value *, 4> StoredVecs;
    for (unsigned i = 0; i < InterleaveFactor; i++) {
      // Interleaved store group doesn't allow a gap, so each index has a member
      Instruction *Member = Group->getMember(i);
      assert(Member && "Fail to get a member from an interleaved store group");

      Value *StoredVec = getOrCreateVectorValue(
          cast<StoreInst>(Member)->getValueOperand(), Part);
      if (Group->isReverse())
        StoredVec = reverseVector(StoredVec);

      // If this member has different type, cast it to a unified type.
      if (StoredVec->getType() != SubVT)
        StoredVec = createBitOrPointerCast(StoredVec, SubVT, DL);

      StoredVecs.push_back(StoredVec);
    }

    // Concatenate all vectors into a wide vector.
    Value *WideVec = concatenateVectors(Builder, StoredVecs);

    // Interleave the elements in the wide vector.
    Constant *IMask = createInterleaveMask(Builder, VF, InterleaveFactor);
    Value *IVec = Builder.CreateShuffleVector(WideVec, UndefVec, IMask,
                                              "interleaved.vec");

    Instruction *NewStoreInstr =
        Builder.CreateAlignedStore(IVec, NewPtrs[Part], Group->getAlignment());
    Group->addMetadata(NewStoreInstr);
  }
}

void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
                                                     VectorParts *BlockInMask) {
  // Attempt to issue a wide load.
  LoadInst *LI = dyn_cast<LoadInst>(Instr);
  StoreInst *SI = dyn_cast<StoreInst>(Instr);

  assert((LI || SI) && "Invalid Load/Store instruction");

  LoopVectorizationCostModel::InstWidening Decision =
      Cost->getWideningDecision(Instr, VF);
  assert(Decision != LoopVectorizationCostModel::CM_Unknown &&
         "CM decision should be taken at this point");
  if (Decision == LoopVectorizationCostModel::CM_Interleave)
    return vectorizeInterleaveGroup(Instr);

  Type *ScalarDataTy = getMemInstValueType(Instr);
  Type *DataTy = VectorType::get(ScalarDataTy, VF);
  Value *Ptr = getPointerOperand(Instr);
  unsigned Alignment = getMemInstAlignment(Instr);
  // An alignment of 0 means target abi alignment. We need to use the scalar's
  // target abi alignment in such a case.
  const DataLayout &DL = Instr->getModule()->getDataLayout();
  if (!Alignment)
    Alignment = DL.getABITypeAlignment(ScalarDataTy);
  unsigned AddressSpace = getMemInstAddressSpace(Instr);

  // Determine if the pointer operand of the access is either consecutive or
  // reverse consecutive.
  bool Reverse = (Decision == LoopVectorizationCostModel::CM_Widen_Reverse);
  bool ConsecutiveStride =
      Reverse || (Decision == LoopVectorizationCostModel::CM_Widen);
  bool CreateGatherScatter =
      (Decision == LoopVectorizationCostModel::CM_GatherScatter);

  // Either Ptr feeds a vector load/store, or a vector GEP should feed a vector
  // gather/scatter. Otherwise Decision should have been to Scalarize.
  assert((ConsecutiveStride || CreateGatherScatter) &&
         "The instruction should be scalarized");

  // Handle consecutive loads/stores.
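  // Illustrative note (assumed example, not from this change): for a
  // consecutive access with VF = 4 and UF = 2, part 0 covers elements [0, 3]
  // and part 1 elements [4, 7] relative to Ptr. For a reverse access the parts
  // step downwards, which is why the code below offsets by -Part * VF and then
  // by (1 - VF) to reach the first element of each wide access.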
  if (ConsecutiveStride)
    Ptr = getOrCreateScalarValue(Ptr, {0, 0});

  VectorParts Mask;
  bool isMaskRequired = BlockInMask;
  if (isMaskRequired)
    Mask = *BlockInMask;

  // Handle Stores:
  if (SI) {
    assert(!Legal->isUniform(SI->getPointerOperand()) &&
           "We do not allow storing to uniform addresses");
    setDebugLocFromInst(Builder, SI);

    for (unsigned Part = 0; Part < UF; ++Part) {
      Instruction *NewSI = nullptr;
      Value *StoredVal = getOrCreateVectorValue(SI->getValueOperand(), Part);
      if (CreateGatherScatter) {
        Value *MaskPart = isMaskRequired ? Mask[Part] : nullptr;
        Value *VectorGep = getOrCreateVectorValue(Ptr, Part);
        NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment,
                                            MaskPart);
      } else {
        // Calculate the pointer for the specific unroll-part.
        Value *PartPtr =
            Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(Part * VF));

        if (Reverse) {
          // If we store to reverse consecutive memory locations, then we need
          // to reverse the order of elements in the stored value.
          StoredVal = reverseVector(StoredVal);
          // We don't want to update the value in the map as it might be used in
          // another expression. So don't call resetVectorValue(StoredVal).

          // If the address is consecutive but reversed, then the
          // wide store needs to start at the last vector element.
          PartPtr =
              Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(-Part * VF));
          PartPtr =
              Builder.CreateGEP(nullptr, PartPtr, Builder.getInt32(1 - VF));
          if (isMaskRequired) // Reverse of a null all-one mask is a null mask.
            Mask[Part] = reverseVector(Mask[Part]);
        }

        Value *VecPtr =
            Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace));

        if (isMaskRequired)
          NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
                                            Mask[Part]);
        else
          NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment);
      }
      addMetadata(NewSI, SI);
    }
    return;
  }

  // Handle loads.
  assert(LI && "Must have a load instruction");
  setDebugLocFromInst(Builder, LI);
  for (unsigned Part = 0; Part < UF; ++Part) {
    Value *NewLI;
    if (CreateGatherScatter) {
      Value *MaskPart = isMaskRequired ? Mask[Part] : nullptr;
      Value *VectorGep = getOrCreateVectorValue(Ptr, Part);
      NewLI = Builder.CreateMaskedGather(VectorGep, Alignment, MaskPart,
                                         nullptr, "wide.masked.gather");
      addMetadata(NewLI, LI);
    } else {
      // Calculate the pointer for the specific unroll-part.
      Value *PartPtr =
          Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(Part * VF));

      if (Reverse) {
        // If the address is consecutive but reversed, then the
        // wide load needs to start at the last vector element.
        PartPtr = Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(-Part * VF));
        PartPtr =
            Builder.CreateGEP(nullptr, PartPtr, Builder.getInt32(1 - VF));
        if (isMaskRequired) // Reverse of a null all-one mask is a null mask.
          Mask[Part] = reverseVector(Mask[Part]);
      }

      Value *VecPtr =
          Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace));
      if (isMaskRequired)
        NewLI = Builder.CreateMaskedLoad(VecPtr, Alignment, Mask[Part],
                                         UndefValue::get(DataTy),
                                         "wide.masked.load");
      else
        NewLI = Builder.CreateAlignedLoad(VecPtr, Alignment, "wide.load");

      // Add metadata to the load, but setVectorValue to the reverse shuffle.
      addMetadata(NewLI, LI);
      if (Reverse)
        NewLI = reverseVector(NewLI);
    }
    VectorLoopValueMap.setVectorValue(Instr, Part, NewLI);
  }
}

void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
                                               const VPIteration &Instance,
                                               bool IfPredicateInstr) {
  assert(!Instr->getType()->isAggregateType() && "Can't handle vectors");

  setDebugLocFromInst(Builder, Instr);

  // Does this instruction return a value ?
bool IsVoidRetTy = Instr->getType()->isVoidTy(); Instruction *Cloned = Instr->clone(); if (!IsVoidRetTy) Cloned->setName(Instr->getName() + ".cloned"); // Replace the operands of the cloned instructions with their scalar // equivalents in the new loop. for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) { auto *NewOp = getOrCreateScalarValue(Instr->getOperand(op), Instance); Cloned->setOperand(op, NewOp); } addNewMetadata(Cloned, Instr); // Place the cloned scalar in the new loop. Builder.Insert(Cloned); // Add the cloned scalar to the scalar map entry. VectorLoopValueMap.setScalarValue(Instr, Instance, Cloned); // If we just cloned a new assumption, add it the assumption cache. if (auto *II = dyn_cast(Cloned)) if (II->getIntrinsicID() == Intrinsic::assume) AC->registerAssumption(II); // End if-block. if (IfPredicateInstr) PredicatedInstructions.push_back(Cloned); } PHINode *InnerLoopVectorizer::createInductionVariable(Loop *L, Value *Start, Value *End, Value *Step, Instruction *DL) { BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); // As we're just creating this loop, it's possible no latch exists // yet. If so, use the header as this will be a single block loop. if (!Latch) Latch = Header; IRBuilder<> Builder(&*Header->getFirstInsertionPt()); Instruction *OldInst = getDebugLocFromInstOrOperands(OldInduction); setDebugLocFromInst(Builder, OldInst); auto *Induction = Builder.CreatePHI(Start->getType(), 2, "index"); Builder.SetInsertPoint(Latch->getTerminator()); setDebugLocFromInst(Builder, OldInst); // Create i+1 and fill the PHINode. Value *Next = Builder.CreateAdd(Induction, Step, "index.next"); Induction->addIncoming(Start, L->getLoopPreheader()); Induction->addIncoming(Next, Latch); // Create the compare. Value *ICmp = Builder.CreateICmpEQ(Next, End); Builder.CreateCondBr(ICmp, L->getExitBlock(), Header); // Now we have two terminators. Remove the old one from the block. Latch->getTerminator()->eraseFromParent(); return Induction; } Value *InnerLoopVectorizer::getOrCreateTripCount(Loop *L) { if (TripCount) return TripCount; IRBuilder<> Builder(L->getLoopPreheader()->getTerminator()); // Find the loop boundaries. ScalarEvolution *SE = PSE.getSE(); const SCEV *BackedgeTakenCount = PSE.getBackedgeTakenCount(); assert(BackedgeTakenCount != SE->getCouldNotCompute() && "Invalid loop count"); Type *IdxTy = Legal->getWidestInductionType(); // The exit count might have the type of i64 while the phi is i32. This can // happen if we have an induction variable that is sign extended before the // compare. The only way that we get a backedge taken count is that the // induction variable was signed and as such will not overflow. In such a case // truncation is legal. if (BackedgeTakenCount->getType()->getPrimitiveSizeInBits() > IdxTy->getPrimitiveSizeInBits()) BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount, IdxTy); BackedgeTakenCount = SE->getNoopOrZeroExtend(BackedgeTakenCount, IdxTy); // Get the total trip count from the count by adding 1. const SCEV *ExitCount = SE->getAddExpr( BackedgeTakenCount, SE->getOne(BackedgeTakenCount->getType())); const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); // Expand the trip count and place the new instructions in the preheader. // Notice that the pre-header does not change, only the loop body. SCEVExpander Exp(*SE, DL, "induction"); // Count holds the overall loop count (N). 
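  // For example, for a loop "for (i = 0; i < n; ++i)" the backedge-taken
  // count is n - 1, so the trip count expanded below is (n - 1) + 1 == n.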
  TripCount = Exp.expandCodeFor(ExitCount, ExitCount->getType(),
                                L->getLoopPreheader()->getTerminator());

  if (TripCount->getType()->isPointerTy())
    TripCount =
        CastInst::CreatePointerCast(TripCount, IdxTy,
                                    "exitcount.ptrcnt.to.int",
                                    L->getLoopPreheader()->getTerminator());

  return TripCount;
}

Value *InnerLoopVectorizer::getOrCreateVectorTripCount(Loop *L) {
  if (VectorTripCount)
    return VectorTripCount;

  Value *TC = getOrCreateTripCount(L);
  IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());

  // Now we need to generate the expression for the part of the loop that the
  // vectorized body will execute. This is equal to N - (N % Step) if scalar
  // iterations are not required for correctness, or N - Step, otherwise. Step
  // is equal to the vectorization factor (number of SIMD elements) times the
  // unroll factor (number of SIMD instructions).
  Constant *Step = ConstantInt::get(TC->getType(), VF * UF);
  Value *R = Builder.CreateURem(TC, Step, "n.mod.vf");

  // If there is a non-reversed interleaved group that may speculatively access
  // memory out-of-bounds, we need to ensure that there will be at least one
  // iteration of the scalar epilogue loop. Thus, if the step evenly divides
  // the trip count, we set the remainder to be equal to the step. If the step
  // does not evenly divide the trip count, no adjustment is necessary since
  // there will already be scalar iterations. Note that the minimum iterations
  // check ensures that N >= Step.
  if (VF > 1 && Legal->requiresScalarEpilogue()) {
    auto *IsZero = Builder.CreateICmpEQ(R, ConstantInt::get(R->getType(), 0));
    R = Builder.CreateSelect(IsZero, Step, R);
  }

  VectorTripCount = Builder.CreateSub(TC, R, "n.vec");

  return VectorTripCount;
}

Value *InnerLoopVectorizer::createBitOrPointerCast(Value *V,
                                                   VectorType *DstVTy,
                                                   const DataLayout &DL) {
  // Verify that V is a vector type with same number of elements as DstVTy.
  unsigned VF = DstVTy->getNumElements();
  VectorType *SrcVecTy = cast<VectorType>(V->getType());
  assert((VF == SrcVecTy->getNumElements()) &&
         "Vector dimensions do not match");
  Type *SrcElemTy = SrcVecTy->getElementType();
  Type *DstElemTy = DstVTy->getElementType();
  assert((DL.getTypeSizeInBits(SrcElemTy) == DL.getTypeSizeInBits(DstElemTy)) &&
         "Vector elements must have same size");

  // Do a direct cast if element types are castable.
  if (CastInst::isBitOrNoopPointerCastable(SrcElemTy, DstElemTy, DL)) {
    return Builder.CreateBitOrPointerCast(V, DstVTy);
  }
  // V cannot be directly casted to desired vector type.
  // May happen when V is a floating point vector but DstVTy is a vector of
  // pointers or vice-versa. Handle this using a two-step bitcast using an
  // intermediate Integer type for the bitcast i.e. Ptr <-> Int <-> Float.
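  // For illustration, on a target with 64-bit pointers a <2 x double> value
  // would reach a <2 x i8*> destination type via the intermediate type
  // <2 x i64>:
  //   %as.int = bitcast <2 x double> %v to <2 x i64>
  //   %as.ptr = inttoptr <2 x i64> %as.int to <2 x i8*>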
assert((DstElemTy->isPointerTy() != SrcElemTy->isPointerTy()) && "Only one type should be a pointer type"); assert((DstElemTy->isFloatingPointTy() != SrcElemTy->isFloatingPointTy()) && "Only one type should be a floating point type"); Type *IntTy = IntegerType::getIntNTy(V->getContext(), DL.getTypeSizeInBits(SrcElemTy)); VectorType *VecIntTy = VectorType::get(IntTy, VF); Value *CastVal = Builder.CreateBitOrPointerCast(V, VecIntTy); return Builder.CreateBitOrPointerCast(CastVal, DstVTy); } void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L, BasicBlock *Bypass) { Value *Count = getOrCreateTripCount(L); BasicBlock *BB = L->getLoopPreheader(); IRBuilder<> Builder(BB->getTerminator()); // Generate code to check if the loop's trip count is less than VF * UF, or // equal to it in case a scalar epilogue is required; this implies that the // vector trip count is zero. This check also covers the case where adding one // to the backedge-taken count overflowed leading to an incorrect trip count // of zero. In this case we will also jump to the scalar loop. auto P = Legal->requiresScalarEpilogue() ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT; Value *CheckMinIters = Builder.CreateICmp( P, Count, ConstantInt::get(Count->getType(), VF * UF), "min.iters.check"); BasicBlock *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph"); // Update dominator tree immediately if the generated block is a // LoopBypassBlock because SCEV expansions to generate loop bypass // checks may query it before the current function is finished. DT->addNewBlock(NewBB, BB); if (L->getParentLoop()) L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI); ReplaceInstWithInst(BB->getTerminator(), BranchInst::Create(Bypass, NewBB, CheckMinIters)); LoopBypassBlocks.push_back(BB); } void InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) { BasicBlock *BB = L->getLoopPreheader(); // Generate the code to check that the SCEV assumptions that we made. // We want the new basic block to start at the first instruction in a // sequence of instructions that form a check. SCEVExpander Exp(*PSE.getSE(), Bypass->getModule()->getDataLayout(), "scev.check"); Value *SCEVCheck = Exp.expandCodeForPredicate(&PSE.getUnionPredicate(), BB->getTerminator()); if (auto *C = dyn_cast(SCEVCheck)) if (C->isZero()) return; // Create a new block containing the stride check. BB->setName("vector.scevcheck"); auto *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph"); // Update dominator tree immediately if the generated block is a // LoopBypassBlock because SCEV expansions to generate loop bypass // checks may query it before the current function is finished. DT->addNewBlock(NewBB, BB); if (L->getParentLoop()) L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI); ReplaceInstWithInst(BB->getTerminator(), BranchInst::Create(Bypass, NewBB, SCEVCheck)); LoopBypassBlocks.push_back(BB); AddedSafetyChecks = true; } void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass) { BasicBlock *BB = L->getLoopPreheader(); // Generate the code that checks in runtime if arrays overlap. We put the // checks into a separate block to make the more common case of few elements // faster. Instruction *FirstCheckInst; Instruction *MemRuntimeCheck; std::tie(FirstCheckInst, MemRuntimeCheck) = Legal->getLAI()->addRuntimeChecks(BB->getTerminator()); if (!MemRuntimeCheck) return; // Create a new block containing the memory check. 
BB->setName("vector.memcheck"); auto *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph"); // Update dominator tree immediately if the generated block is a // LoopBypassBlock because SCEV expansions to generate loop bypass // checks may query it before the current function is finished. DT->addNewBlock(NewBB, BB); if (L->getParentLoop()) L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI); ReplaceInstWithInst(BB->getTerminator(), BranchInst::Create(Bypass, NewBB, MemRuntimeCheck)); LoopBypassBlocks.push_back(BB); AddedSafetyChecks = true; // We currently don't use LoopVersioning for the actual loop cloning but we // still use it to add the noalias metadata. LVer = llvm::make_unique(*Legal->getLAI(), OrigLoop, LI, DT, PSE.getSE()); LVer->prepareNoAliasMetadata(); } BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() { /* In this function we generate a new loop. The new loop will contain the vectorized instructions while the old loop will continue to run the scalar remainder. [ ] <-- loop iteration number check. / | / v | [ ] <-- vector loop bypass (may consist of multiple blocks). | / | | / v || [ ] <-- vector pre header. |/ | | v | [ ] \ | [ ]_| <-- vector loop. | | | v | -[ ] <--- middle-block. | / | | / v -|- >[ ] <--- new preheader. | | | v | [ ] \ | [ ]_| <-- old scalar loop to handle remainder. \ | \ v >[ ] <-- exit block. ... */ BasicBlock *OldBasicBlock = OrigLoop->getHeader(); BasicBlock *VectorPH = OrigLoop->getLoopPreheader(); BasicBlock *ExitBlock = OrigLoop->getExitBlock(); assert(VectorPH && "Invalid loop structure"); assert(ExitBlock && "Must have an exit block"); // Some loops have a single integer induction variable, while other loops // don't. One example is c++ iterators that often have multiple pointer // induction variables. In the code below we also support a case where we // don't have a single induction variable. // // We try to obtain an induction variable from the original loop as hard // as possible. However if we don't find one that: // - is an integer // - counts from zero, stepping by one // - is the size of the widest induction variable type // then we create a new one. OldInduction = Legal->getPrimaryInduction(); Type *IdxTy = Legal->getWidestInductionType(); // Split the single block loop into the two loop structure described above. BasicBlock *VecBody = VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.body"); BasicBlock *MiddleBlock = VecBody->splitBasicBlock(VecBody->getTerminator(), "middle.block"); BasicBlock *ScalarPH = MiddleBlock->splitBasicBlock(MiddleBlock->getTerminator(), "scalar.ph"); // Create and register the new vector loop. Loop *Lp = LI->AllocateLoop(); Loop *ParentLoop = OrigLoop->getParentLoop(); // Insert the new loop into the loop nest and register the new basic blocks // before calling any utilities such as SCEV that require valid LoopInfo. if (ParentLoop) { ParentLoop->addChildLoop(Lp); ParentLoop->addBasicBlockToLoop(ScalarPH, *LI); ParentLoop->addBasicBlockToLoop(MiddleBlock, *LI); } else { LI->addTopLevelLoop(Lp); } Lp->addBasicBlockToLoop(VecBody, *LI); // Find the loop boundaries. Value *Count = getOrCreateTripCount(Lp); Value *StartIdx = ConstantInt::get(IdxTy, 0); // Now, compare the new count to zero. If it is zero skip the vector loop and // jump to the scalar loop. This check also covers the case where the // backedge-taken count is uint##_max: adding one to it will overflow leading // to an incorrect trip count of zero. In this (rare) case we will also jump // to the scalar loop. 
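  // For instance, with VF = 4 and UF = 2 the guard emitted below is
  //   %min.iters.check = icmp ult i64 %count, 8
  // (ule when a scalar epilogue is required), branching to the scalar loop
  // when the check is true.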
emitMinimumIterationCountCheck(Lp, ScalarPH); // Generate the code to check any assumptions that we've made for SCEV // expressions. emitSCEVChecks(Lp, ScalarPH); // Generate the code that checks in runtime if arrays overlap. We put the // checks into a separate block to make the more common case of few elements // faster. emitMemRuntimeChecks(Lp, ScalarPH); // Generate the induction variable. // The loop step is equal to the vectorization factor (num of SIMD elements) // times the unroll factor (num of SIMD instructions). Value *CountRoundDown = getOrCreateVectorTripCount(Lp); Constant *Step = ConstantInt::get(IdxTy, VF * UF); Induction = createInductionVariable(Lp, StartIdx, CountRoundDown, Step, getDebugLocFromInstOrOperands(OldInduction)); // We are going to resume the execution of the scalar loop. // Go over all of the induction variables that we found and fix the // PHIs that are left in the scalar version of the loop. // The starting values of PHI nodes depend on the counter of the last // iteration in the vectorized loop. // If we come from a bypass edge then we need to start from the original // start value. // This variable saves the new starting index for the scalar loop. It is used // to test if there are any tail iterations left once the vector loop has // completed. LoopVectorizationLegality::InductionList *List = Legal->getInductionVars(); for (auto &InductionEntry : *List) { PHINode *OrigPhi = InductionEntry.first; InductionDescriptor II = InductionEntry.second; // Create phi nodes to merge from the backedge-taken check block. PHINode *BCResumeVal = PHINode::Create( OrigPhi->getType(), 3, "bc.resume.val", ScalarPH->getTerminator()); Value *&EndValue = IVEndValues[OrigPhi]; if (OrigPhi == OldInduction) { // We know what the end value is. EndValue = CountRoundDown; } else { IRBuilder<> B(Lp->getLoopPreheader()->getTerminator()); Type *StepType = II.getStep()->getType(); Instruction::CastOps CastOp = CastInst::getCastOpcode(CountRoundDown, true, StepType, true); Value *CRD = B.CreateCast(CastOp, CountRoundDown, StepType, "cast.crd"); const DataLayout &DL = OrigLoop->getHeader()->getModule()->getDataLayout(); EndValue = II.transform(B, CRD, PSE.getSE(), DL); EndValue->setName("ind.end"); } // The new PHI merges the original incoming value, in case of a bypass, // or the value at the end of the vectorized loop. BCResumeVal->addIncoming(EndValue, MiddleBlock); // Fix the scalar body counter (PHI node). unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH); // The old induction's phi node in the scalar body needs the truncated // value. for (BasicBlock *BB : LoopBypassBlocks) BCResumeVal->addIncoming(II.getStartValue(), BB); OrigPhi->setIncomingValue(BlockIdx, BCResumeVal); } // Add a check in the middle block to see if we have completed // all of the iterations in the first vector loop. // If (N - N%VF) == N, then we *don't* need to run the remainder. Value *CmpN = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, Count, CountRoundDown, "cmp.n", MiddleBlock->getTerminator()); ReplaceInstWithInst(MiddleBlock->getTerminator(), BranchInst::Create(ExitBlock, ScalarPH, CmpN)); // Get ready to start creating new instructions into the vectorized body. Builder.SetInsertPoint(&*VecBody->getFirstInsertionPt()); // Save the state. 
LoopVectorPreHeader = Lp->getLoopPreheader(); LoopScalarPreHeader = ScalarPH; LoopMiddleBlock = MiddleBlock; LoopExitBlock = ExitBlock; LoopVectorBody = VecBody; LoopScalarBody = OldBasicBlock; // Keep all loop hints from the original loop on the vector loop (we'll // replace the vectorizer-specific hints below). if (MDNode *LID = OrigLoop->getLoopID()) Lp->setLoopID(LID); LoopVectorizeHints Hints(Lp, true, *ORE); Hints.setAlreadyVectorized(); return LoopVectorPreHeader; } // Fix up external users of the induction variable. At this point, we are // in LCSSA form, with all external PHIs that use the IV having one input value, // coming from the remainder loop. We need those PHIs to also have a correct // value for the IV when arriving directly from the middle block. void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II, Value *CountRoundDown, Value *EndValue, BasicBlock *MiddleBlock) { // There are two kinds of external IV usages - those that use the value // computed in the last iteration (the PHI) and those that use the penultimate // value (the value that feeds into the phi from the loop latch). // We allow both, but they, obviously, have different values. assert(OrigLoop->getExitBlock() && "Expected a single exit block"); DenseMap MissingVals; // An external user of the last iteration's value should see the value that // the remainder loop uses to initialize its own IV. Value *PostInc = OrigPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch()); for (User *U : PostInc->users()) { Instruction *UI = cast(U); if (!OrigLoop->contains(UI)) { assert(isa(UI) && "Expected LCSSA form"); MissingVals[UI] = EndValue; } } // An external user of the penultimate value need to see EndValue - Step. // The simplest way to get this is to recompute it from the constituent SCEVs, // that is Start + (Step * (CRD - 1)). for (User *U : OrigPhi->users()) { auto *UI = cast(U); if (!OrigLoop->contains(UI)) { const DataLayout &DL = OrigLoop->getHeader()->getModule()->getDataLayout(); assert(isa(UI) && "Expected LCSSA form"); IRBuilder<> B(MiddleBlock->getTerminator()); Value *CountMinusOne = B.CreateSub( CountRoundDown, ConstantInt::get(CountRoundDown->getType(), 1)); Value *CMO = !II.getStep()->getType()->isIntegerTy() ? B.CreateCast(Instruction::SIToFP, CountMinusOne, II.getStep()->getType()) : B.CreateSExtOrTrunc(CountMinusOne, II.getStep()->getType()); CMO->setName("cast.cmo"); Value *Escape = II.transform(B, CMO, PSE.getSE(), DL); Escape->setName("ind.escape"); MissingVals[UI] = Escape; } } for (auto &I : MissingVals) { PHINode *PHI = cast(I.first); // One corner case we have to handle is two IVs "chasing" each-other, // that is %IV2 = phi [...], [ %IV1, %latch ] // In this case, if IV1 has an external use, we need to avoid adding both // "last value of IV1" and "penultimate value of IV2". So, verify that we // don't already have an incoming value for the middle block. 
    if (PHI->getBasicBlockIndex(MiddleBlock) == -1)
      PHI->addIncoming(I.second, MiddleBlock);
  }
}

namespace {

struct CSEDenseMapInfo {
  static bool canHandle(const Instruction *I) {
    return isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
           isa<ShuffleVectorInst>(I) || isa<GetElementPtrInst>(I);
  }

  static inline Instruction *getEmptyKey() {
    return DenseMapInfo<Instruction *>::getEmptyKey();
  }

  static inline Instruction *getTombstoneKey() {
    return DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static unsigned getHashValue(const Instruction *I) {
    assert(canHandle(I) && "Unknown instruction!");
    return hash_combine(I->getOpcode(), hash_combine_range(I->value_op_begin(),
                                                           I->value_op_end()));
  }

  static bool isEqual(const Instruction *LHS, const Instruction *RHS) {
    if (LHS == getEmptyKey() || RHS == getEmptyKey() ||
        LHS == getTombstoneKey() || RHS == getTombstoneKey())
      return LHS == RHS;
    return LHS->isIdenticalTo(RHS);
  }
};

} // end anonymous namespace

///\brief Perform cse of induction variable instructions.
static void cse(BasicBlock *BB) {
  // Perform simple cse.
  SmallDenseMap<Instruction *, Instruction *, 4, CSEDenseMapInfo> CSEMap;
  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
    Instruction *In = &*I++;

    if (!CSEDenseMapInfo::canHandle(In))
      continue;

    // Check if we can replace this instruction with any of the
    // visited instructions.
    if (Instruction *V = CSEMap.lookup(In)) {
      In->replaceAllUsesWith(V);
      In->eraseFromParent();
      continue;
    }

    CSEMap[In] = In;
  }
}

/// \brief Estimate the overhead of scalarizing an instruction. This is a
/// convenience wrapper for the type-based getScalarizationOverhead API.
static unsigned getScalarizationOverhead(Instruction *I, unsigned VF,
                                         const TargetTransformInfo &TTI) {
  if (VF == 1)
    return 0;

  unsigned Cost = 0;
  Type *RetTy = ToVectorTy(I->getType(), VF);
  if (!RetTy->isVoidTy() &&
      (!isa<LoadInst>(I) || !TTI.supportsEfficientVectorElementLoadStore()))
    Cost += TTI.getScalarizationOverhead(RetTy, true, false);

  if (CallInst *CI = dyn_cast<CallInst>(I)) {
    SmallVector<const Value *, 4> Operands(CI->arg_operands());
    Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
  } else if (!isa<StoreInst>(I) ||
             !TTI.supportsEfficientVectorElementLoadStore()) {
    SmallVector<const Value *, 4> Operands(I->operand_values());
    Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
  }

  return Cost;
}

// Estimate cost of a call instruction CI if it were vectorized with factor VF.
// Return the cost of the instruction, including scalarization overhead if it's
// needed. The flag NeedToScalarize shows if the call needs to be scalarized -
// i.e. either vector version isn't available, or is too expensive.
static unsigned getVectorCallCost(CallInst *CI, unsigned VF,
                                  const TargetTransformInfo &TTI,
                                  const TargetLibraryInfo *TLI,
                                  bool &NeedToScalarize) {
  Function *F = CI->getCalledFunction();
  StringRef FnName = CI->getCalledFunction()->getName();
  Type *ScalarRetTy = CI->getType();
  SmallVector<Type *, 4> Tys, ScalarTys;
  for (auto &ArgOp : CI->arg_operands())
    ScalarTys.push_back(ArgOp->getType());

  // Estimate cost of scalarized vector call. The source operands are assumed
  // to be vectors, so we need to extract individual elements from there,
  // execute VF scalar calls, and then gather the result into the vector return
  // value.
  unsigned ScalarCallCost = TTI.getCallInstrCost(F, ScalarRetTy, ScalarTys);
  if (VF == 1)
    return ScalarCallCost;

  // Compute corresponding vector type for return value and arguments.
  Type *RetTy = ToVectorTy(ScalarRetTy, VF);
  for (Type *ScalarTy : ScalarTys)
    Tys.push_back(ToVectorTy(ScalarTy, VF));

  // Compute costs of unpacking argument values for the scalar calls and
  // packing the return values to a vector.
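  // For illustration (hypothetical numbers): with VF = 4 and a scalar call
  // cost of 10, the estimate below is 4 * 10 plus the cost of four extracts
  // per vector argument and four inserts for the result; this total is then
  // compared against the cost of a single genuine vector call.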
  unsigned ScalarizationCost = getScalarizationOverhead(CI, VF, TTI);

  unsigned Cost = ScalarCallCost * VF + ScalarizationCost;

  // If we can't emit a vector call for this function, then the currently found
  // cost is the cost we need to return.
  NeedToScalarize = true;
  if (!TLI || !TLI->isFunctionVectorizable(FnName, VF) || CI->isNoBuiltin())
    return Cost;

  // If the corresponding vector cost is cheaper, return its cost.
  unsigned VectorCallCost = TTI.getCallInstrCost(nullptr, RetTy, Tys);
  if (VectorCallCost < Cost) {
    NeedToScalarize = false;
    return VectorCallCost;
  }
  return Cost;
}

// Estimate cost of an intrinsic call instruction CI if it were vectorized with
// factor VF. Return the cost of the instruction, including scalarization
// overhead if it's needed.
static unsigned getVectorIntrinsicCost(CallInst *CI, unsigned VF,
                                       const TargetTransformInfo &TTI,
                                       const TargetLibraryInfo *TLI) {
  Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
  assert(ID && "Expected intrinsic call!");

  FastMathFlags FMF;
  if (auto *FPMO = dyn_cast<FPMathOperator>(CI))
    FMF = FPMO->getFastMathFlags();

  SmallVector<Value *, 4> Operands(CI->arg_operands());
  return TTI.getIntrinsicInstrCost(ID, CI->getType(), Operands, FMF, VF);
}

static Type *smallestIntegerVectorType(Type *T1, Type *T2) {
  auto *I1 = cast<IntegerType>(T1->getVectorElementType());
  auto *I2 = cast<IntegerType>(T2->getVectorElementType());
  return I1->getBitWidth() < I2->getBitWidth() ? T1 : T2;
}

static Type *largestIntegerVectorType(Type *T1, Type *T2) {
  auto *I1 = cast<IntegerType>(T1->getVectorElementType());
  auto *I2 = cast<IntegerType>(T2->getVectorElementType());
  return I1->getBitWidth() > I2->getBitWidth() ? T1 : T2;
}

void InnerLoopVectorizer::truncateToMinimalBitwidths() {
  // For every instruction `I` in MinBWs, truncate the operands, create a
  // truncated version of `I` and reextend its result. InstCombine runs
  // later and will remove any ext/trunc pairs.
  SmallPtrSet<Value *, 4> Erased;
  for (const auto &KV : Cost->getMinimalBitwidths()) {
    // If the value wasn't vectorized, we must maintain the original scalar
    // type. The absence of the value from VectorLoopValueMap indicates that it
    // wasn't vectorized.
    if (!VectorLoopValueMap.hasAnyVectorValue(KV.first))
      continue;
    for (unsigned Part = 0; Part < UF; ++Part) {
      Value *I = getOrCreateVectorValue(KV.first, Part);
      if (Erased.count(I) || I->use_empty() || !isa<Instruction>(I))
        continue;
      Type *OriginalTy = I->getType();
      Type *ScalarTruncatedTy =
          IntegerType::get(OriginalTy->getContext(), KV.second);
      Type *TruncatedTy = VectorType::get(ScalarTruncatedTy,
                                          OriginalTy->getVectorNumElements());
      if (TruncatedTy == OriginalTy)
        continue;

      IRBuilder<> B(cast<Instruction>(I));
      auto ShrinkOperand = [&](Value *V) -> Value * {
        if (auto *ZI = dyn_cast<ZExtInst>(V))
          if (ZI->getSrcTy() == TruncatedTy)
            return ZI->getOperand(0);
        return B.CreateZExtOrTrunc(V, TruncatedTy);
      };

      // The actual instruction modification depends on the instruction type,
      // unfortunately.
      Value *NewI = nullptr;
      if (auto *BO = dyn_cast<BinaryOperator>(I)) {
        NewI = B.CreateBinOp(BO->getOpcode(), ShrinkOperand(BO->getOperand(0)),
                             ShrinkOperand(BO->getOperand(1)));

        // Any wrapping introduced by shrinking this operation shouldn't be
        // considered undefined behavior. So, we can't unconditionally copy
        // arithmetic wrapping flags to NewI.
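        // For example, an i32 "add nsw" narrowed to i8 may legitimately wrap
        // at 8 bits, so copying nsw/nuw here could introduce poison values
        // the original code did not have.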
cast(NewI)->copyIRFlags(I, /*IncludeWrapFlags=*/false); } else if (auto *CI = dyn_cast(I)) { NewI = B.CreateICmp(CI->getPredicate(), ShrinkOperand(CI->getOperand(0)), ShrinkOperand(CI->getOperand(1))); } else if (auto *SI = dyn_cast(I)) { NewI = B.CreateSelect(SI->getCondition(), ShrinkOperand(SI->getTrueValue()), ShrinkOperand(SI->getFalseValue())); } else if (auto *CI = dyn_cast(I)) { switch (CI->getOpcode()) { default: llvm_unreachable("Unhandled cast!"); case Instruction::Trunc: NewI = ShrinkOperand(CI->getOperand(0)); break; case Instruction::SExt: NewI = B.CreateSExtOrTrunc( CI->getOperand(0), smallestIntegerVectorType(OriginalTy, TruncatedTy)); break; case Instruction::ZExt: NewI = B.CreateZExtOrTrunc( CI->getOperand(0), smallestIntegerVectorType(OriginalTy, TruncatedTy)); break; } } else if (auto *SI = dyn_cast(I)) { auto Elements0 = SI->getOperand(0)->getType()->getVectorNumElements(); auto *O0 = B.CreateZExtOrTrunc( SI->getOperand(0), VectorType::get(ScalarTruncatedTy, Elements0)); auto Elements1 = SI->getOperand(1)->getType()->getVectorNumElements(); auto *O1 = B.CreateZExtOrTrunc( SI->getOperand(1), VectorType::get(ScalarTruncatedTy, Elements1)); NewI = B.CreateShuffleVector(O0, O1, SI->getMask()); } else if (isa(I)) { // Don't do anything with the operands, just extend the result. continue; } else if (auto *IE = dyn_cast(I)) { auto Elements = IE->getOperand(0)->getType()->getVectorNumElements(); auto *O0 = B.CreateZExtOrTrunc( IE->getOperand(0), VectorType::get(ScalarTruncatedTy, Elements)); auto *O1 = B.CreateZExtOrTrunc(IE->getOperand(1), ScalarTruncatedTy); NewI = B.CreateInsertElement(O0, O1, IE->getOperand(2)); } else if (auto *EE = dyn_cast(I)) { auto Elements = EE->getOperand(0)->getType()->getVectorNumElements(); auto *O0 = B.CreateZExtOrTrunc( EE->getOperand(0), VectorType::get(ScalarTruncatedTy, Elements)); NewI = B.CreateExtractElement(O0, EE->getOperand(2)); } else { llvm_unreachable("Unhandled instruction type!"); } // Lastly, extend the result. NewI->takeName(cast(I)); Value *Res = B.CreateZExtOrTrunc(NewI, OriginalTy); I->replaceAllUsesWith(Res); cast(I)->eraseFromParent(); Erased.insert(I); VectorLoopValueMap.resetVectorValue(KV.first, Part, Res); } } // We'll have created a bunch of ZExts that are now parentless. Clean up. for (const auto &KV : Cost->getMinimalBitwidths()) { // If the value wasn't vectorized, we must maintain the original scalar // type. The absence of the value from VectorLoopValueMap indicates that it // wasn't vectorized. if (!VectorLoopValueMap.hasAnyVectorValue(KV.first)) continue; for (unsigned Part = 0; Part < UF; ++Part) { Value *I = getOrCreateVectorValue(KV.first, Part); ZExtInst *Inst = dyn_cast(I); if (Inst && Inst->use_empty()) { Value *NewI = Inst->getOperand(0); Inst->eraseFromParent(); VectorLoopValueMap.resetVectorValue(KV.first, Part, NewI); } } } } void InnerLoopVectorizer::fixVectorizedLoop() { // Insert truncates and extends for any truncated instructions as hints to // InstCombine. if (VF > 1) truncateToMinimalBitwidths(); // At this point every instruction in the original loop is widened to a // vector form. Now we need to fix the recurrences in the loop. These PHI // nodes are currently empty because we did not want to introduce cycles. // This is the second stage of vectorizing recurrences. fixCrossIterationPHIs(); // Update the dominator tree. // // FIXME: After creating the structure of the new loop, the dominator tree is // no longer up-to-date, and it remains that way until we update it // here. 
An out-of-date dominator tree is problematic for SCEV, // because SCEVExpander uses it to guide code generation. The // vectorizer use SCEVExpanders in several places. Instead, we should // keep the dominator tree up-to-date as we go. updateAnalysis(); // Fix-up external users of the induction variables. for (auto &Entry : *Legal->getInductionVars()) fixupIVUsers(Entry.first, Entry.second, getOrCreateVectorTripCount(LI->getLoopFor(LoopVectorBody)), IVEndValues[Entry.first], LoopMiddleBlock); fixLCSSAPHIs(); for (Instruction *PI : PredicatedInstructions) sinkScalarOperands(&*PI); // Remove redundant induction instructions. cse(LoopVectorBody); } void InnerLoopVectorizer::fixCrossIterationPHIs() { // In order to support recurrences we need to be able to vectorize Phi nodes. // Phi nodes have cycles, so we need to vectorize them in two stages. This is // stage #2: We now need to fix the recurrences by adding incoming edges to // the currently empty PHI nodes. At this point every instruction in the // original loop is widened to a vector form so we can use them to construct // the incoming edges. for (PHINode &Phi : OrigLoop->getHeader()->phis()) { // Handle first-order recurrences and reductions that need to be fixed. if (Legal->isFirstOrderRecurrence(&Phi)) fixFirstOrderRecurrence(&Phi); else if (Legal->isReductionVariable(&Phi)) fixReduction(&Phi); } } void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) { // This is the second phase of vectorizing first-order recurrences. An // overview of the transformation is described below. Suppose we have the // following loop. // // for (int i = 0; i < n; ++i) // b[i] = a[i] - a[i - 1]; // // There is a first-order recurrence on "a". For this loop, the shorthand // scalar IR looks like: // // scalar.ph: // s_init = a[-1] // br scalar.body // // scalar.body: // i = phi [0, scalar.ph], [i+1, scalar.body] // s1 = phi [s_init, scalar.ph], [s2, scalar.body] // s2 = a[i] // b[i] = s2 - s1 // br cond, scalar.body, ... // // In this example, s1 is a recurrence because it's value depends on the // previous iteration. In the first phase of vectorization, we created a // temporary value for s1. We now complete the vectorization and produce the // shorthand vector IR shown below (for VF = 4, UF = 1). // // vector.ph: // v_init = vector(..., ..., ..., a[-1]) // br vector.body // // vector.body // i = phi [0, vector.ph], [i+4, vector.body] // v1 = phi [v_init, vector.ph], [v2, vector.body] // v2 = a[i, i+1, i+2, i+3]; // v3 = vector(v1(3), v2(0, 1, 2)) // b[i, i+1, i+2, i+3] = v2 - v3 // br cond, vector.body, middle.block // // middle.block: // x = v2(3) // br scalar.ph // // scalar.ph: // s_init = phi [x, middle.block], [a[-1], otherwise] // br scalar.body // // After execution completes the vector loop, we extract the next value of // the recurrence (x) to use as the initial value in the scalar loop. // Get the original loop preheader and single loop latch. auto *Preheader = OrigLoop->getLoopPreheader(); auto *Latch = OrigLoop->getLoopLatch(); // Get the initial and previous values of the scalar recurrence. auto *ScalarInit = Phi->getIncomingValueForBlock(Preheader); auto *Previous = Phi->getIncomingValueForBlock(Latch); // Create a vector from the initial value. 
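  // For instance, for an i32 recurrence with VF = 4 this emits, in the
  // vector preheader,
  //   %vector.recur.init = insertelement <4 x i32> undef, i32 %s_init, i32 3
  // placing the scalar initial value in the last lane, matching the example
  // above.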
auto *VectorInit = ScalarInit; if (VF > 1) { Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator()); VectorInit = Builder.CreateInsertElement( UndefValue::get(VectorType::get(VectorInit->getType(), VF)), VectorInit, Builder.getInt32(VF - 1), "vector.recur.init"); } // We constructed a temporary phi node in the first phase of vectorization. // This phi node will eventually be deleted. Builder.SetInsertPoint( cast(VectorLoopValueMap.getVectorValue(Phi, 0))); // Create a phi node for the new recurrence. The current value will either be // the initial value inserted into a vector or loop-varying vector value. auto *VecPhi = Builder.CreatePHI(VectorInit->getType(), 2, "vector.recur"); VecPhi->addIncoming(VectorInit, LoopVectorPreHeader); // Get the vectorized previous value of the last part UF - 1. It appears last // among all unrolled iterations, due to the order of their construction. Value *PreviousLastPart = getOrCreateVectorValue(Previous, UF - 1); // Set the insertion point after the previous value if it is an instruction. // Note that the previous value may have been constant-folded so it is not // guaranteed to be an instruction in the vector loop. Also, if the previous // value is a phi node, we should insert after all the phi nodes to avoid // breaking basic block verification. if (LI->getLoopFor(LoopVectorBody)->isLoopInvariant(PreviousLastPart) || isa(PreviousLastPart)) Builder.SetInsertPoint(&*LoopVectorBody->getFirstInsertionPt()); else Builder.SetInsertPoint( &*++BasicBlock::iterator(cast(PreviousLastPart))); // We will construct a vector for the recurrence by combining the values for // the current and previous iterations. This is the required shuffle mask. SmallVector ShuffleMask(VF); ShuffleMask[0] = Builder.getInt32(VF - 1); for (unsigned I = 1; I < VF; ++I) ShuffleMask[I] = Builder.getInt32(I + VF - 1); // The vector from which to take the initial value for the current iteration // (actual or unrolled). Initially, this is the vector phi node. Value *Incoming = VecPhi; // Shuffle the current and previous vector and update the vector parts. for (unsigned Part = 0; Part < UF; ++Part) { Value *PreviousPart = getOrCreateVectorValue(Previous, Part); Value *PhiPart = VectorLoopValueMap.getVectorValue(Phi, Part); auto *Shuffle = VF > 1 ? Builder.CreateShuffleVector(Incoming, PreviousPart, ConstantVector::get(ShuffleMask)) : Incoming; PhiPart->replaceAllUsesWith(Shuffle); cast(PhiPart)->eraseFromParent(); VectorLoopValueMap.resetVectorValue(Phi, Part, Shuffle); Incoming = PreviousPart; } // Fix the latch value of the new recurrence in the vector loop. VecPhi->addIncoming(Incoming, LI->getLoopFor(LoopVectorBody)->getLoopLatch()); // Extract the last vector element in the middle block. This will be the // initial value for the recurrence when jumping to the scalar loop. auto *ExtractForScalar = Incoming; if (VF > 1) { Builder.SetInsertPoint(LoopMiddleBlock->getTerminator()); ExtractForScalar = Builder.CreateExtractElement( ExtractForScalar, Builder.getInt32(VF - 1), "vector.recur.extract"); } // Extract the second last element in the middle block if the // Phi is used outside the loop. We need to extract the phi itself // and not the last element (the phi update in the current iteration). This // will be the value when jumping to the exit block from the LoopMiddleBlock, // when the scalar loop is not run at all. 
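  // For illustration, with VF = 4 the extract below takes lane 2
  // ("vector.recur.extract.for.phi"): the value the phi held on entry to the
  // final vector iteration, rather than lane 3, which holds that iteration's
  // update.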
  Value *ExtractForPhiUsedOutsideLoop = nullptr;
  if (VF > 1)
    ExtractForPhiUsedOutsideLoop = Builder.CreateExtractElement(
        Incoming, Builder.getInt32(VF - 2), "vector.recur.extract.for.phi");
  // When loop is unrolled without vectorizing, initialize
  // ExtractForPhiUsedOutsideLoop with the value just prior to unrolled value
  // of `Incoming`. This is analogous to the vectorized case above: extracting
  // the second last element when VF > 1.
  else if (UF > 1)
    ExtractForPhiUsedOutsideLoop = getOrCreateVectorValue(Previous, UF - 2);

  // Fix the initial value of the original recurrence in the scalar loop.
  Builder.SetInsertPoint(&*LoopScalarPreHeader->begin());
  auto *Start = Builder.CreatePHI(Phi->getType(), 2, "scalar.recur.init");
  for (auto *BB : predecessors(LoopScalarPreHeader)) {
    auto *Incoming = BB == LoopMiddleBlock ? ExtractForScalar : ScalarInit;
    Start->addIncoming(Incoming, BB);
  }

  Phi->setIncomingValue(Phi->getBasicBlockIndex(LoopScalarPreHeader), Start);
  Phi->setName("scalar.recur");

  // Finally, fix users of the recurrence outside the loop. The users will need
  // either the last value of the scalar recurrence or the last value of the
  // vector recurrence we extracted in the middle block. Since the loop is in
  // LCSSA form, we just need to find the phi node for the original scalar
  // recurrence in the exit block, and then add an edge for the middle block.
  for (PHINode &LCSSAPhi : LoopExitBlock->phis()) {
    if (LCSSAPhi.getIncomingValue(0) == Phi) {
      LCSSAPhi.addIncoming(ExtractForPhiUsedOutsideLoop, LoopMiddleBlock);
      break;
    }
  }
}

void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
  Constant *Zero = Builder.getInt32(0);

  // Get its reduction variable descriptor.
  assert(Legal->isReductionVariable(Phi) &&
         "Unable to find the reduction variable");
  RecurrenceDescriptor RdxDesc = (*Legal->getReductionVars())[Phi];

  RecurrenceDescriptor::RecurrenceKind RK = RdxDesc.getRecurrenceKind();
  TrackingVH<Value> ReductionStartValue = RdxDesc.getRecurrenceStartValue();
  Instruction *LoopExitInst = RdxDesc.getLoopExitInstr();
  RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind =
      RdxDesc.getMinMaxRecurrenceKind();
  setDebugLocFromInst(Builder, ReductionStartValue);

  // We need to generate a reduction vector from the incoming scalar.
  // To do so, we need to generate the 'identity' vector and override
  // one of the elements with the incoming scalar reduction. We need
  // to do it in the vector-loop preheader.
  Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());

  // This is the vector-clone of the value that leaves the loop.
  Type *VecTy = getOrCreateVectorValue(LoopExitInst, 0)->getType();

  // Find the reduction identity variable. Zero for addition, or, xor;
  // one for multiplication; -1 for And.
  Value *Identity;
  Value *VectorStart;
  if (RK == RecurrenceDescriptor::RK_IntegerMinMax ||
      RK == RecurrenceDescriptor::RK_FloatMinMax) {
    // MinMax reductions have the start value as their identity.
    if (VF == 1) {
      VectorStart = Identity = ReductionStartValue;
    } else {
      VectorStart = Identity =
          Builder.CreateVectorSplat(VF, ReductionStartValue, "minmax.ident");
    }
  } else {
    // Handle other reduction kinds:
    Constant *Iden = RecurrenceDescriptor::getRecurrenceIdentity(
        RK, VecTy->getScalarType());
    if (VF == 1) {
      Identity = Iden;
      // This vector is the Identity vector where the first element is the
      // incoming scalar reduction.
      VectorStart = ReductionStartValue;
    } else {
      Identity = ConstantVector::getSplat(VF, Iden);

      // This vector is the Identity vector where the first element is the
      // incoming scalar reduction.
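      // For example, for an integer add reduction with VF = 4 and start
      // value %s, Identity is <0, 0, 0, 0> and the insertelement below
      // yields VectorStart = <%s, 0, 0, 0>.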
VectorStart = Builder.CreateInsertElement(Identity, ReductionStartValue, Zero); } } // Fix the vector-loop phi. // Reductions do not have to start at zero. They can start with // any loop invariant values. BasicBlock *Latch = OrigLoop->getLoopLatch(); Value *LoopVal = Phi->getIncomingValueForBlock(Latch); for (unsigned Part = 0; Part < UF; ++Part) { Value *VecRdxPhi = getOrCreateVectorValue(Phi, Part); Value *Val = getOrCreateVectorValue(LoopVal, Part); // Make sure to add the reduction stat value only to the // first unroll part. Value *StartVal = (Part == 0) ? VectorStart : Identity; cast(VecRdxPhi)->addIncoming(StartVal, LoopVectorPreHeader); cast(VecRdxPhi) ->addIncoming(Val, LI->getLoopFor(LoopVectorBody)->getLoopLatch()); } // Before each round, move the insertion point right between // the PHIs and the values we are going to write. // This allows us to write both PHINodes and the extractelement // instructions. Builder.SetInsertPoint(&*LoopMiddleBlock->getFirstInsertionPt()); setDebugLocFromInst(Builder, LoopExitInst); // If the vector reduction can be performed in a smaller type, we truncate // then extend the loop exit value to enable InstCombine to evaluate the // entire expression in the smaller type. if (VF > 1 && Phi->getType() != RdxDesc.getRecurrenceType()) { Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), VF); Builder.SetInsertPoint( LI->getLoopFor(LoopVectorBody)->getLoopLatch()->getTerminator()); VectorParts RdxParts(UF); for (unsigned Part = 0; Part < UF; ++Part) { RdxParts[Part] = VectorLoopValueMap.getVectorValue(LoopExitInst, Part); Value *Trunc = Builder.CreateTrunc(RdxParts[Part], RdxVecTy); Value *Extnd = RdxDesc.isSigned() ? Builder.CreateSExt(Trunc, VecTy) : Builder.CreateZExt(Trunc, VecTy); for (Value::user_iterator UI = RdxParts[Part]->user_begin(); UI != RdxParts[Part]->user_end();) if (*UI != Trunc) { (*UI++)->replaceUsesOfWith(RdxParts[Part], Extnd); RdxParts[Part] = Extnd; } else { ++UI; } } Builder.SetInsertPoint(&*LoopMiddleBlock->getFirstInsertionPt()); for (unsigned Part = 0; Part < UF; ++Part) { RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy); VectorLoopValueMap.resetVectorValue(LoopExitInst, Part, RdxParts[Part]); } } // Reduce all of the unrolled parts into a single vector. Value *ReducedPartRdx = VectorLoopValueMap.getVectorValue(LoopExitInst, 0); unsigned Op = RecurrenceDescriptor::getRecurrenceBinOp(RK); setDebugLocFromInst(Builder, ReducedPartRdx); for (unsigned Part = 1; Part < UF; ++Part) { Value *RdxPart = VectorLoopValueMap.getVectorValue(LoopExitInst, Part); if (Op != Instruction::ICmp && Op != Instruction::FCmp) // Floating point operations had to be 'fast' to enable the reduction. ReducedPartRdx = addFastMathFlag( Builder.CreateBinOp((Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx")); else ReducedPartRdx = RecurrenceDescriptor::createMinMaxOp( Builder, MinMaxKind, ReducedPartRdx, RdxPart); } if (VF > 1) { bool NoNaN = Legal->hasFunNoNaNAttr(); ReducedPartRdx = createTargetReduction(Builder, TTI, RdxDesc, ReducedPartRdx, NoNaN); // If the reduction can be performed in a smaller type, we need to extend // the reduction to the wider type before we branch to the original loop. if (Phi->getType() != RdxDesc.getRecurrenceType()) ReducedPartRdx = RdxDesc.isSigned() ? Builder.CreateSExt(ReducedPartRdx, Phi->getType()) : Builder.CreateZExt(ReducedPartRdx, Phi->getType()); } // Create a phi node that merges control-flow from the backedge-taken check // block and the middle block. 
PHINode *BCBlockPhi = PHINode::Create(Phi->getType(), 2, "bc.merge.rdx", LoopScalarPreHeader->getTerminator()); for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I) BCBlockPhi->addIncoming(ReductionStartValue, LoopBypassBlocks[I]); BCBlockPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock); // Now, we need to fix the users of the reduction variable // inside and outside of the scalar remainder loop. // We know that the loop is in LCSSA form. We need to update the // PHI nodes in the exit blocks. for (PHINode &LCSSAPhi : LoopExitBlock->phis()) { // All PHINodes need to have a single entry edge, or two if // we already fixed them. assert(LCSSAPhi.getNumIncomingValues() < 3 && "Invalid LCSSA PHI"); // We found a reduction value exit-PHI. Update it with the // incoming bypass edge. if (LCSSAPhi.getIncomingValue(0) == LoopExitInst) LCSSAPhi.addIncoming(ReducedPartRdx, LoopMiddleBlock); } // end of the LCSSA phi scan. // Fix the scalar loop reduction variable with the incoming reduction sum // from the vector body and from the backedge value. int IncomingEdgeBlockIdx = Phi->getBasicBlockIndex(OrigLoop->getLoopLatch()); assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index"); // Pick the other block. int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1); Phi->setIncomingValue(SelfEdgeBlockIdx, BCBlockPhi); Phi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst); } void InnerLoopVectorizer::fixLCSSAPHIs() { for (PHINode &LCSSAPhi : LoopExitBlock->phis()) { if (LCSSAPhi.getNumIncomingValues() == 1) { assert(OrigLoop->isLoopInvariant(LCSSAPhi.getIncomingValue(0)) && "Incoming value isn't loop invariant"); LCSSAPhi.addIncoming(LCSSAPhi.getIncomingValue(0), LoopMiddleBlock); } } } void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) { // The basic block and loop containing the predicated instruction. auto *PredBB = PredInst->getParent(); auto *VectorLoop = LI->getLoopFor(PredBB); // Initialize a worklist with the operands of the predicated instruction. SetVector Worklist(PredInst->op_begin(), PredInst->op_end()); // Holds instructions that we need to analyze again. An instruction may be // reanalyzed if we don't yet know if we can sink it or not. SmallVector InstsToReanalyze; // Returns true if a given use occurs in the predicated block. Phi nodes use // their operands in their corresponding predecessor blocks. auto isBlockOfUsePredicated = [&](Use &U) -> bool { auto *I = cast(U.getUser()); BasicBlock *BB = I->getParent(); if (auto *Phi = dyn_cast(I)) BB = Phi->getIncomingBlock( PHINode::getIncomingValueNumForOperand(U.getOperandNo())); return BB == PredBB; }; // Iteratively sink the scalarized operands of the predicated instruction // into the block we created for it. When an instruction is sunk, it's // operands are then added to the worklist. The algorithm ends after one pass // through the worklist doesn't sink a single instruction. bool Changed; do { // Add the instructions that need to be reanalyzed to the worklist, and // reset the changed indicator. Worklist.insert(InstsToReanalyze.begin(), InstsToReanalyze.end()); InstsToReanalyze.clear(); Changed = false; while (!Worklist.empty()) { auto *I = dyn_cast(Worklist.pop_back_val()); // We can't sink an instruction if it is a phi node, is already in the // predicated block, is not in the loop, or may have side effects. if (!I || isa(I) || I->getParent() == PredBB || !VectorLoop->contains(I) || I->mayHaveSideEffects()) continue; // It's legal to sink the instruction if all its uses occur in the // predicated block. 
Otherwise, there's nothing to do yet, and we may // need to reanalyze the instruction. if (!llvm::all_of(I->uses(), isBlockOfUsePredicated)) { InstsToReanalyze.push_back(I); continue; } // Move the instruction to the beginning of the predicated block, and add // it's operands to the worklist. I->moveBefore(&*PredBB->getFirstInsertionPt()); Worklist.insert(I->op_begin(), I->op_end()); // The sinking may have enabled other instructions to be sunk, so we will // need to iterate. Changed = true; } } while (Changed); } void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF, unsigned VF) { assert(PN->getParent() == OrigLoop->getHeader() && "Non-header phis should have been handled elsewhere"); PHINode *P = cast(PN); // In order to support recurrences we need to be able to vectorize Phi nodes. // Phi nodes have cycles, so we need to vectorize them in two stages. This is // stage #1: We create a new vector PHI node with no incoming edges. We'll use // this value when we vectorize all of the instructions that use the PHI. if (Legal->isReductionVariable(P) || Legal->isFirstOrderRecurrence(P)) { for (unsigned Part = 0; Part < UF; ++Part) { // This is phase one of vectorizing PHIs. Type *VecTy = (VF == 1) ? PN->getType() : VectorType::get(PN->getType(), VF); Value *EntryPart = PHINode::Create( VecTy, 2, "vec.phi", &*LoopVectorBody->getFirstInsertionPt()); VectorLoopValueMap.setVectorValue(P, Part, EntryPart); } return; } setDebugLocFromInst(Builder, P); // This PHINode must be an induction variable. // Make sure that we know about it. assert(Legal->getInductionVars()->count(P) && "Not an induction variable"); InductionDescriptor II = Legal->getInductionVars()->lookup(P); const DataLayout &DL = OrigLoop->getHeader()->getModule()->getDataLayout(); // FIXME: The newly created binary instructions should contain nsw/nuw flags, // which can be found from the original scalar operations. switch (II.getKind()) { case InductionDescriptor::IK_NoInduction: llvm_unreachable("Unknown induction"); case InductionDescriptor::IK_IntInduction: case InductionDescriptor::IK_FpInduction: llvm_unreachable("Integer/fp induction is handled elsewhere."); case InductionDescriptor::IK_PtrInduction: { // Handle the pointer induction variable case. assert(P->getType()->isPointerTy() && "Unexpected type."); // This is the normalized GEP that starts counting at zero. Value *PtrInd = Induction; PtrInd = Builder.CreateSExtOrTrunc(PtrInd, II.getStep()->getType()); // Determine the number of scalars we need to generate for each unroll // iteration. If the instruction is uniform, we only need to generate the // first lane. Otherwise, we generate all VF values. unsigned Lanes = Cost->isUniformAfterVectorization(P, VF) ? 1 : VF; // These are the scalar results. Notice that we don't generate vector GEPs // because scalar GEPs result in better code. for (unsigned Part = 0; Part < UF; ++Part) { for (unsigned Lane = 0; Lane < Lanes; ++Lane) { Constant *Idx = ConstantInt::get(PtrInd->getType(), Lane + Part * VF); Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx); Value *SclrGep = II.transform(Builder, GlobalIdx, PSE.getSE(), DL); SclrGep->setName("next.gep"); VectorLoopValueMap.setScalarValue(P, {Part, Lane}, SclrGep); } } return; } } } /// A helper function for checking whether an integer division-related /// instruction may divide by zero (in which case it must be predicated if /// executed conditionally in the scalar code). /// TODO: It may be worthwhile to generalize and check isKnownNonZero(). 
/// Non-zero divisors that are non compile-time constants will not be /// converted into multiplication, so we will still end up scalarizing /// the division, but can do so w/o predication. static bool mayDivideByZero(Instruction &I) { assert((I.getOpcode() == Instruction::UDiv || I.getOpcode() == Instruction::SDiv || I.getOpcode() == Instruction::URem || I.getOpcode() == Instruction::SRem) && "Unexpected instruction"); Value *Divisor = I.getOperand(1); auto *CInt = dyn_cast(Divisor); return !CInt || CInt->isZero(); } void InnerLoopVectorizer::widenInstruction(Instruction &I) { switch (I.getOpcode()) { case Instruction::Br: case Instruction::PHI: llvm_unreachable("This instruction is handled by a different recipe."); case Instruction::GetElementPtr: { // Construct a vector GEP by widening the operands of the scalar GEP as // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP // results in a vector of pointers when at least one operand of the GEP // is vector-typed. Thus, to keep the representation compact, we only use // vector-typed operands for loop-varying values. auto *GEP = cast(&I); if (VF > 1 && OrigLoop->hasLoopInvariantOperands(GEP)) { // If we are vectorizing, but the GEP has only loop-invariant operands, // the GEP we build (by only using vector-typed operands for // loop-varying values) would be a scalar pointer. Thus, to ensure we // produce a vector of pointers, we need to either arbitrarily pick an // operand to broadcast, or broadcast a clone of the original GEP. // Here, we broadcast a clone of the original. // // TODO: If at some point we decide to scalarize instructions having // loop-invariant operands, this special case will no longer be // required. We would add the scalarization decision to // collectLoopScalars() and teach getVectorValue() to broadcast // the lane-zero scalar value. auto *Clone = Builder.Insert(GEP->clone()); for (unsigned Part = 0; Part < UF; ++Part) { Value *EntryPart = Builder.CreateVectorSplat(VF, Clone); VectorLoopValueMap.setVectorValue(&I, Part, EntryPart); addMetadata(EntryPart, GEP); } } else { // If the GEP has at least one loop-varying operand, we are sure to // produce a vector of pointers. But if we are only unrolling, we want // to produce a scalar GEP for each unroll part. Thus, the GEP we // produce with the code below will be scalar (if VF == 1) or vector // (otherwise). Note that for the unroll-only case, we still maintain // values in the vector mapping with initVector, as we do for other // instructions. for (unsigned Part = 0; Part < UF; ++Part) { // The pointer operand of the new GEP. If it's loop-invariant, we // won't broadcast it. auto *Ptr = OrigLoop->isLoopInvariant(GEP->getPointerOperand()) ? GEP->getPointerOperand() : getOrCreateVectorValue(GEP->getPointerOperand(), Part); // Collect all the indices for the new GEP. If any index is // loop-invariant, we won't broadcast it. SmallVector Indices; for (auto &U : make_range(GEP->idx_begin(), GEP->idx_end())) { if (OrigLoop->isLoopInvariant(U.get())) Indices.push_back(U.get()); else Indices.push_back(getOrCreateVectorValue(U.get(), Part)); } // Create the new GEP. Note that this GEP may be a scalar if VF == 1, // but it should be a vector, otherwise. auto *NewGEP = GEP->isInBounds() ? 
Builder.CreateInBoundsGEP(Ptr, Indices) : Builder.CreateGEP(Ptr, Indices); assert((VF == 1 || NewGEP->getType()->isVectorTy()) && "NewGEP is not a pointer vector"); VectorLoopValueMap.setVectorValue(&I, Part, NewGEP); addMetadata(NewGEP, GEP); } } break; } case Instruction::UDiv: case Instruction::SDiv: case Instruction::SRem: case Instruction::URem: case Instruction::Add: case Instruction::FAdd: case Instruction::Sub: case Instruction::FSub: case Instruction::Mul: case Instruction::FMul: case Instruction::FDiv: case Instruction::FRem: case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: case Instruction::And: case Instruction::Or: case Instruction::Xor: { // Just widen binops. auto *BinOp = cast(&I); setDebugLocFromInst(Builder, BinOp); for (unsigned Part = 0; Part < UF; ++Part) { Value *A = getOrCreateVectorValue(BinOp->getOperand(0), Part); Value *B = getOrCreateVectorValue(BinOp->getOperand(1), Part); Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A, B); if (BinaryOperator *VecOp = dyn_cast(V)) VecOp->copyIRFlags(BinOp); // Use this vector value for all users of the original instruction. VectorLoopValueMap.setVectorValue(&I, Part, V); addMetadata(V, BinOp); } break; } case Instruction::Select: { // Widen selects. // If the selector is loop invariant we can create a select // instruction with a scalar condition. Otherwise, use vector-select. auto *SE = PSE.getSE(); bool InvariantCond = SE->isLoopInvariant(PSE.getSCEV(I.getOperand(0)), OrigLoop); setDebugLocFromInst(Builder, &I); // The condition can be loop invariant but still defined inside the // loop. This means that we can't just use the original 'cond' value. // We have to take the 'vectorized' value and pick the first lane. // Instcombine will make this a no-op. auto *ScalarCond = getOrCreateScalarValue(I.getOperand(0), {0, 0}); for (unsigned Part = 0; Part < UF; ++Part) { Value *Cond = getOrCreateVectorValue(I.getOperand(0), Part); Value *Op0 = getOrCreateVectorValue(I.getOperand(1), Part); Value *Op1 = getOrCreateVectorValue(I.getOperand(2), Part); Value *Sel = Builder.CreateSelect(InvariantCond ? ScalarCond : Cond, Op0, Op1); VectorLoopValueMap.setVectorValue(&I, Part, Sel); addMetadata(Sel, &I); } break; } case Instruction::ICmp: case Instruction::FCmp: { // Widen compares. Generate vector compares. bool FCmp = (I.getOpcode() == Instruction::FCmp); auto *Cmp = dyn_cast(&I); setDebugLocFromInst(Builder, Cmp); for (unsigned Part = 0; Part < UF; ++Part) { Value *A = getOrCreateVectorValue(Cmp->getOperand(0), Part); Value *B = getOrCreateVectorValue(Cmp->getOperand(1), Part); Value *C = nullptr; if (FCmp) { // Propagate fast math flags. IRBuilder<>::FastMathFlagGuard FMFG(Builder); Builder.setFastMathFlags(Cmp->getFastMathFlags()); C = Builder.CreateFCmp(Cmp->getPredicate(), A, B); } else { C = Builder.CreateICmp(Cmp->getPredicate(), A, B); } VectorLoopValueMap.setVectorValue(&I, Part, C); addMetadata(C, &I); } break; } case Instruction::ZExt: case Instruction::SExt: case Instruction::FPToUI: case Instruction::FPToSI: case Instruction::FPExt: case Instruction::PtrToInt: case Instruction::IntToPtr: case Instruction::SIToFP: case Instruction::UIToFP: case Instruction::Trunc: case Instruction::FPTrunc: case Instruction::BitCast: { auto *CI = dyn_cast(&I); setDebugLocFromInst(Builder, CI); /// Vectorize casts. Type *DestTy = (VF == 1) ? 
CI->getType() : VectorType::get(CI->getType(), VF); for (unsigned Part = 0; Part < UF; ++Part) { Value *A = getOrCreateVectorValue(CI->getOperand(0), Part); Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy); VectorLoopValueMap.setVectorValue(&I, Part, Cast); addMetadata(Cast, &I); } break; } case Instruction::Call: { // Ignore dbg intrinsics. if (isa(I)) break; setDebugLocFromInst(Builder, &I); Module *M = I.getParent()->getParent()->getParent(); auto *CI = cast(&I); StringRef FnName = CI->getCalledFunction()->getName(); Function *F = CI->getCalledFunction(); Type *RetTy = ToVectorTy(CI->getType(), VF); SmallVector Tys; for (Value *ArgOperand : CI->arg_operands()) Tys.push_back(ToVectorTy(ArgOperand->getType(), VF)); Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); // The flag shows whether we use Intrinsic or a usual Call for vectorized // version of the instruction. // Is it beneficial to perform intrinsic call compared to lib call? bool NeedToScalarize; unsigned CallCost = getVectorCallCost(CI, VF, *TTI, TLI, NeedToScalarize); bool UseVectorIntrinsic = ID && getVectorIntrinsicCost(CI, VF, *TTI, TLI) <= CallCost; assert((UseVectorIntrinsic || !NeedToScalarize) && "Instruction should be scalarized elsewhere."); for (unsigned Part = 0; Part < UF; ++Part) { SmallVector Args; for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) { Value *Arg = CI->getArgOperand(i); // Some intrinsics have a scalar argument - don't replace it with a // vector. if (!UseVectorIntrinsic || !hasVectorInstrinsicScalarOpd(ID, i)) Arg = getOrCreateVectorValue(CI->getArgOperand(i), Part); Args.push_back(Arg); } Function *VectorF; if (UseVectorIntrinsic) { // Use vector version of the intrinsic. Type *TysForDecl[] = {CI->getType()}; if (VF > 1) TysForDecl[0] = VectorType::get(CI->getType()->getScalarType(), VF); VectorF = Intrinsic::getDeclaration(M, ID, TysForDecl); } else { // Use vector version of the library call. StringRef VFnName = TLI->getVectorizedFunction(FnName, VF); assert(!VFnName.empty() && "Vector function name is empty."); VectorF = M->getFunction(VFnName); if (!VectorF) { // Generate a declaration FunctionType *FTy = FunctionType::get(RetTy, Tys, false); VectorF = Function::Create(FTy, Function::ExternalLinkage, VFnName, M); VectorF->copyAttributesFrom(F); } } assert(VectorF && "Can't create vector function."); SmallVector OpBundles; CI->getOperandBundlesAsDefs(OpBundles); CallInst *V = Builder.CreateCall(VectorF, Args, OpBundles); if (isa(V)) V->copyFastMathFlags(CI); VectorLoopValueMap.setVectorValue(&I, Part, V); addMetadata(V, &I); } break; } default: // This instruction is not vectorized by simple widening. DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I); llvm_unreachable("Unhandled instruction!"); } // end of switch. } void InnerLoopVectorizer::updateAnalysis() { // Forget the original basic block. PSE.getSE()->forgetLoop(OrigLoop); // Update the dominator tree information. assert(DT->properlyDominates(LoopBypassBlocks.front(), LoopExitBlock) && "Entry does not dominate exit."); DT->addNewBlock(LoopMiddleBlock, LI->getLoopFor(LoopVectorBody)->getLoopLatch()); DT->addNewBlock(LoopScalarPreHeader, LoopBypassBlocks[0]); DT->changeImmediateDominator(LoopScalarBody, LoopScalarPreHeader); DT->changeImmediateDominator(LoopExitBlock, LoopBypassBlocks[0]); DEBUG(DT->verifyDomTree()); } /// \brief Check whether it is safe to if-convert this phi node. /// /// Phi nodes with constant expressions that can trap are not safe to if /// convert. 
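// Illustrative sketch (editorial, not part of the LLVM sources): the Call
// widening above keeps the vector intrinsic only when its estimated cost is
// no higher than the best call-based alternative. The struct and cost numbers
// below are invented to show the shape of that decision.
#if 0 // standalone sketch; not compiled as part of this file
#include <cstdio>

// Invented cost summary for one call site at a given VF.
struct WideningCosts {
  bool HasVectorIntrinsic;      // a vector intrinsic mapping exists
  unsigned IntrinsicCost;       // cost of the vector intrinsic
  unsigned LibOrScalarCallCost; // cost of a vector libcall / scalarized call
};

// Use the intrinsic only if it exists and is not more expensive.
static bool useVectorIntrinsic(const WideningCosts &C) {
  return C.HasVectorIntrinsic && C.IntrinsicCost <= C.LibOrScalarCallCost;
}

int main() {
  WideningCosts Cheap{true, 4, 12};  // intrinsic clearly wins
  WideningCosts NoIntr{false, 0, 9}; // no intrinsic: fall back to the call
  std::printf("%d %d\n", useVectorIntrinsic(Cheap), useVectorIntrinsic(NoIntr));
  return 0;
}
#endif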
static bool canIfConvertPHINodes(BasicBlock *BB) { for (PHINode &Phi : BB->phis()) { for (Value *V : Phi.incoming_values()) if (auto *C = dyn_cast(V)) if (C->canTrap()) return false; } return true; } bool LoopVectorizationLegality::canVectorizeWithIfConvert() { if (!EnableIfConversion) { ORE->emit(createMissedAnalysis("IfConversionDisabled") << "if-conversion is disabled"); return false; } assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable"); // A list of pointers that we can safely read and write to. SmallPtrSet SafePointes; // Collect safe addresses. for (BasicBlock *BB : TheLoop->blocks()) { if (blockNeedsPredication(BB)) continue; for (Instruction &I : *BB) if (auto *Ptr = getPointerOperand(&I)) SafePointes.insert(Ptr); } // Collect the blocks that need predication. BasicBlock *Header = TheLoop->getHeader(); for (BasicBlock *BB : TheLoop->blocks()) { // We don't support switch statements inside loops. if (!isa(BB->getTerminator())) { ORE->emit(createMissedAnalysis("LoopContainsSwitch", BB->getTerminator()) << "loop contains a switch statement"); return false; } // We must be able to predicate all blocks that need to be predicated. if (blockNeedsPredication(BB)) { if (!blockCanBePredicated(BB, SafePointes)) { ORE->emit(createMissedAnalysis("NoCFGForSelect", BB->getTerminator()) << "control flow cannot be substituted for a select"); return false; } } else if (BB != Header && !canIfConvertPHINodes(BB)) { ORE->emit(createMissedAnalysis("NoCFGForSelect", BB->getTerminator()) << "control flow cannot be substituted for a select"); return false; } } // We can if-convert this loop. return true; } bool LoopVectorizationLegality::canVectorize() { // Store the result and return it at the end instead of exiting early, in case // allowExtraAnalysis is used to report multiple reasons for not vectorizing. bool Result = true; bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); // We must have a loop in canonical form. Loops with indirectbr in them cannot // be canonicalized. if (!TheLoop->getLoopPreheader()) { DEBUG(dbgs() << "LV: Loop doesn't have a legal pre-header.\n"); ORE->emit(createMissedAnalysis("CFGNotUnderstood") << "loop control flow is not understood by vectorizer"); if (DoExtraAnalysis) Result = false; else return false; } // FIXME: The code is currently dead, since the loop gets sent to // LoopVectorizationLegality is already an innermost loop. // // We can only vectorize innermost loops. if (!TheLoop->empty()) { ORE->emit(createMissedAnalysis("NotInnermostLoop") << "loop is not the innermost loop"); if (DoExtraAnalysis) Result = false; else return false; } // We must have a single backedge. if (TheLoop->getNumBackEdges() != 1) { ORE->emit(createMissedAnalysis("CFGNotUnderstood") << "loop control flow is not understood by vectorizer"); if (DoExtraAnalysis) Result = false; else return false; } // We must have a single exiting block. if (!TheLoop->getExitingBlock()) { ORE->emit(createMissedAnalysis("CFGNotUnderstood") << "loop control flow is not understood by vectorizer"); if (DoExtraAnalysis) Result = false; else return false; } // We only handle bottom-tested loops, i.e. loop in which the condition is // checked at the end of each iteration. With that we can assume that all // instructions in the loop are executed the same number of times. 
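// Illustrative sketch (editorial, not part of the LLVM sources): canVectorize
// above either bails at the first failed legality test or, when extra analysis
// is allowed, keeps checking so every missed-optimization reason is reported.
// The three checks below are invented stand-ins for the real CFG/instruction/
// memory tests.
#if 0 // standalone sketch; not compiled as part of this file
#include <cstdio>
#include <string>
#include <vector>

static bool checkA() { return true; }
static bool checkB() { return false; }
static bool checkC() { return false; }

// With ReportAllReasons == false we return at the first failure; with it set
// we accumulate every reason and return the combined result at the end.
static bool canDoTransform(bool ReportAllReasons,
                           std::vector<std::string> &Why) {
  bool Result = true;
  auto fail = [&](const char *Reason) {
    Why.push_back(Reason);
    Result = false;
  };
  if (!checkA()) { fail("A failed"); if (!ReportAllReasons) return false; }
  if (!checkB()) { fail("B failed"); if (!ReportAllReasons) return false; }
  if (!checkC()) { fail("C failed"); if (!ReportAllReasons) return false; }
  return Result;
}

int main() {
  std::vector<std::string> Why;
  bool OK = canDoTransform(/*ReportAllReasons=*/true, Why);
  std::printf("ok=%d reasons=%zu\n", OK, Why.size()); // ok=0 reasons=2
  return 0;
}
#endif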
if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) { ORE->emit(createMissedAnalysis("CFGNotUnderstood") << "loop control flow is not understood by vectorizer"); if (DoExtraAnalysis) Result = false; else return false; } // We need to have a loop header. DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName() << '\n'); // Check if we can if-convert non-single-bb loops. unsigned NumBlocks = TheLoop->getNumBlocks(); if (NumBlocks != 1 && !canVectorizeWithIfConvert()) { DEBUG(dbgs() << "LV: Can't if-convert the loop.\n"); if (DoExtraAnalysis) Result = false; else return false; } // Check if we can vectorize the instructions and CFG in this loop. if (!canVectorizeInstrs()) { DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n"); if (DoExtraAnalysis) Result = false; else return false; } // Go over each instruction and look at memory deps. if (!canVectorizeMemory()) { DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n"); if (DoExtraAnalysis) Result = false; else return false; } DEBUG(dbgs() << "LV: We can vectorize this loop" << (LAI->getRuntimePointerChecking()->Need ? " (with a runtime bound check)" : "") << "!\n"); bool UseInterleaved = TTI->enableInterleavedAccessVectorization(); // If an override option has been passed in for interleaved accesses, use it. if (EnableInterleavedMemAccesses.getNumOccurrences() > 0) UseInterleaved = EnableInterleavedMemAccesses; // Analyze interleaved memory accesses. if (UseInterleaved) InterleaveInfo.analyzeInterleaving(*getSymbolicStrides()); unsigned SCEVThreshold = VectorizeSCEVCheckThreshold; if (Hints->getForce() == LoopVectorizeHints::FK_Enabled) SCEVThreshold = PragmaVectorizeSCEVCheckThreshold; if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) { ORE->emit(createMissedAnalysis("TooManySCEVRunTimeChecks") << "Too many SCEV assumptions need to be made and checked " << "at runtime"); DEBUG(dbgs() << "LV: Too many SCEV checks needed.\n"); if (DoExtraAnalysis) Result = false; else return false; } // Okay! We've done all the tests. If any have failed, return false. Otherwise // we can vectorize, and at this point we don't have any other mem analysis // which may limit our maximum vectorization factor, so just return true with // no restrictions. return Result; } static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) { if (Ty->isPointerTy()) return DL.getIntPtrType(Ty); // It is possible that char's or short's overflow when we ask for the loop's // trip count, work around this by changing the type size. if (Ty->getScalarSizeInBits() < 32) return Type::getInt32Ty(Ty->getContext()); return Ty; } static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) { Ty0 = convertPointerToIntegerType(DL, Ty0); Ty1 = convertPointerToIntegerType(DL, Ty1); if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits()) return Ty0; return Ty1; } /// \brief Check that the instruction has outside loop users and is not an /// identified reduction variable. static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, SmallPtrSetImpl &AllowedExit) { // Reduction and Induction instructions are allowed to have exit users. All // other instructions must not have external users. if (!AllowedExit.count(Inst)) // Check that all of the users of the loop are inside the BB. for (User *U : Inst->users()) { Instruction *UI = cast(U); // This user may be a reduction exit value. 
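// Illustrative sketch (editorial, not part of the LLVM sources): getWiderType
// above reduces pointer types to an integer of pointer width and keeps the
// wider scalar; sub-32-bit integers are widened to i32 first. The bit widths
// below are assumed values, not DataLayout queries.
#if 0 // standalone sketch; not compiled as part of this file
#include <cstdio>
#include <initializer_list>

// Stand-in for a type: just its scalar size in bits, after any
// pointer-to-integer conversion.
struct IntTy { unsigned Bits; };

static IntTy widerOf(IntTy A, IntTy B) { return A.Bits > B.Bits ? A : B; }

int main() {
  // e.g. an i16 induction (widened to at least i32) and a pointer induction
  // on an assumed 64-bit target.
  IntTy Widest{0};
  for (IntTy T : {IntTy{32}, IntTy{64}})
    Widest = widerOf(Widest, T);
  std::printf("widest induction type: i%u\n", Widest.Bits); // i64
  return 0;
}
#endif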
if (!TheLoop->contains(UI)) { DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n'); return true; } } return false; } void LoopVectorizationLegality::addInductionPhi( PHINode *Phi, const InductionDescriptor &ID, SmallPtrSetImpl &AllowedExit) { Inductions[Phi] = ID; // In case this induction also comes with casts that we know we can ignore // in the vectorized loop body, record them here. All casts could be recorded // here for ignoring, but suffices to record only the first (as it is the // only one that may bw used outside the cast sequence). const SmallVectorImpl &Casts = ID.getCastInsts(); if (!Casts.empty()) InductionCastsToIgnore.insert(*Casts.begin()); Type *PhiTy = Phi->getType(); const DataLayout &DL = Phi->getModule()->getDataLayout(); // Get the widest type. if (!PhiTy->isFloatingPointTy()) { if (!WidestIndTy) WidestIndTy = convertPointerToIntegerType(DL, PhiTy); else WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy); } // Int inductions are special because we only allow one IV. if (ID.getKind() == InductionDescriptor::IK_IntInduction && ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() && isa(ID.getStartValue()) && cast(ID.getStartValue())->isNullValue()) { // Use the phi node with the widest type as induction. Use the last // one if there are multiple (no good reason for doing this other // than it is expedient). We've checked that it begins at zero and // steps by one, so this is a canonical induction variable. if (!PrimaryInduction || PhiTy == WidestIndTy) PrimaryInduction = Phi; } // Both the PHI node itself, and the "post-increment" value feeding // back into the PHI node may have external users. // We can allow those uses, except if the SCEVs we have for them rely // on predicates that only hold within the loop, since allowing the exit // currently means re-using this SCEV outside the loop. if (PSE.getUnionPredicate().isAlwaysTrue()) { AllowedExit.insert(Phi); AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch())); } DEBUG(dbgs() << "LV: Found an induction variable.\n"); } bool LoopVectorizationLegality::canVectorizeInstrs() { BasicBlock *Header = TheLoop->getHeader(); // Look for the attribute signaling the absence of NaNs. Function &F = *Header->getParent(); HasFunNoNaNAttr = F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true"; // For each block in the loop. for (BasicBlock *BB : TheLoop->blocks()) { // Scan the instructions in the block and look for hazards. for (Instruction &I : *BB) { if (auto *Phi = dyn_cast(&I)) { Type *PhiTy = Phi->getType(); // Check that this PHI type is allowed. if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() && !PhiTy->isPointerTy()) { ORE->emit(createMissedAnalysis("CFGNotUnderstood", Phi) << "loop control flow is not understood by vectorizer"); DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n"); return false; } // If this PHINode is not in the header block, then we know that we // can convert it to select during if-conversion. No need to check if // the PHIs in this block are induction or reduction variables. if (BB != Header) { // Check that this instruction has no outside users or is an // identified reduction value with an outside user. if (!hasOutsideLoopUser(TheLoop, Phi, AllowedExit)) continue; ORE->emit(createMissedAnalysis("NeitherInductionNorReduction", Phi) << "value could not be identified as " "an induction or reduction variable"); return false; } // We only allow if-converted PHIs with exactly two incoming values. 
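// Illustrative sketch (editorial, not part of the LLVM sources): the primary
// induction selected in addInductionPhi above must start at zero and step by
// one. The descriptor below is a minimal invented stand-in for that screen.
#if 0 // standalone sketch; not compiled as part of this file
#include <cstdio>

// Minimal induction summary: start value and constant step.
struct IndDesc {
  long Start;
  long Step;
};

// A candidate for the canonical (primary) induction variable begins at zero
// and advances by one each iteration.
static bool isCanonicalCandidate(const IndDesc &ID) {
  return ID.Start == 0 && ID.Step == 1;
}

int main() {
  IndDesc I{0, 1}, J{7, 1}, K{0, 4};
  std::printf("%d %d %d\n", isCanonicalCandidate(I), isCanonicalCandidate(J),
              isCanonicalCandidate(K)); // 1 0 0
  return 0;
}
#endif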
if (Phi->getNumIncomingValues() != 2) { ORE->emit(createMissedAnalysis("CFGNotUnderstood", Phi) << "control flow not understood by vectorizer"); DEBUG(dbgs() << "LV: Found an invalid PHI.\n"); return false; } RecurrenceDescriptor RedDes; - if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes)) { + if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC, + DT)) { if (RedDes.hasUnsafeAlgebra()) Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst()); AllowedExit.insert(RedDes.getLoopExitInstr()); Reductions[Phi] = RedDes; continue; } InductionDescriptor ID; if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) { addInductionPhi(Phi, ID, AllowedExit); if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr) Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst()); continue; } if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop, SinkAfter, DT)) { FirstOrderRecurrences.insert(Phi); continue; } // As a last resort, coerce the PHI to a AddRec expression // and re-try classifying it a an induction PHI. if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) { addInductionPhi(Phi, ID, AllowedExit); continue; } ORE->emit(createMissedAnalysis("NonReductionValueUsedOutsideLoop", Phi) << "value that could not be identified as " "reduction is used outside the loop"); DEBUG(dbgs() << "LV: Found an unidentified PHI." << *Phi << "\n"); return false; } // end of PHI handling // We handle calls that: // * Are debug info intrinsics. // * Have a mapping to an IR intrinsic. // * Have a vector version available. auto *CI = dyn_cast(&I); if (CI && !getVectorIntrinsicIDForCall(CI, TLI) && !isa(CI) && !(CI->getCalledFunction() && TLI && TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) { ORE->emit(createMissedAnalysis("CantVectorizeCall", CI) << "call instruction cannot be vectorized"); DEBUG(dbgs() << "LV: Found a non-intrinsic, non-libfunc callsite.\n"); return false; } // Intrinsics such as powi,cttz and ctlz are legal to vectorize if the // second argument is the same (i.e. loop invariant) if (CI && hasVectorInstrinsicScalarOpd( getVectorIntrinsicIDForCall(CI, TLI), 1)) { auto *SE = PSE.getSE(); if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(1)), TheLoop)) { ORE->emit(createMissedAnalysis("CantVectorizeIntrinsic", CI) << "intrinsic instruction cannot be vectorized"); DEBUG(dbgs() << "LV: Found unvectorizable intrinsic " << *CI << "\n"); return false; } } // Check that the instruction return type is vectorizable. // Also, we can't vectorize extractelement instructions. if ((!VectorType::isValidElementType(I.getType()) && !I.getType()->isVoidTy()) || isa(I)) { ORE->emit(createMissedAnalysis("CantVectorizeInstructionReturnType", &I) << "instruction return type cannot be vectorized"); DEBUG(dbgs() << "LV: Found unvectorizable type.\n"); return false; } // Check that the stored type is vectorizable. if (auto *ST = dyn_cast(&I)) { Type *T = ST->getValueOperand()->getType(); if (!VectorType::isValidElementType(T)) { ORE->emit(createMissedAnalysis("CantVectorizeStore", ST) << "store instruction cannot be vectorized"); return false; } // FP instructions can allow unsafe algebra, thus vectorizable by // non-IEEE-754 compliant SIMD units. // This applies to floating-point math operations and calls, not memory // operations, shuffles, or casts, as they don't change precision or // semantics. 
} else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) && !I.isFast()) { DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n"); Hints->setPotentiallyUnsafe(); } // Reduction instructions are allowed to have exit users. // All other instructions must not have external users. if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) { ORE->emit(createMissedAnalysis("ValueUsedOutsideLoop", &I) << "value cannot be used outside the loop"); return false; } } // next instr. } if (!PrimaryInduction) { DEBUG(dbgs() << "LV: Did not find one integer induction var.\n"); if (Inductions.empty()) { ORE->emit(createMissedAnalysis("NoInductionVariable") << "loop induction variable could not be identified"); return false; } } // Now we know the widest induction type, check if our found induction // is the same size. If it's not, unset it here and InnerLoopVectorizer // will create another. if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType()) PrimaryInduction = nullptr; return true; } void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) { // We should not collect Scalars more than once per VF. Right now, this // function is called from collectUniformsAndScalars(), which already does // this check. Collecting Scalars for VF=1 does not make any sense. assert(VF >= 2 && !Scalars.count(VF) && "This function should not be visited twice for the same VF"); SmallSetVector Worklist; // These sets are used to seed the analysis with pointers used by memory // accesses that will remain scalar. SmallSetVector ScalarPtrs; SmallPtrSet PossibleNonScalarPtrs; // A helper that returns true if the use of Ptr by MemAccess will be scalar. // The pointer operands of loads and stores will be scalar as long as the // memory access is not a gather or scatter operation. The value operand of a // store will remain scalar if the store is scalarized. auto isScalarUse = [&](Instruction *MemAccess, Value *Ptr) { InstWidening WideningDecision = getWideningDecision(MemAccess, VF); assert(WideningDecision != CM_Unknown && "Widening decision should be ready at this moment"); if (auto *Store = dyn_cast(MemAccess)) if (Ptr == Store->getValueOperand()) return WideningDecision == CM_Scalarize; assert(Ptr == getPointerOperand(MemAccess) && "Ptr is neither a value or pointer operand"); return WideningDecision != CM_GatherScatter; }; // A helper that returns true if the given value is a bitcast or // getelementptr instruction contained in the loop. auto isLoopVaryingBitCastOrGEP = [&](Value *V) { return ((isa(V) && V->getType()->isPointerTy()) || isa(V)) && !TheLoop->isLoopInvariant(V); }; // A helper that evaluates a memory access's use of a pointer. If the use // will be a scalar use, and the pointer is only used by memory accesses, we // place the pointer in ScalarPtrs. Otherwise, the pointer is placed in // PossibleNonScalarPtrs. auto evaluatePtrUse = [&](Instruction *MemAccess, Value *Ptr) { // We only care about bitcast and getelementptr instructions contained in // the loop. if (!isLoopVaryingBitCastOrGEP(Ptr)) return; // If the pointer has already been identified as scalar (e.g., if it was // also identified as uniform), there's nothing to do. auto *I = cast(Ptr); if (Worklist.count(I)) return; // If the use of the pointer will be a scalar use, and all users of the // pointer are memory accesses, place the pointer in ScalarPtrs. Otherwise, // place the pointer in PossibleNonScalarPtrs. 
if (isScalarUse(MemAccess, Ptr) && llvm::all_of(I->users(), [&](User *U) { return isa(U) || isa(U); })) ScalarPtrs.insert(I); else PossibleNonScalarPtrs.insert(I); }; // We seed the scalars analysis with three classes of instructions: (1) // instructions marked uniform-after-vectorization, (2) bitcast and // getelementptr instructions used by memory accesses requiring a scalar use, // and (3) pointer induction variables and their update instructions (we // currently only scalarize these). // // (1) Add to the worklist all instructions that have been identified as // uniform-after-vectorization. Worklist.insert(Uniforms[VF].begin(), Uniforms[VF].end()); // (2) Add to the worklist all bitcast and getelementptr instructions used by // memory accesses requiring a scalar use. The pointer operands of loads and // stores will be scalar as long as the memory accesses is not a gather or // scatter operation. The value operand of a store will remain scalar if the // store is scalarized. for (auto *BB : TheLoop->blocks()) for (auto &I : *BB) { if (auto *Load = dyn_cast(&I)) { evaluatePtrUse(Load, Load->getPointerOperand()); } else if (auto *Store = dyn_cast(&I)) { evaluatePtrUse(Store, Store->getPointerOperand()); evaluatePtrUse(Store, Store->getValueOperand()); } } for (auto *I : ScalarPtrs) if (!PossibleNonScalarPtrs.count(I)) { DEBUG(dbgs() << "LV: Found scalar instruction: " << *I << "\n"); Worklist.insert(I); } // (3) Add to the worklist all pointer induction variables and their update // instructions. // // TODO: Once we are able to vectorize pointer induction variables we should // no longer insert them into the worklist here. auto *Latch = TheLoop->getLoopLatch(); for (auto &Induction : *Legal->getInductionVars()) { auto *Ind = Induction.first; auto *IndUpdate = cast(Ind->getIncomingValueForBlock(Latch)); if (Induction.second.getKind() != InductionDescriptor::IK_PtrInduction) continue; Worklist.insert(Ind); Worklist.insert(IndUpdate); DEBUG(dbgs() << "LV: Found scalar instruction: " << *Ind << "\n"); DEBUG(dbgs() << "LV: Found scalar instruction: " << *IndUpdate << "\n"); } // Insert the forced scalars. // FIXME: Currently widenPHIInstruction() often creates a dead vector // induction variable when the PHI user is scalarized. if (ForcedScalars.count(VF)) for (auto *I : ForcedScalars.find(VF)->second) Worklist.insert(I); // Expand the worklist by looking through any bitcasts and getelementptr // instructions we've already identified as scalar. This is similar to the // expansion step in collectLoopUniforms(); however, here we're only // expanding to include additional bitcasts and getelementptr instructions. unsigned Idx = 0; while (Idx != Worklist.size()) { Instruction *Dst = Worklist[Idx++]; if (!isLoopVaryingBitCastOrGEP(Dst->getOperand(0))) continue; auto *Src = cast(Dst->getOperand(0)); if (llvm::all_of(Src->users(), [&](User *U) -> bool { auto *J = cast(U); return !TheLoop->contains(J) || Worklist.count(J) || ((isa(J) || isa(J)) && isScalarUse(J, Src)); })) { Worklist.insert(Src); DEBUG(dbgs() << "LV: Found scalar instruction: " << *Src << "\n"); } } // An induction variable will remain scalar if all users of the induction // variable and induction variable update remain scalar. for (auto &Induction : *Legal->getInductionVars()) { auto *Ind = Induction.first; auto *IndUpdate = cast(Ind->getIncomingValueForBlock(Latch)); // We already considered pointer induction variables, so there's no reason // to look at their users again. 
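// Illustrative sketch (editorial, not part of the LLVM sources): the worklist
// expansion above pulls an address-computation value into the scalar set only
// once every in-loop user of it is already known to remain scalar. The toy
// def->users graph below is invented to show that fixed point.
#if 0 // standalone sketch; not compiled as part of this file
#include <cstdio>
#include <map>
#include <set>
#include <string>
#include <vector>

using Name = std::string;

int main() {
  // Toy address chain: ptr feeds two GEPs, gep0 feeds gep1, gep1 feeds a
  // scalarized load.
  std::map<Name, std::vector<Name>> Users = {
      {"gep1", {"load"}}, {"gep0", {"gep1"}}, {"ptr", {"gep0", "gep1"}}};

  std::set<Name> Scalar = {"load", "gep1"}; // seeded from scalar memory uses

  // Keep adding values whose users are all already scalar.
  bool Changed = true;
  while (Changed) {
    Changed = false;
    for (auto &Def : Users) {
      if (Scalar.count(Def.first))
        continue;
      bool AllUsersScalar = true;
      for (const Name &U : Def.second)
        AllUsersScalar = AllUsersScalar && Scalar.count(U) != 0;
      if (AllUsersScalar) {
        Scalar.insert(Def.first);
        Changed = true;
      }
    }
  }
  std::printf("scalar values: %zu\n", Scalar.size()); // 4: load, gep1, gep0, ptr
  return 0;
}
#endif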
// // TODO: Once we are able to vectorize pointer induction variables we // should no longer skip over them here. if (Induction.second.getKind() == InductionDescriptor::IK_PtrInduction) continue; // Determine if all users of the induction variable are scalar after // vectorization. auto ScalarInd = llvm::all_of(Ind->users(), [&](User *U) -> bool { auto *I = cast(U); return I == IndUpdate || !TheLoop->contains(I) || Worklist.count(I); }); if (!ScalarInd) continue; // Determine if all users of the induction variable update instruction are // scalar after vectorization. auto ScalarIndUpdate = llvm::all_of(IndUpdate->users(), [&](User *U) -> bool { auto *I = cast(U); return I == Ind || !TheLoop->contains(I) || Worklist.count(I); }); if (!ScalarIndUpdate) continue; // The induction variable and its update instruction will remain scalar. Worklist.insert(Ind); Worklist.insert(IndUpdate); DEBUG(dbgs() << "LV: Found scalar instruction: " << *Ind << "\n"); DEBUG(dbgs() << "LV: Found scalar instruction: " << *IndUpdate << "\n"); } Scalars[VF].insert(Worklist.begin(), Worklist.end()); } bool LoopVectorizationLegality::isScalarWithPredication(Instruction *I) { if (!blockNeedsPredication(I->getParent())) return false; switch(I->getOpcode()) { default: break; case Instruction::Store: return !isMaskRequired(I); case Instruction::UDiv: case Instruction::SDiv: case Instruction::SRem: case Instruction::URem: return mayDivideByZero(*I); } return false; } bool LoopVectorizationLegality::memoryInstructionCanBeWidened(Instruction *I, unsigned VF) { // Get and ensure we have a valid memory instruction. LoadInst *LI = dyn_cast(I); StoreInst *SI = dyn_cast(I); assert((LI || SI) && "Invalid memory instruction"); auto *Ptr = getPointerOperand(I); // In order to be widened, the pointer should be consecutive, first of all. if (!isConsecutivePtr(Ptr)) return false; // If the instruction is a store located in a predicated block, it will be // scalarized. if (isScalarWithPredication(I)) return false; // If the instruction's allocated size doesn't equal it's type size, it // requires padding and will be scalarized. auto &DL = I->getModule()->getDataLayout(); auto *ScalarTy = LI ? LI->getType() : SI->getValueOperand()->getType(); if (hasIrregularType(ScalarTy, DL, VF)) return false; return true; } void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) { // We should not collect Uniforms more than once per VF. Right now, // this function is called from collectUniformsAndScalars(), which // already does this check. Collecting Uniforms for VF=1 does not make any // sense. assert(VF >= 2 && !Uniforms.count(VF) && "This function should not be visited twice for the same VF"); // Visit the list of Uniforms. If we'll not find any uniform value, we'll // not analyze again. Uniforms.count(VF) will return 1. Uniforms[VF].clear(); // We now know that the loop is vectorizable! // Collect instructions inside the loop that will remain uniform after // vectorization. // Global values, params and instructions outside of current loop are out of // scope. auto isOutOfScope = [&](Value *V) -> bool { Instruction *I = dyn_cast(V); return (!I || !TheLoop->contains(I)); }; SetVector Worklist; BasicBlock *Latch = TheLoop->getLoopLatch(); // Start with the conditional branch. If the branch condition is an // instruction contained in the loop that is only used by the branch, it is // uniform. 
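// Illustrative sketch (editorial, not part of the LLVM sources):
// memoryInstructionCanBeWidened above requires a consecutive pointer, no
// predication, and a type whose size needs no padding. The access summary
// below is an invented simplification of those three gates.
#if 0 // standalone sketch; not compiled as part of this file
#include <cstdio>

// Invented summary of a single load/store.
struct AccessInfo {
  bool ConsecutivePtr;    // address advances by one element per iteration
  bool NeedsPredication;  // lives in a block that must be predicated
  unsigned TypeSizeBits;  // size of the value as stored
  unsigned AllocSizeBits; // size the type occupies in memory (with padding)
};

// Widen into one wide access only if every gate passes; otherwise the access
// is scalarized or handled as a gather/scatter.
static bool canWiden(const AccessInfo &A) {
  return A.ConsecutivePtr && !A.NeedsPredication &&
         A.TypeSizeBits == A.AllocSizeBits;
}

int main() {
  AccessInfo Plain{true, false, 32, 32};  // consecutive i32 access
  AccessInfo Padded{true, false, 24, 32}; // irregular type needing padding
  std::printf("%d %d\n", canWiden(Plain), canWiden(Padded)); // 1 0
  return 0;
}
#endif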
auto *Cmp = dyn_cast(Latch->getTerminator()->getOperand(0)); if (Cmp && TheLoop->contains(Cmp) && Cmp->hasOneUse()) { Worklist.insert(Cmp); DEBUG(dbgs() << "LV: Found uniform instruction: " << *Cmp << "\n"); } // Holds consecutive and consecutive-like pointers. Consecutive-like pointers // are pointers that are treated like consecutive pointers during // vectorization. The pointer operands of interleaved accesses are an // example. SmallSetVector ConsecutiveLikePtrs; // Holds pointer operands of instructions that are possibly non-uniform. SmallPtrSet PossibleNonUniformPtrs; auto isUniformDecision = [&](Instruction *I, unsigned VF) { InstWidening WideningDecision = getWideningDecision(I, VF); assert(WideningDecision != CM_Unknown && "Widening decision should be ready at this moment"); return (WideningDecision == CM_Widen || WideningDecision == CM_Widen_Reverse || WideningDecision == CM_Interleave); }; // Iterate over the instructions in the loop, and collect all // consecutive-like pointer operands in ConsecutiveLikePtrs. If it's possible // that a consecutive-like pointer operand will be scalarized, we collect it // in PossibleNonUniformPtrs instead. We use two sets here because a single // getelementptr instruction can be used by both vectorized and scalarized // memory instructions. For example, if a loop loads and stores from the same // location, but the store is conditional, the store will be scalarized, and // the getelementptr won't remain uniform. for (auto *BB : TheLoop->blocks()) for (auto &I : *BB) { // If there's no pointer operand, there's nothing to do. auto *Ptr = dyn_cast_or_null(getPointerOperand(&I)); if (!Ptr) continue; // True if all users of Ptr are memory accesses that have Ptr as their // pointer operand. auto UsersAreMemAccesses = llvm::all_of(Ptr->users(), [&](User *U) -> bool { return getPointerOperand(U) == Ptr; }); // Ensure the memory instruction will not be scalarized or used by // gather/scatter, making its pointer operand non-uniform. If the pointer // operand is used by any instruction other than a memory access, we // conservatively assume the pointer operand may be non-uniform. if (!UsersAreMemAccesses || !isUniformDecision(&I, VF)) PossibleNonUniformPtrs.insert(Ptr); // If the memory instruction will be vectorized and its pointer operand // is consecutive-like, or interleaving - the pointer operand should // remain uniform. else ConsecutiveLikePtrs.insert(Ptr); } // Add to the Worklist all consecutive and consecutive-like pointers that // aren't also identified as possibly non-uniform. for (auto *V : ConsecutiveLikePtrs) if (!PossibleNonUniformPtrs.count(V)) { DEBUG(dbgs() << "LV: Found uniform instruction: " << *V << "\n"); Worklist.insert(V); } // Expand Worklist in topological order: whenever a new instruction // is added , its users should be either already inside Worklist, or // out of scope. It ensures a uniform instruction will only be used // by uniform instructions or out of scope instructions. 
unsigned idx = 0; while (idx != Worklist.size()) { Instruction *I = Worklist[idx++]; for (auto OV : I->operand_values()) { if (isOutOfScope(OV)) continue; auto *OI = cast(OV); if (llvm::all_of(OI->users(), [&](User *U) -> bool { auto *J = cast(U); return !TheLoop->contains(J) || Worklist.count(J) || (OI == getPointerOperand(J) && isUniformDecision(J, VF)); })) { Worklist.insert(OI); DEBUG(dbgs() << "LV: Found uniform instruction: " << *OI << "\n"); } } } // Returns true if Ptr is the pointer operand of a memory access instruction // I, and I is known to not require scalarization. auto isVectorizedMemAccessUse = [&](Instruction *I, Value *Ptr) -> bool { return getPointerOperand(I) == Ptr && isUniformDecision(I, VF); }; // For an instruction to be added into Worklist above, all its users inside // the loop should also be in Worklist. However, this condition cannot be // true for phi nodes that form a cyclic dependence. We must process phi // nodes separately. An induction variable will remain uniform if all users // of the induction variable and induction variable update remain uniform. // The code below handles both pointer and non-pointer induction variables. for (auto &Induction : *Legal->getInductionVars()) { auto *Ind = Induction.first; auto *IndUpdate = cast(Ind->getIncomingValueForBlock(Latch)); // Determine if all users of the induction variable are uniform after // vectorization. auto UniformInd = llvm::all_of(Ind->users(), [&](User *U) -> bool { auto *I = cast(U); return I == IndUpdate || !TheLoop->contains(I) || Worklist.count(I) || isVectorizedMemAccessUse(I, Ind); }); if (!UniformInd) continue; // Determine if all users of the induction variable update instruction are // uniform after vectorization. auto UniformIndUpdate = llvm::all_of(IndUpdate->users(), [&](User *U) -> bool { auto *I = cast(U); return I == Ind || !TheLoop->contains(I) || Worklist.count(I) || isVectorizedMemAccessUse(I, IndUpdate); }); if (!UniformIndUpdate) continue; // The induction variable and its update instruction will remain uniform. 
Worklist.insert(Ind); Worklist.insert(IndUpdate); DEBUG(dbgs() << "LV: Found uniform instruction: " << *Ind << "\n"); DEBUG(dbgs() << "LV: Found uniform instruction: " << *IndUpdate << "\n"); } Uniforms[VF].insert(Worklist.begin(), Worklist.end()); } bool LoopVectorizationLegality::canVectorizeMemory() { LAI = &(*GetLAA)(*TheLoop); InterleaveInfo.setLAI(LAI); const OptimizationRemarkAnalysis *LAR = LAI->getReport(); if (LAR) { ORE->emit([&]() { return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(), "loop not vectorized: ", *LAR); }); } if (!LAI->canVectorizeMemory()) return false; if (LAI->hasStoreToLoopInvariantAddress()) { ORE->emit(createMissedAnalysis("CantVectorizeStoreToLoopInvariantAddress") << "write to a loop invariant address could not be vectorized"); DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n"); return false; } Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks()); PSE.addPredicate(LAI->getPSE().getUnionPredicate()); return true; } bool LoopVectorizationLegality::isInductionPhi(const Value *V) { Value *In0 = const_cast(V); PHINode *PN = dyn_cast_or_null(In0); if (!PN) return false; return Inductions.count(PN); } bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) { auto *Inst = dyn_cast(V); return (Inst && InductionCastsToIgnore.count(Inst)); } bool LoopVectorizationLegality::isInductionVariable(const Value *V) { return isInductionPhi(V) || isCastedInductionVariable(V); } bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) { return FirstOrderRecurrences.count(Phi); } bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) { return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT); } bool LoopVectorizationLegality::blockCanBePredicated( BasicBlock *BB, SmallPtrSetImpl &SafePtrs) { const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel(); for (Instruction &I : *BB) { // Check that we don't have a constant expression that can trap as operand. for (Value *Operand : I.operands()) { if (auto *C = dyn_cast(Operand)) if (C->canTrap()) return false; } // We might be able to hoist the load. if (I.mayReadFromMemory()) { auto *LI = dyn_cast(&I); if (!LI) return false; if (!SafePtrs.count(LI->getPointerOperand())) { if (isLegalMaskedLoad(LI->getType(), LI->getPointerOperand()) || isLegalMaskedGather(LI->getType())) { MaskedOp.insert(LI); continue; } // !llvm.mem.parallel_loop_access implies if-conversion safety. if (IsAnnotatedParallel) continue; return false; } } if (I.mayWriteToMemory()) { auto *SI = dyn_cast(&I); // We only support predication of stores in basic blocks with one // predecessor. if (!SI) return false; // Build a masked store if it is legal for the target. 
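// Illustrative sketch (editorial, not part of the LLVM sources): the store
// handling below prefers a masked store when the target supports one and
// otherwise falls back to predicated scalar stores, subject to a safety and
// count limit. The capability flags and limit are invented for illustration.
#if 0 // standalone sketch; not compiled as part of this file
#include <cstdio>

enum class StorePlan { Masked, PredicatedScalar, GiveUp };

struct StoreFacts {
  bool TargetHasMaskedStore; // legality query answered by the target
  bool PointerKnownSafe;     // address already known to be dereferenceable
  bool SinglePredecessor;    // block shape we are willing to predicate
  unsigned PredStoresSoFar;  // running count of predicated stores
  unsigned PredStoreLimit;   // threshold before predication is rejected
};

static StorePlan planStore(const StoreFacts &F) {
  if (F.TargetHasMaskedStore)
    return StorePlan::Masked;
  if (F.PointerKnownSafe && F.SinglePredecessor &&
      F.PredStoresSoFar + 1 <= F.PredStoreLimit)
    return StorePlan::PredicatedScalar;
  return StorePlan::GiveUp;
}

int main() {
  StoreFacts WithMask{true, false, true, 0, 8};
  StoreFacts NoMask{false, true, true, 0, 8};
  std::printf("%d %d\n", (int)planStore(WithMask), (int)planStore(NoMask)); // 0 1
  return 0;
}
#endif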
if (isLegalMaskedStore(SI->getValueOperand()->getType(), SI->getPointerOperand()) || isLegalMaskedScatter(SI->getValueOperand()->getType())) { MaskedOp.insert(SI); continue; } bool isSafePtr = (SafePtrs.count(SI->getPointerOperand()) != 0); bool isSinglePredecessor = SI->getParent()->getSinglePredecessor(); if (++NumPredStores > NumberOfStoresToPredicate || !isSafePtr || !isSinglePredecessor) return false; } if (I.mayThrow()) return false; } return true; } void InterleavedAccessInfo::collectConstStrideAccesses( MapVector &AccessStrideInfo, const ValueToValueMap &Strides) { auto &DL = TheLoop->getHeader()->getModule()->getDataLayout(); // Since it's desired that the load/store instructions be maintained in // "program order" for the interleaved access analysis, we have to visit the // blocks in the loop in reverse postorder (i.e., in a topological order). // Such an ordering will ensure that any load/store that may be executed // before a second load/store will precede the second load/store in // AccessStrideInfo. LoopBlocksDFS DFS(TheLoop); DFS.perform(LI); for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) for (auto &I : *BB) { auto *LI = dyn_cast(&I); auto *SI = dyn_cast(&I); if (!LI && !SI) continue; Value *Ptr = getPointerOperand(&I); // We don't check wrapping here because we don't know yet if Ptr will be // part of a full group or a group with gaps. Checking wrapping for all // pointers (even those that end up in groups with no gaps) will be overly // conservative. For full groups, wrapping should be ok since if we would // wrap around the address space we would do a memory access at nullptr // even without the transformation. The wrapping checks are therefore // deferred until after we've formed the interleaved groups. int64_t Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, /*Assume=*/true, /*ShouldCheckWrap=*/false); const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr); PointerType *PtrTy = dyn_cast(Ptr->getType()); uint64_t Size = DL.getTypeAllocSize(PtrTy->getElementType()); // An alignment of 0 means target ABI alignment. unsigned Align = getMemInstAlignment(&I); if (!Align) Align = DL.getABITypeAlignment(PtrTy->getElementType()); AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, Align); } } // Analyze interleaved accesses and collect them into interleaved load and // store groups. // // When generating code for an interleaved load group, we effectively hoist all // loads in the group to the location of the first load in program order. When // generating code for an interleaved store group, we sink all stores to the // location of the last store. This code motion can change the order of load // and store instructions and may break dependences. // // The code generation strategy mentioned above ensures that we won't violate // any write-after-read (WAR) dependences. // // E.g., for the WAR dependence: a = A[i]; // (1) // A[i] = b; // (2) // // The store group of (2) is always inserted at or below (2), and the load // group of (1) is always inserted at or above (1). Thus, the instructions will // never be reordered. All other dependences are checked to ensure the // correctness of the instruction reordering. // // The algorithm visits all memory accesses in the loop in bottom-up program // order. Program order is established by traversing the blocks in the loop in // reverse postorder when collecting the accesses. 
// // We visit the memory accesses in bottom-up order because it can simplify the // construction of store groups in the presence of write-after-write (WAW) // dependences. // // E.g., for the WAW dependence: A[i] = a; // (1) // A[i] = b; // (2) // A[i + 1] = c; // (3) // // We will first create a store group with (3) and (2). (1) can't be added to // this group because it and (2) are dependent. However, (1) can be grouped // with other accesses that may precede it in program order. Note that a // bottom-up order does not imply that WAW dependences should not be checked. void InterleavedAccessInfo::analyzeInterleaving( const ValueToValueMap &Strides) { DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n"); // Holds all accesses with a constant stride. MapVector AccessStrideInfo; collectConstStrideAccesses(AccessStrideInfo, Strides); if (AccessStrideInfo.empty()) return; // Collect the dependences in the loop. collectDependences(); // Holds all interleaved store groups temporarily. SmallSetVector StoreGroups; // Holds all interleaved load groups temporarily. SmallSetVector LoadGroups; // Search in bottom-up program order for pairs of accesses (A and B) that can // form interleaved load or store groups. In the algorithm below, access A // precedes access B in program order. We initialize a group for B in the // outer loop of the algorithm, and then in the inner loop, we attempt to // insert each A into B's group if: // // 1. A and B have the same stride, // 2. A and B have the same memory object size, and // 3. A belongs in B's group according to its distance from B. // // Special care is taken to ensure group formation will not break any // dependences. for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend(); BI != E; ++BI) { Instruction *B = BI->first; StrideDescriptor DesB = BI->second; // Initialize a group for B if it has an allowable stride. Even if we don't // create a group for B, we continue with the bottom-up algorithm to ensure // we don't break any of B's dependences. InterleaveGroup *Group = nullptr; if (isStrided(DesB.Stride)) { Group = getInterleaveGroup(B); if (!Group) { DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B << '\n'); Group = createInterleaveGroup(B, DesB.Stride, DesB.Align); } if (B->mayWriteToMemory()) StoreGroups.insert(Group); else LoadGroups.insert(Group); } for (auto AI = std::next(BI); AI != E; ++AI) { Instruction *A = AI->first; StrideDescriptor DesA = AI->second; // Our code motion strategy implies that we can't have dependences // between accesses in an interleaved group and other accesses located // between the first and last member of the group. Note that this also // means that a group can't have more than one member at a given offset. // The accesses in a group can have dependences with other accesses, but // we must ensure we don't extend the boundaries of the group such that // we encompass those dependent accesses. // // For example, assume we have the sequence of accesses shown below in a // stride-2 loop: // // (1, 2) is a group | A[i] = a; // (1) // | A[i-1] = b; // (2) | // A[i-3] = c; // (3) // A[i] = d; // (4) | (2, 4) is not a group // // Because accesses (2) and (3) are dependent, we can group (2) with (1) // but not with (4). If we did, the dependent access (3) would be within // the boundaries of the (2, 4) group. 
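// Illustrative sketch (editorial, not part of the LLVM sources): rule 3 below
// admits access A into B's group only when A's byte distance from B is a whole
// multiple of the member size, and the member index follows directly from that
// quotient. The byte values in main() are invented.
#if 0 // standalone sketch; not compiled as part of this file
#include <cstdint>
#include <cstdio>

// Returns true and sets IndexA when A can join B's group.
static bool memberIndexFromDistance(int64_t DistanceAToB, int64_t MemberSize,
                                    int IndexB, int &IndexA) {
  if (DistanceAToB % MemberSize != 0)
    return false;
  IndexA = IndexB + static_cast<int>(DistanceAToB / MemberSize);
  return true;
}

int main() {
  int Idx = 0;
  // e.g. 4-byte elements: A accesses 8 bytes before B, and B sits at index 2.
  if (memberIndexFromDistance(/*Dist=*/-8, /*Size=*/4, /*IndexB=*/2, Idx))
    std::printf("A joins at index %d\n", Idx); // index 0
  return 0;
}
#endif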
if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) { // If a dependence exists and A is already in a group, we know that A // must be a store since A precedes B and WAR dependences are allowed. // Thus, A would be sunk below B. We release A's group to prevent this // illegal code motion. A will then be free to form another group with // instructions that precede it. if (isInterleaved(A)) { InterleaveGroup *StoreGroup = getInterleaveGroup(A); StoreGroups.remove(StoreGroup); releaseGroup(StoreGroup); } // If a dependence exists and A is not already in a group (or it was // and we just released it), B might be hoisted above A (if B is a // load) or another store might be sunk below A (if B is a store). In // either case, we can't add additional instructions to B's group. B // will only form a group with instructions that it precedes. break; } // At this point, we've checked for illegal code motion. If either A or B // isn't strided, there's nothing left to do. if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride)) continue; // Ignore A if it's already in a group or isn't the same kind of memory // operation as B. if (isInterleaved(A) || A->mayReadFromMemory() != B->mayReadFromMemory()) continue; // Check rules 1 and 2. Ignore A if its stride or size is different from // that of B. if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size) continue; // Ignore A if the memory object of A and B don't belong to the same // address space if (getMemInstAddressSpace(A) != getMemInstAddressSpace(B)) continue; // Calculate the distance from A to B. const SCEVConstant *DistToB = dyn_cast( PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev)); if (!DistToB) continue; int64_t DistanceToB = DistToB->getAPInt().getSExtValue(); // Check rule 3. Ignore A if its distance to B is not a multiple of the // size. if (DistanceToB % static_cast(DesB.Size)) continue; // Ignore A if either A or B is in a predicated block. Although we // currently prevent group formation for predicated accesses, we may be // able to relax this limitation in the future once we handle more // complicated blocks. if (isPredicated(A->getParent()) || isPredicated(B->getParent())) continue; // The index of A is the index of B plus A's distance to B in multiples // of the size. int IndexA = Group->getIndex(B) + DistanceToB / static_cast(DesB.Size); // Try to insert A into B's group. if (Group->insertMember(A, IndexA, DesA.Align)) { DEBUG(dbgs() << "LV: Inserted:" << *A << '\n' << " into the interleave group with" << *B << '\n'); InterleaveGroupMap[A] = Group; // Set the first load in program order as the insert position. if (A->mayReadFromMemory()) Group->setInsertPos(A); } } // Iteration over A accesses. } // Iteration over B accesses. // Remove interleaved store groups with gaps. for (InterleaveGroup *Group : StoreGroups) if (Group->getNumMembers() != Group->getFactor()) { DEBUG(dbgs() << "LV: Invalidate candidate interleaved store group due " "to gaps.\n"); releaseGroup(Group); } // Remove interleaved groups with gaps (currently only loads) whose memory // accesses may wrap around. We have to revisit the getPtrStride analysis, // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does // not check wrapping (see documentation there). // FORNOW we use Assume=false; // TODO: Change to Assume=true but making sure we don't exceed the threshold // of runtime SCEV assumptions checks (thereby potentially failing to // vectorize altogether). 
// Additional optional optimizations: // TODO: If we are peeling the loop and we know that the first pointer doesn't // wrap then we can deduce that all pointers in the group don't wrap. // This means that we can forcefully peel the loop in order to only have to // check the first pointer for no-wrap. When we'll change to use Assume=true // we'll only need at most one runtime check per interleaved group. for (InterleaveGroup *Group : LoadGroups) { // Case 1: A full group. Can Skip the checks; For full groups, if the wide // load would wrap around the address space we would do a memory access at // nullptr even without the transformation. if (Group->getNumMembers() == Group->getFactor()) continue; // Case 2: If first and last members of the group don't wrap this implies // that all the pointers in the group don't wrap. // So we check only group member 0 (which is always guaranteed to exist), // and group member Factor - 1; If the latter doesn't exist we rely on // peeling (if it is a non-reveresed accsess -- see Case 3). Value *FirstMemberPtr = getPointerOperand(Group->getMember(0)); if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false, /*ShouldCheckWrap=*/true)) { DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to " "first group member potentially pointer-wrapping.\n"); releaseGroup(Group); continue; } Instruction *LastMember = Group->getMember(Group->getFactor() - 1); if (LastMember) { Value *LastMemberPtr = getPointerOperand(LastMember); if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false, /*ShouldCheckWrap=*/true)) { DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to " "last group member potentially pointer-wrapping.\n"); releaseGroup(Group); } } else { // Case 3: A non-reversed interleaved load group with gaps: We need // to execute at least one scalar epilogue iteration. This will ensure // we don't speculatively access memory out-of-bounds. We only need // to look for a member at index factor - 1, since every group must have // a member at index zero. if (Group->isReverse()) { DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to " "a reverse access with gaps.\n"); releaseGroup(Group); continue; } DEBUG(dbgs() << "LV: Interleaved group requires epilogue iteration.\n"); RequiresScalarEpilogue = true; } } } Optional LoopVectorizationCostModel::computeMaxVF(bool OptForSize) { if (!EnableCondStoresVectorization && Legal->getNumPredStores()) { ORE->emit(createMissedAnalysis("ConditionalStore") << "store that is conditionally executed prevents vectorization"); DEBUG(dbgs() << "LV: No vectorization. There are conditional stores.\n"); return None; } if (Legal->getRuntimePointerChecking()->Need && TTI.hasBranchDivergence()) { // TODO: It may by useful to do since it's still likely to be dynamically // uniform if the target can skip. DEBUG(dbgs() << "LV: Not inserting runtime ptr check for divergent target"); ORE->emit( createMissedAnalysis("CantVersionLoopWithDivergentTarget") << "runtime pointer checks needed. Not enabled for divergent target"); return None; } unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop); if (!OptForSize) // Remaining checks deal with scalar loop when OptForSize. return computeFeasibleMaxVF(OptForSize, TC); if (Legal->getRuntimePointerChecking()->Need) { ORE->emit(createMissedAnalysis("CantVersionLoopWithOptForSize") << "runtime pointer checks needed. 
Enable vectorization of this " "loop with '#pragma clang loop vectorize(enable)' when " "compiling with -Os/-Oz"); DEBUG(dbgs() << "LV: Aborting. Runtime ptr check is required with -Os/-Oz.\n"); return None; } // If we optimize the program for size, avoid creating the tail loop. DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n'); // If we don't know the precise trip count, don't try to vectorize. if (TC < 2) { ORE->emit( createMissedAnalysis("UnknownLoopCountComplexCFG") << "unable to calculate the loop count due to complex control flow"); DEBUG(dbgs() << "LV: Aborting. A tail loop is required with -Os/-Oz.\n"); return None; } unsigned MaxVF = computeFeasibleMaxVF(OptForSize, TC); if (TC % MaxVF != 0) { // If the trip count that we found modulo the vectorization factor is not // zero then we require a tail. // FIXME: look for a smaller MaxVF that does divide TC rather than give up. // FIXME: return None if loop requiresScalarEpilog(), or look for a // smaller MaxVF that does not require a scalar epilog. ORE->emit(createMissedAnalysis("NoTailLoopWithOptForSize") << "cannot optimize for size and vectorize at the " "same time. Enable vectorization of this loop " "with '#pragma clang loop vectorize(enable)' " "when compiling with -Os/-Oz"); DEBUG(dbgs() << "LV: Aborting. A tail loop is required with -Os/-Oz.\n"); return None; } return MaxVF; } unsigned LoopVectorizationCostModel::computeFeasibleMaxVF(bool OptForSize, unsigned ConstTripCount) { MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI); unsigned SmallestType, WidestType; std::tie(SmallestType, WidestType) = getSmallestAndWidestTypes(); unsigned WidestRegister = TTI.getRegisterBitWidth(true); // Get the maximum safe dependence distance in bits computed by LAA. // It is computed by MaxVF * sizeOf(type) * 8, where type is taken from // the memory accesses that is most restrictive (involved in the smallest // dependence distance). unsigned MaxSafeRegisterWidth = Legal->getMaxSafeRegisterWidth(); WidestRegister = std::min(WidestRegister, MaxSafeRegisterWidth); unsigned MaxVectorSize = WidestRegister / WidestType; DEBUG(dbgs() << "LV: The Smallest and Widest types: " << SmallestType << " / " << WidestType << " bits.\n"); DEBUG(dbgs() << "LV: The Widest register safe to use is: " << WidestRegister << " bits.\n"); assert(MaxVectorSize <= 64 && "Did not expect to pack so many elements" " into one vector!"); if (MaxVectorSize == 0) { DEBUG(dbgs() << "LV: The target has no vector registers.\n"); MaxVectorSize = 1; return MaxVectorSize; } else if (ConstTripCount && ConstTripCount < MaxVectorSize && isPowerOf2_32(ConstTripCount)) { // We need to clamp the VF to be the ConstTripCount. There is no point in // choosing a higher viable VF as done in the loop below. DEBUG(dbgs() << "LV: Clamping the MaxVF to the constant trip count: " << ConstTripCount << "\n"); MaxVectorSize = ConstTripCount; return MaxVectorSize; } unsigned MaxVF = MaxVectorSize; if (MaximizeBandwidth && !OptForSize) { // Collect all viable vectorization factors larger than the default MaxVF // (i.e. MaxVectorSize). SmallVector VFs; unsigned NewMaxVectorSize = WidestRegister / SmallestType; for (unsigned VS = MaxVectorSize * 2; VS <= NewMaxVectorSize; VS *= 2) VFs.push_back(VS); // For each VF calculate its register usage. auto RUs = calculateRegisterUsage(VFs); // Select the largest VF which doesn't require more registers than existing // ones. 
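// Illustrative sketch (editorial, not part of the LLVM sources): the default
// factor computed above is the widest safe register width divided by the
// widest element type, optionally clamped to a small power-of-two constant
// trip count. The register width, type width, and trip count below are assumed
// example values.
#if 0 // standalone sketch; not compiled as part of this file
#include <cstdio>

static bool isPowerOf2(unsigned X) { return X && !(X & (X - 1)); }

static unsigned feasibleMaxVF(unsigned WidestRegisterBits,
                              unsigned WidestTypeBits,
                              unsigned ConstTripCount) {
  unsigned MaxVectorSize = WidestRegisterBits / WidestTypeBits;
  if (MaxVectorSize == 0)
    return 1; // no vector registers at all
  if (ConstTripCount && ConstTripCount < MaxVectorSize &&
      isPowerOf2(ConstTripCount))
    return ConstTripCount; // no point in choosing more lanes than iterations
  return MaxVectorSize;
}

int main() {
  std::printf("%u\n", feasibleMaxVF(256, 32, 0)); // 256-bit regs, i32 -> 8
  std::printf("%u\n", feasibleMaxVF(256, 32, 4)); // trip count 4 clamps to 4
  return 0;
}
#endif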
unsigned TargetNumRegisters = TTI.getNumberOfRegisters(true); for (int i = RUs.size() - 1; i >= 0; --i) { if (RUs[i].MaxLocalUsers <= TargetNumRegisters) { MaxVF = VFs[i]; break; } } } return MaxVF; } LoopVectorizationCostModel::VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(unsigned MaxVF) { float Cost = expectedCost(1).first; #ifndef NDEBUG const float ScalarCost = Cost; #endif /* NDEBUG */ unsigned Width = 1; DEBUG(dbgs() << "LV: Scalar loop costs: " << (int)ScalarCost << ".\n"); bool ForceVectorization = Hints->getForce() == LoopVectorizeHints::FK_Enabled; // Ignore scalar width, because the user explicitly wants vectorization. if (ForceVectorization && MaxVF > 1) { Width = 2; Cost = expectedCost(Width).first / (float)Width; } for (unsigned i = 2; i <= MaxVF; i *= 2) { // Notice that the vector loop needs to be executed less times, so // we need to divide the cost of the vector loops by the width of // the vector elements. VectorizationCostTy C = expectedCost(i); float VectorCost = C.first / (float)i; DEBUG(dbgs() << "LV: Vector loop of width " << i << " costs: " << (int)VectorCost << ".\n"); if (!C.second && !ForceVectorization) { DEBUG( dbgs() << "LV: Not considering vector loop of width " << i << " because it will not generate any vector instructions.\n"); continue; } if (VectorCost < Cost) { Cost = VectorCost; Width = i; } } DEBUG(if (ForceVectorization && Width > 1 && Cost >= ScalarCost) dbgs() << "LV: Vectorization seems to be not beneficial, " << "but was forced by a user.\n"); DEBUG(dbgs() << "LV: Selecting VF: " << Width << ".\n"); VectorizationFactor Factor = {Width, (unsigned)(Width * Cost)}; return Factor; } std::pair LoopVectorizationCostModel::getSmallestAndWidestTypes() { unsigned MinWidth = -1U; unsigned MaxWidth = 8; const DataLayout &DL = TheFunction->getParent()->getDataLayout(); // For each block. for (BasicBlock *BB : TheLoop->blocks()) { // For each instruction in the loop. for (Instruction &I : *BB) { Type *T = I.getType(); // Skip ignored values. if (ValuesToIgnore.count(&I)) continue; // Only examine Loads, Stores and PHINodes. if (!isa(I) && !isa(I) && !isa(I)) continue; // Examine PHI nodes that are reduction variables. Update the type to // account for the recurrence type. if (auto *PN = dyn_cast(&I)) { if (!Legal->isReductionVariable(PN)) continue; RecurrenceDescriptor RdxDesc = (*Legal->getReductionVars())[PN]; T = RdxDesc.getRecurrenceType(); } // Examine the stored values. if (auto *ST = dyn_cast(&I)) T = ST->getValueOperand()->getType(); // Ignore loaded pointer types and stored pointer types that are not // vectorizable. // // FIXME: The check here attempts to predict whether a load or store will // be vectorized. We only know this for certain after a VF has // been selected. Here, we assume that if an access can be // vectorized, it will be. We should also look at extending this // optimization to non-pointer types. // if (T->isPointerTy() && !isConsecutiveLoadOrStore(&I) && !Legal->isAccessInterleaved(&I) && !Legal->isLegalGatherOrScatter(&I)) continue; MinWidth = std::min(MinWidth, (unsigned)DL.getTypeSizeInBits(T->getScalarType())); MaxWidth = std::max(MaxWidth, (unsigned)DL.getTypeSizeInBits(T->getScalarType())); } } return {MinWidth, MaxWidth}; } unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, unsigned VF, unsigned LoopCost) { // -- The interleave heuristics -- // We interleave the loop in order to expose ILP and reduce the loop overhead. 
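// Illustrative sketch (editorial, not part of the LLVM sources): the width
// selection above compares per-lane costs, dividing each candidate's expected
// cost by its width and keeping the cheapest. The cost table below is
// invented for illustration.
#if 0 // standalone sketch; not compiled as part of this file
#include <cstdio>
#include <map>

// Invented per-iteration costs for each candidate width.
static float expectedCostAt(unsigned Width) {
  static const std::map<unsigned, float> Table = {
      {1, 10.0f}, {2, 12.0f}, {4, 20.0f}, {8, 44.0f}};
  return Table.at(Width);
}

int main() {
  unsigned BestWidth = 1;
  float BestPerLane = expectedCostAt(1); // scalar baseline
  for (unsigned W = 2; W <= 8; W *= 2) {
    float PerLane = expectedCostAt(W) / static_cast<float>(W);
    if (PerLane < BestPerLane) {
      BestPerLane = PerLane;
      BestWidth = W;
    }
  }
  // 12/2 = 6, 20/4 = 5, 44/8 = 5.5 -> width 4 beats the scalar cost of 10.
  std::printf("selected VF: %u\n", BestWidth); // 4
  return 0;
}
#endif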
// There are many micro-architectural considerations that we can't predict // at this level. For example, frontend pressure (on decode or fetch) due to // code size, or the number and capabilities of the execution ports. // // We use the following heuristics to select the interleave count: // 1. If the code has reductions, then we interleave to break the cross // iteration dependency. // 2. If the loop is really small, then we interleave to reduce the loop // overhead. // 3. We don't interleave if we think that we will spill registers to memory // due to the increased register pressure. // When we optimize for size, we don't interleave. if (OptForSize) return 1; // We used the distance for the interleave count. if (Legal->getMaxSafeDepDistBytes() != -1U) return 1; // Do not interleave loops with a relatively small trip count. unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop); if (TC > 1 && TC < TinyTripCountInterleaveThreshold) return 1; unsigned TargetNumRegisters = TTI.getNumberOfRegisters(VF > 1); DEBUG(dbgs() << "LV: The target has " << TargetNumRegisters << " registers\n"); if (VF == 1) { if (ForceTargetNumScalarRegs.getNumOccurrences() > 0) TargetNumRegisters = ForceTargetNumScalarRegs; } else { if (ForceTargetNumVectorRegs.getNumOccurrences() > 0) TargetNumRegisters = ForceTargetNumVectorRegs; } RegisterUsage R = calculateRegisterUsage({VF})[0]; // We divide by these constants so assume that we have at least one // instruction that uses at least one register. R.MaxLocalUsers = std::max(R.MaxLocalUsers, 1U); R.NumInstructions = std::max(R.NumInstructions, 1U); // We calculate the interleave count using the following formula. // Subtract the number of loop invariants from the number of available // registers. These registers are used by all of the interleaved instances. // Next, divide the remaining registers by the number of registers that is // required by the loop, in order to estimate how many parallel instances // fit without causing spills. All of this is rounded down if necessary to be // a power of two. We want power of two interleave count to simplify any // addressing operations or alignment considerations. unsigned IC = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs) / R.MaxLocalUsers); // Don't count the induction variable as interleaved. if (EnableIndVarRegisterHeur) IC = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs - 1) / std::max(1U, (R.MaxLocalUsers - 1))); // Clamp the interleave ranges to reasonable counts. unsigned MaxInterleaveCount = TTI.getMaxInterleaveFactor(VF); // Check if the user has overridden the max. if (VF == 1) { if (ForceTargetMaxScalarInterleaveFactor.getNumOccurrences() > 0) MaxInterleaveCount = ForceTargetMaxScalarInterleaveFactor; } else { if (ForceTargetMaxVectorInterleaveFactor.getNumOccurrences() > 0) MaxInterleaveCount = ForceTargetMaxVectorInterleaveFactor; } // If we did not calculate the cost for VF (because the user selected the VF) // then we calculate the cost of VF here. if (LoopCost == 0) LoopCost = expectedCost(VF).first; // Clamp the calculated IC to be between the 1 and the max interleave count // that the target allows. if (IC > MaxInterleaveCount) IC = MaxInterleaveCount; else if (IC < 1) IC = 1; // Interleave if we vectorized this loop and there is a reduction that could // benefit from interleaving. 
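// Illustrative sketch (editorial, not part of the LLVM sources): the
// interleave count computed above divides the registers left over after loop
// invariants by the loop's peak register demand and rounds down to a power of
// two. The register counts below are assumed example numbers.
#if 0 // standalone sketch; not compiled as part of this file
#include <cstdio>

static unsigned powerOf2Floor(unsigned X) {
  unsigned P = 1;
  while (P * 2 <= X)
    P *= 2;
  return X ? P : 0;
}

int main() {
  // Assumed: 16 registers, 3 held by loop invariants, peak of 5 live values.
  unsigned TargetNumRegisters = 16;
  unsigned LoopInvariantRegs = 3;
  unsigned MaxLocalUsers = 5;
  unsigned IC =
      powerOf2Floor((TargetNumRegisters - LoopInvariantRegs) / MaxLocalUsers);
  std::printf("interleave count: %u\n", IC); // (16 - 3) / 5 = 2 -> 2
  return 0;
}
#endif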
  // Interleave if we vectorized this loop and there is a reduction that could
  // benefit from interleaving.
  if (VF > 1 && !Legal->getReductionVars()->empty()) {
    DEBUG(dbgs() << "LV: Interleaving because of reductions.\n");
    return IC;
  }

  // Note that if we've already vectorized the loop we will have done the
  // runtime check and so interleaving won't require further checks.
  bool InterleavingRequiresRuntimePointerCheck =
      (VF == 1 && Legal->getRuntimePointerChecking()->Need);

  // We want to interleave small loops in order to reduce the loop overhead and
  // potentially expose ILP opportunities.
  DEBUG(dbgs() << "LV: Loop cost is " << LoopCost << '\n');
  if (!InterleavingRequiresRuntimePointerCheck && LoopCost < SmallLoopCost) {
    // We assume that the cost overhead is 1 and we use the cost model
    // to estimate the cost of the loop and interleave until the cost of the
    // loop overhead is about 5% of the cost of the loop.
    unsigned SmallIC =
        std::min(IC, (unsigned)PowerOf2Floor(SmallLoopCost / LoopCost));

    // Interleave until store/load ports (estimated by max interleave count)
    // are saturated.
    unsigned NumStores = Legal->getNumStores();
    unsigned NumLoads = Legal->getNumLoads();
    unsigned StoresIC = IC / (NumStores ? NumStores : 1);
    unsigned LoadsIC = IC / (NumLoads ? NumLoads : 1);

    // If we have a scalar reduction (vector reductions are already dealt with
    // by this point), we can increase the critical path length if the loop
    // we're interleaving is inside another loop. Limit, by default, to 2, so
    // the critical path only gets increased by one reduction operation.
    if (!Legal->getReductionVars()->empty() && TheLoop->getLoopDepth() > 1) {
      unsigned F = static_cast<unsigned>(MaxNestedScalarReductionIC);
      SmallIC = std::min(SmallIC, F);
      StoresIC = std::min(StoresIC, F);
      LoadsIC = std::min(LoadsIC, F);
    }

    if (EnableLoadStoreRuntimeInterleave &&
        std::max(StoresIC, LoadsIC) > SmallIC) {
      DEBUG(dbgs() << "LV: Interleaving to saturate store or load ports.\n");
      return std::max(StoresIC, LoadsIC);
    }

    DEBUG(dbgs() << "LV: Interleaving to reduce branch cost.\n");
    return SmallIC;
  }

  // Interleave if this is a large loop (small loops are already dealt with by
  // this point) that could benefit from interleaving.
  bool HasReductions = !Legal->getReductionVars()->empty();
  if (TTI.enableAggressiveInterleaving(HasReductions)) {
    DEBUG(dbgs() << "LV: Interleaving to expose ILP.\n");
    return IC;
  }

  DEBUG(dbgs() << "LV: Not Interleaving.\n");
  return 1;
}
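// A hypothetical illustration of the small-loop path above: with
// SmallLoopCost = 20 and LoopCost = 4 (both values assumed for the example),
// the 5% overhead target yields SmallIC = min(IC, PowerOf2Floor(20 / 4)) =
// min(IC, 4), which is then further limited by the store/load port and
// nested-reduction clamps before being returned.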
SmallVector<LoopVectorizationCostModel::RegisterUsage, 8>
LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
  // This function calculates the register usage by measuring the highest
  // number of values that are alive at a single location. Obviously, this is
  // a very rough estimation. We scan the loop in topological order and assign
  // a number to each instruction. We use RPO to ensure that defs are met
  // before their users. We assume that each instruction that has in-loop
  // users starts an interval. We record every time that an in-loop value is
  // used, so we have a list of the first and last occurrences of each
  // instruction. Next, we transpose this data structure into a multi-map that
  // holds the list of intervals that *end* at a specific location. This
  // multi-map allows us to perform a linear search. We scan the instructions
  // linearly and record each time that a new interval starts, by placing it
  // in a set. If we find this value in the multi-map then we remove it from
  // the set. The max register usage is the maximum size of the set.
  // We also search for instructions that are defined outside the loop, but
  // are used inside the loop. We need this number separately from the
  // max-interval usage number because, when we unroll, loop-invariant values
  // do not take more registers.
  LoopBlocksDFS DFS(TheLoop);
  DFS.perform(LI);

  RegisterUsage RU;
  RU.NumInstructions = 0;

  // Each 'key' in the map opens a new interval. The values
  // of the map are the index of the 'last seen' usage of the
  // instruction that is the key.
  using IntervalMap = DenseMap<Instruction *, unsigned>;

  // Maps instruction to its index.
  DenseMap<unsigned, Instruction *> IdxToInstr;
  // Marks the end of each interval.
  IntervalMap EndPoint;
  // Saves the list of instruction indices that are used in the loop.
  SmallSet<Instruction *, 8> Ends;
  // Saves the list of values that are used in the loop but are
  // defined outside the loop, such as arguments and constants.
  SmallPtrSet<Value *, 8> LoopInvariants;

  unsigned Index = 0;
  for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) {
    RU.NumInstructions += BB->size();
    for (Instruction &I : *BB) {
      IdxToInstr[Index++] = &I;

      // Save the end location of each USE.
      for (Value *U : I.operands()) {
        auto *Instr = dyn_cast<Instruction>(U);

        // Ignore non-instruction values such as arguments, constants, etc.
        if (!Instr)
          continue;

        // If this instruction is outside the loop then record it and continue.
        if (!TheLoop->contains(Instr)) {
          LoopInvariants.insert(Instr);
          continue;
        }

        // Overwrite previous end points.
        EndPoint[Instr] = Index;
        Ends.insert(Instr);
      }
    }
  }

  // Saves the list of intervals that end with the index in 'key'.
  using InstrList = SmallVector<Instruction *, 2>;
  DenseMap<unsigned, InstrList> TransposeEnds;

  // Transpose the EndPoints to a list of values that end at each index.
  for (auto &Interval : EndPoint)
    TransposeEnds[Interval.second].push_back(Interval.first);

  SmallSet<Instruction *, 8> OpenIntervals;

  // Get the size of the widest register.
  unsigned MaxSafeDepDist = -1U;
  if (Legal->getMaxSafeDepDistBytes() != -1U)
    MaxSafeDepDist = Legal->getMaxSafeDepDistBytes() * 8;
  unsigned WidestRegister =
      std::min(TTI.getRegisterBitWidth(true), MaxSafeDepDist);
  const DataLayout &DL = TheFunction->getParent()->getDataLayout();

  SmallVector<RegisterUsage, 8> RUs(VFs.size());
  SmallVector<unsigned, 8> MaxUsages(VFs.size(), 0);

  DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n");

  // A lambda that gets the register usage for the given type and VF.
  auto GetRegUsage = [&DL, WidestRegister](Type *Ty, unsigned VF) {
    if (Ty->isTokenTy())
      return 0U;
    unsigned TypeSize = DL.getTypeSizeInBits(Ty->getScalarType());
    return std::max<unsigned>(1, VF * TypeSize / WidestRegister);
  };

  for (unsigned int i = 0; i < Index; ++i) {
    Instruction *I = IdxToInstr[i];

    // Remove all of the instructions that end at this location.
    InstrList &List = TransposeEnds[i];
    for (Instruction *ToRemove : List)
      OpenIntervals.erase(ToRemove);

    // Ignore instructions that are never used within the loop.
    if (!Ends.count(I))
      continue;

    // Skip ignored values.
    if (ValuesToIgnore.count(I))
      continue;

    // For each VF find the maximum usage of registers.
    for (unsigned j = 0, e = VFs.size(); j < e; ++j) {
      if (VFs[j] == 1) {
        MaxUsages[j] = std::max(MaxUsages[j], OpenIntervals.size());
        continue;
      }
      collectUniformsAndScalars(VFs[j]);
      // Count the number of live intervals.
      unsigned RegUsage = 0;
      for (auto Inst : OpenIntervals) {
        // Skip ignored values for VF > 1.
        if (VecValuesToIgnore.count(Inst) ||
            isScalarAfterVectorization(Inst, VFs[j]))
          continue;
        RegUsage += GetRegUsage(Inst->getType(), VFs[j]);
      }
      MaxUsages[j] = std::max(MaxUsages[j], RegUsage);
    }

    DEBUG(dbgs() << "LV(REG): At #" << i << " Interval # "
                 << OpenIntervals.size() << '\n');
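    // As a hedged illustration of this scan: for in-loop instructions
    // a, b, c, d where c uses both a and b, and d uses c, the intervals for
    // a and b are still open when the scan reaches c, so a usage of 2 is
    // recorded there; at d only c's interval remains open.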
    // Add the current instruction to the list of open intervals.
    OpenIntervals.insert(I);
  }

  for (unsigned i = 0, e = VFs.size(); i < e; ++i) {
    unsigned Invariant = 0;
    if (VFs[i] == 1)
      Invariant = LoopInvariants.size();
    else {
      for (auto Inst : LoopInvariants)
        Invariant += GetRegUsage(Inst->getType(), VFs[i]);
    }

    DEBUG(dbgs() << "LV(REG): VF = " << VFs[i] << '\n');
    DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsages[i] << '\n');
    DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant << '\n');
    DEBUG(dbgs() << "LV(REG): LoopSize: " << RU.NumInstructions << '\n');

    RU.LoopInvariantRegs = Invariant;
    RU.MaxLocalUsers = MaxUsages[i];
    RUs[i] = RU;
  }

  return RUs;
}

void LoopVectorizationCostModel::collectInstsToScalarize(unsigned VF) {
  // If we aren't vectorizing the loop, or if we've already collected the
  // instructions to scalarize, there's nothing to do. Collection may already
  // have occurred if we have a user-selected VF and are now computing the
  // expected cost for interleaving.
  if (VF < 2 || InstsToScalarize.count(VF))
    return;

  // Initialize a mapping for VF in InstsToScalarize. If we find that it's
  // not profitable to scalarize any instructions, the presence of VF in the
  // map will indicate that we've analyzed it already.
  ScalarCostsTy &ScalarCostsVF = InstsToScalarize[VF];

  // Find all the instructions that are scalar with predication in the loop and
  // determine if it would be better not to if-convert the blocks they are in.
  // If so, we also record the instructions to scalarize.
  for (BasicBlock *BB : TheLoop->blocks()) {
    if (!Legal->blockNeedsPredication(BB))
      continue;
    for (Instruction &I : *BB)
      if (Legal->isScalarWithPredication(&I)) {
        ScalarCostsTy ScalarCosts;
        if (computePredInstDiscount(&I, ScalarCosts, VF) >= 0)
          ScalarCostsVF.insert(ScalarCosts.begin(), ScalarCosts.end());

        // Remember that BB will remain after vectorization.
        PredicatedBBsAfterVectorization.insert(BB);
      }
  }
}

int LoopVectorizationCostModel::computePredInstDiscount(
    Instruction *PredInst, DenseMap<Instruction *, unsigned> &ScalarCosts,
    unsigned VF) {
  assert(!isUniformAfterVectorization(PredInst, VF) &&
         "Instruction marked uniform-after-vectorization will be predicated");

  // Initialize the discount to zero, meaning that the scalar version and the
  // vector version cost the same.
  int Discount = 0;

  // Holds instructions to analyze. The instructions we visit are mapped in
  // ScalarCosts. Those instructions are the ones that would be scalarized if
  // we find that the scalar version costs less.
  SmallVector<Instruction *, 8> Worklist;

  // Returns true if the given instruction can be scalarized.
  auto canBeScalarized = [&](Instruction *I) -> bool {
    // We only attempt to scalarize instructions forming a single-use chain
    // from the original predicated block that would otherwise be vectorized.
    // Although not strictly necessary, we give up on instructions we know will
    // already be scalar to avoid traversing chains that are unlikely to be
    // beneficial.
    if (!I->hasOneUse() || PredInst->getParent() != I->getParent() ||
        isScalarAfterVectorization(I, VF))
      return false;

    // If the instruction is scalar with predication, it will be analyzed
    // separately. We ignore it within the context of PredInst.
    if (Legal->isScalarWithPredication(I))
      return false;

    // If any of the instruction's operands are uniform after vectorization,
    // the instruction cannot be scalarized. This prevents, for example, a
    // masked load from being scalarized.
    //
    // We assume we will only emit a value for lane zero of an instruction
    // marked uniform after vectorization, rather than VF identical values.
// Thus, if we scalarize an instruction that uses a uniform, we would // create uses of values corresponding to the lanes we aren't emitting code // for. This behavior can be changed by allowing getScalarValue to clone // the lane zero values for uniforms rather than asserting. for (Use &U : I->operands()) if (auto *J = dyn_cast(U.get())) if (isUniformAfterVectorization(J, VF)) return false; // Otherwise, we can scalarize the instruction. return true; }; // Returns true if an operand that cannot be scalarized must be extracted // from a vector. We will account for this scalarization overhead below. Note // that the non-void predicated instructions are placed in their own blocks, // and their return values are inserted into vectors. Thus, an extract would // still be required. auto needsExtract = [&](Instruction *I) -> bool { return TheLoop->contains(I) && !isScalarAfterVectorization(I, VF); }; // Compute the expected cost discount from scalarizing the entire expression // feeding the predicated instruction. We currently only consider expressions // that are single-use instruction chains. Worklist.push_back(PredInst); while (!Worklist.empty()) { Instruction *I = Worklist.pop_back_val(); // If we've already analyzed the instruction, there's nothing to do. if (ScalarCosts.count(I)) continue; // Compute the cost of the vector instruction. Note that this cost already // includes the scalarization overhead of the predicated instruction. unsigned VectorCost = getInstructionCost(I, VF).first; // Compute the cost of the scalarized instruction. This cost is the cost of // the instruction as if it wasn't if-converted and instead remained in the // predicated block. We will scale this cost by block probability after // computing the scalarization overhead. unsigned ScalarCost = VF * getInstructionCost(I, 1).first; // Compute the scalarization overhead of needed insertelement instructions // and phi nodes. if (Legal->isScalarWithPredication(I) && !I->getType()->isVoidTy()) { ScalarCost += TTI.getScalarizationOverhead(ToVectorTy(I->getType(), VF), true, false); ScalarCost += VF * TTI.getCFInstrCost(Instruction::PHI); } // Compute the scalarization overhead of needed extractelement // instructions. For each of the instruction's operands, if the operand can // be scalarized, add it to the worklist; otherwise, account for the // overhead. for (Use &U : I->operands()) if (auto *J = dyn_cast(U.get())) { assert(VectorType::isValidElementType(J->getType()) && "Instruction has non-scalar type"); if (canBeScalarized(J)) Worklist.push_back(J); else if (needsExtract(J)) ScalarCost += TTI.getScalarizationOverhead( ToVectorTy(J->getType(),VF), false, true); } // Scale the total scalar cost by block probability. ScalarCost /= getReciprocalPredBlockProb(); // Compute the discount. A non-negative discount means the vector version // of the instruction costs more, and scalarizing would be beneficial. Discount += VectorCost - ScalarCost; ScalarCosts[I] = ScalarCost; } return Discount; } LoopVectorizationCostModel::VectorizationCostTy LoopVectorizationCostModel::expectedCost(unsigned VF) { VectorizationCostTy Cost; // For each block. for (BasicBlock *BB : TheLoop->blocks()) { VectorizationCostTy BlockCost; // For each instruction in the old loop. for (Instruction &I : *BB) { // Skip dbg intrinsics. if (isa(I)) continue; // Skip ignored values. if (ValuesToIgnore.count(&I) || (VF > 1 && VecValuesToIgnore.count(&I))) continue; VectorizationCostTy C = getInstructionCost(&I, VF); // Check if we should override the cost. 
if (ForceTargetInstructionCost.getNumOccurrences() > 0) C.first = ForceTargetInstructionCost; BlockCost.first += C.first; BlockCost.second |= C.second; DEBUG(dbgs() << "LV: Found an estimated cost of " << C.first << " for VF " << VF << " For instruction: " << I << '\n'); } // If we are vectorizing a predicated block, it will have been // if-converted. This means that the block's instructions (aside from // stores and instructions that may divide by zero) will now be // unconditionally executed. For the scalar case, we may not always execute // the predicated block. Thus, scale the block's cost by the probability of // executing it. if (VF == 1 && Legal->blockNeedsPredication(BB)) BlockCost.first /= getReciprocalPredBlockProb(); Cost.first += BlockCost.first; Cost.second |= BlockCost.second; } return Cost; } /// \brief Gets Address Access SCEV after verifying that the access pattern /// is loop invariant except the induction variable dependence. /// /// This SCEV can be sent to the Target in order to estimate the address /// calculation cost. static const SCEV *getAddressAccessSCEV( Value *Ptr, LoopVectorizationLegality *Legal, PredicatedScalarEvolution &PSE, const Loop *TheLoop) { auto *Gep = dyn_cast(Ptr); if (!Gep) return nullptr; // We are looking for a gep with all loop invariant indices except for one // which should be an induction variable. auto SE = PSE.getSE(); unsigned NumOperands = Gep->getNumOperands(); for (unsigned i = 1; i < NumOperands; ++i) { Value *Opd = Gep->getOperand(i); if (!SE->isLoopInvariant(SE->getSCEV(Opd), TheLoop) && !Legal->isInductionVariable(Opd)) return nullptr; } // Now we know we have a GEP ptr, %inv, %ind, %inv. return the Ptr SCEV. return PSE.getSCEV(Ptr); } static bool isStrideMul(Instruction *I, LoopVectorizationLegality *Legal) { return Legal->hasStride(I->getOperand(0)) || Legal->hasStride(I->getOperand(1)); } unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I, unsigned VF) { Type *ValTy = getMemInstValueType(I); auto SE = PSE.getSE(); unsigned Alignment = getMemInstAlignment(I); unsigned AS = getMemInstAddressSpace(I); Value *Ptr = getPointerOperand(I); Type *PtrTy = ToVectorTy(Ptr->getType(), VF); // Figure out whether the access is strided and get the stride value // if it's known in compile time const SCEV *PtrSCEV = getAddressAccessSCEV(Ptr, Legal, PSE, TheLoop); // Get the cost of the scalar memory instruction and address computation. unsigned Cost = VF * TTI.getAddressComputationCost(PtrTy, SE, PtrSCEV); Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(), Alignment, AS, I); // Get the overhead of the extractelement and insertelement instructions // we might create due to scalarization. Cost += getScalarizationOverhead(I, VF, TTI); // If we have a predicated store, it may not be executed for each vector // lane. Scale the cost by the probability of executing the predicated // block. 
if (Legal->isScalarWithPredication(I)) Cost /= getReciprocalPredBlockProb(); return Cost; } unsigned LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I, unsigned VF) { Type *ValTy = getMemInstValueType(I); Type *VectorTy = ToVectorTy(ValTy, VF); unsigned Alignment = getMemInstAlignment(I); Value *Ptr = getPointerOperand(I); unsigned AS = getMemInstAddressSpace(I); int ConsecutiveStride = Legal->isConsecutivePtr(Ptr); assert((ConsecutiveStride == 1 || ConsecutiveStride == -1) && "Stride should be 1 or -1 for consecutive memory access"); unsigned Cost = 0; if (Legal->isMaskRequired(I)) Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS); else Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS, I); bool Reverse = ConsecutiveStride < 0; if (Reverse) Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, 0); return Cost; } unsigned LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I, unsigned VF) { LoadInst *LI = cast(I); Type *ValTy = LI->getType(); Type *VectorTy = ToVectorTy(ValTy, VF); unsigned Alignment = LI->getAlignment(); unsigned AS = LI->getPointerAddressSpace(); return TTI.getAddressComputationCost(ValTy) + TTI.getMemoryOpCost(Instruction::Load, ValTy, Alignment, AS) + TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VectorTy); } unsigned LoopVectorizationCostModel::getGatherScatterCost(Instruction *I, unsigned VF) { Type *ValTy = getMemInstValueType(I); Type *VectorTy = ToVectorTy(ValTy, VF); unsigned Alignment = getMemInstAlignment(I); Value *Ptr = getPointerOperand(I); return TTI.getAddressComputationCost(VectorTy) + TTI.getGatherScatterOpCost(I->getOpcode(), VectorTy, Ptr, Legal->isMaskRequired(I), Alignment); } unsigned LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I, unsigned VF) { Type *ValTy = getMemInstValueType(I); Type *VectorTy = ToVectorTy(ValTy, VF); unsigned AS = getMemInstAddressSpace(I); auto Group = Legal->getInterleavedAccessGroup(I); assert(Group && "Fail to get an interleaved access group."); unsigned InterleaveFactor = Group->getFactor(); Type *WideVecTy = VectorType::get(ValTy, VF * InterleaveFactor); // Holds the indices of existing members in an interleaved load group. // An interleaved store group doesn't need this as it doesn't allow gaps. SmallVector Indices; if (isa(I)) { for (unsigned i = 0; i < InterleaveFactor; i++) if (Group->getMember(i)) Indices.push_back(i); } // Calculate the cost of the whole interleaved group. unsigned Cost = TTI.getInterleavedMemoryOpCost(I->getOpcode(), WideVecTy, Group->getFactor(), Indices, Group->getAlignment(), AS); if (Group->isReverse()) Cost += Group->getNumMembers() * TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, 0); return Cost; } unsigned LoopVectorizationCostModel::getMemoryInstructionCost(Instruction *I, unsigned VF) { // Calculate scalar cost only. Vectorization cost should be ready at this // moment. if (VF == 1) { Type *ValTy = getMemInstValueType(I); unsigned Alignment = getMemInstAlignment(I); unsigned AS = getMemInstAddressSpace(I); return TTI.getAddressComputationCost(ValTy) + TTI.getMemoryOpCost(I->getOpcode(), ValTy, Alignment, AS, I); } return getWideningCost(I, VF); } LoopVectorizationCostModel::VectorizationCostTy LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { // If we know that this instruction will remain uniform, check the cost of // the scalar version. 
  if (isUniformAfterVectorization(I, VF))
    VF = 1;

  if (VF > 1 && isProfitableToScalarize(I, VF))
    return VectorizationCostTy(InstsToScalarize[VF][I], false);

  // Forced scalars do not have any scalarization overhead.
  if (VF > 1 && ForcedScalars.count(VF) &&
      ForcedScalars.find(VF)->second.count(I))
    return VectorizationCostTy((getInstructionCost(I, 1).first * VF), false);

  Type *VectorTy;
  unsigned C = getInstructionCost(I, VF, VectorTy);

  bool TypeNotScalarized =
      VF > 1 && VectorTy->isVectorTy() && TTI.getNumberOfParts(VectorTy) < VF;
  return VectorizationCostTy(C, TypeNotScalarized);
}

void LoopVectorizationCostModel::setCostBasedWideningDecision(unsigned VF) {
  if (VF == 1)
    return;
  for (BasicBlock *BB : TheLoop->blocks()) {
    // For each instruction in the old loop.
    for (Instruction &I : *BB) {
      Value *Ptr = getPointerOperand(&I);
      if (!Ptr)
        continue;

      if (isa<LoadInst>(&I) && Legal->isUniform(Ptr)) {
        // Scalar load + broadcast
        unsigned Cost = getUniformMemOpCost(&I, VF);
        setWideningDecision(&I, VF, CM_Scalarize, Cost);
        continue;
      }

      // We assume that widening is the best solution when possible.
      if (Legal->memoryInstructionCanBeWidened(&I, VF)) {
        unsigned Cost = getConsecutiveMemOpCost(&I, VF);
        int ConsecutiveStride = Legal->isConsecutivePtr(getPointerOperand(&I));
        assert((ConsecutiveStride == 1 || ConsecutiveStride == -1) &&
               "Expected consecutive stride.");
        InstWidening Decision =
            ConsecutiveStride == 1 ? CM_Widen : CM_Widen_Reverse;
        setWideningDecision(&I, VF, Decision, Cost);
        continue;
      }

      // Choose between Interleaving, Gather/Scatter or Scalarization.
      unsigned InterleaveCost = std::numeric_limits<unsigned>::max();
      unsigned NumAccesses = 1;
      if (Legal->isAccessInterleaved(&I)) {
        auto Group = Legal->getInterleavedAccessGroup(&I);
        assert(Group && "Fail to get an interleaved access group.");

        // Make one decision for the whole group.
        if (getWideningDecision(&I, VF) != CM_Unknown)
          continue;

        NumAccesses = Group->getNumMembers();
        InterleaveCost = getInterleaveGroupCost(&I, VF);
      }

      unsigned GatherScatterCost =
          Legal->isLegalGatherOrScatter(&I)
              ? getGatherScatterCost(&I, VF) * NumAccesses
              : std::numeric_limits<unsigned>::max();

      unsigned ScalarizationCost =
          getMemInstScalarizationCost(&I, VF) * NumAccesses;

      // Choose the best option for the current VF, record this decision, and
      // use it during vectorization.
      unsigned Cost;
      InstWidening Decision;
      if (InterleaveCost <= GatherScatterCost &&
          InterleaveCost < ScalarizationCost) {
        Decision = CM_Interleave;
        Cost = InterleaveCost;
      } else if (GatherScatterCost < ScalarizationCost) {
        Decision = CM_GatherScatter;
        Cost = GatherScatterCost;
      } else {
        Decision = CM_Scalarize;
        Cost = ScalarizationCost;
      }
      // If the instruction belongs to an interleave group, the whole group
      // receives the same decision. The whole group receives the cost, but
      // the cost will actually be assigned to one instruction.
      if (auto Group = Legal->getInterleavedAccessGroup(&I))
        setWideningDecision(Group, VF, Decision, Cost);
      else
        setWideningDecision(&I, VF, Decision, Cost);
    }
  }

  // Make sure that any load of address and any other address computation
  // remains scalar unless there is gather/scatter support. This avoids
  // inevitable extracts into address registers, and also has the benefit of
  // activating LSR more, since that pass can't optimize vectorized
  // addresses.
  if (TTI.prefersVectorizedAddressing())
    return;
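  // A hypothetical cost comparison for the three-way choice above: if, for
  // some access, InterleaveCost = 12, GatherScatterCost = 10 and
  // ScalarizationCost = 14, interleaving fails its <= test against
  // gather/scatter, and gather/scatter passes its < test against
  // scalarization, so the decision is CM_GatherScatter with Cost = 10.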
  // Start with all scalar pointer uses.
  SmallPtrSet<Instruction *, 8> AddrDefs;
  for (BasicBlock *BB : TheLoop->blocks())
    for (Instruction &I : *BB) {
      Instruction *PtrDef =
          dyn_cast_or_null<Instruction>(getPointerOperand(&I));
      if (PtrDef && TheLoop->contains(PtrDef) &&
          getWideningDecision(&I, VF) != CM_GatherScatter)
        AddrDefs.insert(PtrDef);
    }

  // Add all instructions used to generate the addresses.
  SmallVector<Instruction *, 8> Worklist;
  for (auto *I : AddrDefs)
    Worklist.push_back(I);
  while (!Worklist.empty()) {
    Instruction *I = Worklist.pop_back_val();
    for (auto &Op : I->operands())
      if (auto *InstOp = dyn_cast<Instruction>(Op))
        if ((InstOp->getParent() == I->getParent()) && !isa<PHINode>(InstOp) &&
            AddrDefs.insert(InstOp).second)
          Worklist.push_back(InstOp);
  }

  for (auto *I : AddrDefs) {
    if (isa<LoadInst>(I)) {
      // Setting the desired widening decision should ideally be handled by
      // cost functions, but since this involves the task of finding out if
      // the loaded register is involved in an address computation, it is
      // instead changed here when we know this is the case.
      InstWidening Decision = getWideningDecision(I, VF);
      if (Decision == CM_Widen || Decision == CM_Widen_Reverse)
        // Scalarize a widened load of address.
        setWideningDecision(I, VF, CM_Scalarize,
                            (VF * getMemoryInstructionCost(I, 1)));
      else if (auto Group = Legal->getInterleavedAccessGroup(I)) {
        // Scalarize an interleave group of address loads.
        for (unsigned I = 0; I < Group->getFactor(); ++I) {
          if (Instruction *Member = Group->getMember(I))
            setWideningDecision(Member, VF, CM_Scalarize,
                                (VF * getMemoryInstructionCost(Member, 1)));
        }
      }
    } else
      // Make sure I gets scalarized and a cost estimate without
      // scalarization overhead.
      ForcedScalars[VF].insert(I);
  }
}

unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
                                                        unsigned VF,
                                                        Type *&VectorTy) {
  Type *RetTy = I->getType();
  if (canTruncateToMinimalBitwidth(I, VF))
    RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]);
  VectorTy = isScalarAfterVectorization(I, VF) ? RetTy : ToVectorTy(RetTy, VF);
  auto SE = PSE.getSE();

  // TODO: We need to estimate the cost of intrinsic calls.
  switch (I->getOpcode()) {
  case Instruction::GetElementPtr:
    // We mark this instruction as zero-cost because the cost of GEPs in
    // vectorized code depends on whether the corresponding memory instruction
    // is scalarized or not. Therefore, we handle GEPs with the memory
    // instruction cost.
    return 0;
  case Instruction::Br: {
    // In cases of scalarized and predicated instructions, there will be VF
    // predicated blocks in the vectorized loop. Each branch around these
    // blocks also requires an extract of its vector compare i1 element.
    bool ScalarPredicatedBB = false;
    BranchInst *BI = cast<BranchInst>(I);
    if (VF > 1 && BI->isConditional() &&
        (PredicatedBBsAfterVectorization.count(BI->getSuccessor(0)) ||
         PredicatedBBsAfterVectorization.count(BI->getSuccessor(1))))
      ScalarPredicatedBB = true;

    if (ScalarPredicatedBB) {
      // Return cost for branches around scalarized and predicated blocks.
      Type *Vec_i1Ty =
          VectorType::get(IntegerType::getInt1Ty(RetTy->getContext()), VF);
      return (TTI.getScalarizationOverhead(Vec_i1Ty, false, true) +
              (TTI.getCFInstrCost(Instruction::Br) * VF));
    } else if (I->getParent() == TheLoop->getLoopLatch() || VF == 1)
      // The back-edge branch will remain, as will all scalar branches.
      return TTI.getCFInstrCost(Instruction::Br);
    else
      // This branch will be eliminated by if-conversion.
      return 0;
    // Note: We currently assume zero cost for an unconditional branch inside
    // a predicated block since it will become a fall-through, although we
    // may decide in the future to call TTI for all branches.
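    // A rough numeric sketch of the ScalarPredicatedBB cost above (values
    // hypothetical): for VF = 4, the returned cost is the extraction
    // overhead of a <4 x i1> compare vector plus four scalar branches, i.e.
    // getScalarizationOverhead(Vec_i1Ty, false, true) +
    // 4 * getCFInstrCost(Instruction::Br).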
} case Instruction::PHI: { auto *Phi = cast(I); // First-order recurrences are replaced by vector shuffles inside the loop. if (VF > 1 && Legal->isFirstOrderRecurrence(Phi)) return TTI.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector, VectorTy, VF - 1, VectorTy); // Phi nodes in non-header blocks (not inductions, reductions, etc.) are // converted into select instructions. We require N - 1 selects per phi // node, where N is the number of incoming values. if (VF > 1 && Phi->getParent() != TheLoop->getHeader()) return (Phi->getNumIncomingValues() - 1) * TTI.getCmpSelInstrCost( Instruction::Select, ToVectorTy(Phi->getType(), VF), ToVectorTy(Type::getInt1Ty(Phi->getContext()), VF)); return TTI.getCFInstrCost(Instruction::PHI); } case Instruction::UDiv: case Instruction::SDiv: case Instruction::URem: case Instruction::SRem: // If we have a predicated instruction, it may not be executed for each // vector lane. Get the scalarization cost and scale this amount by the // probability of executing the predicated block. If the instruction is not // predicated, we fall through to the next case. if (VF > 1 && Legal->isScalarWithPredication(I)) { unsigned Cost = 0; // These instructions have a non-void type, so account for the phi nodes // that we will create. This cost is likely to be zero. The phi node // cost, if any, should be scaled by the block probability because it // models a copy at the end of each predicated block. Cost += VF * TTI.getCFInstrCost(Instruction::PHI); // The cost of the non-predicated instruction. Cost += VF * TTI.getArithmeticInstrCost(I->getOpcode(), RetTy); // The cost of insertelement and extractelement instructions needed for // scalarization. Cost += getScalarizationOverhead(I, VF, TTI); // Scale the cost by the probability of executing the predicated blocks. // This assumes the predicated block for each vector lane is equally // likely. return Cost / getReciprocalPredBlockProb(); } LLVM_FALLTHROUGH; case Instruction::Add: case Instruction::FAdd: case Instruction::Sub: case Instruction::FSub: case Instruction::Mul: case Instruction::FMul: case Instruction::FDiv: case Instruction::FRem: case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: case Instruction::And: case Instruction::Or: case Instruction::Xor: { // Since we will replace the stride by 1 the multiplication should go away. if (I->getOpcode() == Instruction::Mul && isStrideMul(I, Legal)) return 0; // Certain instructions can be cheaper to vectorize if they have a constant // second vector operand. One example of this are shifts on x86. TargetTransformInfo::OperandValueKind Op1VK = TargetTransformInfo::OK_AnyValue; TargetTransformInfo::OperandValueKind Op2VK = TargetTransformInfo::OK_AnyValue; TargetTransformInfo::OperandValueProperties Op1VP = TargetTransformInfo::OP_None; TargetTransformInfo::OperandValueProperties Op2VP = TargetTransformInfo::OP_None; Value *Op2 = I->getOperand(1); // Check for a splat or for a non uniform vector of constants. 
if (isa(Op2)) { ConstantInt *CInt = cast(Op2); if (CInt && CInt->getValue().isPowerOf2()) Op2VP = TargetTransformInfo::OP_PowerOf2; Op2VK = TargetTransformInfo::OK_UniformConstantValue; } else if (isa(Op2) || isa(Op2)) { Op2VK = TargetTransformInfo::OK_NonUniformConstantValue; Constant *SplatValue = cast(Op2)->getSplatValue(); if (SplatValue) { ConstantInt *CInt = dyn_cast(SplatValue); if (CInt && CInt->getValue().isPowerOf2()) Op2VP = TargetTransformInfo::OP_PowerOf2; Op2VK = TargetTransformInfo::OK_UniformConstantValue; } } else if (Legal->isUniform(Op2)) { Op2VK = TargetTransformInfo::OK_UniformValue; } SmallVector Operands(I->operand_values()); unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1; return N * TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK, Op2VK, Op1VP, Op2VP, Operands); } case Instruction::Select: { SelectInst *SI = cast(I); const SCEV *CondSCEV = SE->getSCEV(SI->getCondition()); bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop)); Type *CondTy = SI->getCondition()->getType(); if (!ScalarCond) CondTy = VectorType::get(CondTy, VF); return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy, I); } case Instruction::ICmp: case Instruction::FCmp: { Type *ValTy = I->getOperand(0)->getType(); Instruction *Op0AsInstruction = dyn_cast(I->getOperand(0)); if (canTruncateToMinimalBitwidth(Op0AsInstruction, VF)) ValTy = IntegerType::get(ValTy->getContext(), MinBWs[Op0AsInstruction]); VectorTy = ToVectorTy(ValTy, VF); return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, nullptr, I); } case Instruction::Store: case Instruction::Load: { unsigned Width = VF; if (Width > 1) { InstWidening Decision = getWideningDecision(I, Width); assert(Decision != CM_Unknown && "CM decision should be taken at this point"); if (Decision == CM_Scalarize) Width = 1; } VectorTy = ToVectorTy(getMemInstValueType(I), Width); return getMemoryInstructionCost(I, VF); } case Instruction::ZExt: case Instruction::SExt: case Instruction::FPToUI: case Instruction::FPToSI: case Instruction::FPExt: case Instruction::PtrToInt: case Instruction::IntToPtr: case Instruction::SIToFP: case Instruction::UIToFP: case Instruction::Trunc: case Instruction::FPTrunc: case Instruction::BitCast: { // We optimize the truncation of induction variables having constant // integer steps. The cost of these truncations is the same as the scalar // operation. if (isOptimizableIVTruncate(I, VF)) { auto *Trunc = cast(I); return TTI.getCastInstrCost(Instruction::Trunc, Trunc->getDestTy(), Trunc->getSrcTy(), Trunc); } Type *SrcScalarTy = I->getOperand(0)->getType(); Type *SrcVecTy = VectorTy->isVectorTy() ? ToVectorTy(SrcScalarTy, VF) : SrcScalarTy; if (canTruncateToMinimalBitwidth(I, VF)) { // This cast is going to be shrunk. This may remove the cast or it might // turn it into slightly different cast. For example, if MinBW == 16, // "zext i8 %1 to i32" becomes "zext i8 %1 to i16". // // Calculate the modified src and dest types. Type *MinVecTy = VectorTy; if (I->getOpcode() == Instruction::Trunc) { SrcVecTy = smallestIntegerVectorType(SrcVecTy, MinVecTy); VectorTy = largestIntegerVectorType(ToVectorTy(I->getType(), VF), MinVecTy); } else if (I->getOpcode() == Instruction::ZExt || I->getOpcode() == Instruction::SExt) { SrcVecTy = largestIntegerVectorType(SrcVecTy, MinVecTy); VectorTy = smallestIntegerVectorType(ToVectorTy(I->getType(), VF), MinVecTy); } } unsigned N = isScalarAfterVectorization(I, VF) ? 
VF : 1; return N * TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy, I); } case Instruction::Call: { bool NeedToScalarize; CallInst *CI = cast(I); unsigned CallCost = getVectorCallCost(CI, VF, TTI, TLI, NeedToScalarize); if (getVectorIntrinsicIDForCall(CI, TLI)) return std::min(CallCost, getVectorIntrinsicCost(CI, VF, TTI, TLI)); return CallCost; } default: // The cost of executing VF copies of the scalar instruction. This opcode // is unknown. Assume that it is the same as 'mul'. return VF * TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy) + getScalarizationOverhead(I, VF, TTI); } // end of switch. } char LoopVectorize::ID = 0; static const char lv_name[] = "Loop Vectorization"; INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis) INITIALIZE_PASS_DEPENDENCY(DemandedBitsWrapperPass) INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false) namespace llvm { Pass *createLoopVectorizePass(bool NoUnrolling, bool AlwaysVectorize) { return new LoopVectorize(NoUnrolling, AlwaysVectorize); } } // end namespace llvm bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) { // Check if the pointer operand of a load or store instruction is // consecutive. if (auto *Ptr = getPointerOperand(Inst)) return Legal->isConsecutivePtr(Ptr); return false; } void LoopVectorizationCostModel::collectValuesToIgnore() { // Ignore ephemeral values. CodeMetrics::collectEphemeralValues(TheLoop, AC, ValuesToIgnore); // Ignore type-promoting instructions we identified during reduction // detection. for (auto &Reduction : *Legal->getReductionVars()) { RecurrenceDescriptor &RedDes = Reduction.second; SmallPtrSetImpl &Casts = RedDes.getCastInsts(); VecValuesToIgnore.insert(Casts.begin(), Casts.end()); } // Ignore type-casting instructions we identified during induction // detection. for (auto &Induction : *Legal->getInductionVars()) { InductionDescriptor &IndDes = Induction.second; const SmallVectorImpl &Casts = IndDes.getCastInsts(); VecValuesToIgnore.insert(Casts.begin(), Casts.end()); } } LoopVectorizationCostModel::VectorizationFactor LoopVectorizationPlanner::plan(bool OptForSize, unsigned UserVF) { // Width 1 means no vectorize, cost 0 means uncomputed cost. const LoopVectorizationCostModel::VectorizationFactor NoVectorization = {1U, 0U}; Optional MaybeMaxVF = CM.computeMaxVF(OptForSize); if (!MaybeMaxVF.hasValue()) // Cases considered too costly to vectorize. return NoVectorization; if (UserVF) { DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n"); assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two"); // Collect the instructions (and their associated costs) that will be more // profitable to scalarize. 
    CM.selectUserVectorizationFactor(UserVF);
    buildVPlans(UserVF, UserVF);
    DEBUG(printPlans(dbgs()));
    return {UserVF, 0};
  }

  unsigned MaxVF = MaybeMaxVF.getValue();
  assert(MaxVF != 0 && "MaxVF is zero.");

  for (unsigned VF = 1; VF <= MaxVF; VF *= 2) {
    // Collect Uniform and Scalar instructions after vectorization with VF.
    CM.collectUniformsAndScalars(VF);

    // Collect the instructions (and their associated costs) that will be more
    // profitable to scalarize.
    if (VF > 1)
      CM.collectInstsToScalarize(VF);
  }

  buildVPlans(1, MaxVF);
  DEBUG(printPlans(dbgs()));
  if (MaxVF == 1)
    return NoVectorization;

  // Select the optimal vectorization factor.
  return CM.selectVectorizationFactor(MaxVF);
}

void LoopVectorizationPlanner::setBestPlan(unsigned VF, unsigned UF) {
  DEBUG(dbgs() << "Setting best plan to VF=" << VF << ", UF=" << UF << '\n');
  BestVF = VF;
  BestUF = UF;

  erase_if(VPlans, [VF](const VPlanPtr &Plan) { return !Plan->hasVF(VF); });
  assert(VPlans.size() == 1 && "Best VF has not a single VPlan.");
}

void LoopVectorizationPlanner::executePlan(InnerLoopVectorizer &ILV,
                                           DominatorTree *DT) {
  // Perform the actual loop transformation.

  // 1. Create a new empty loop. Unlink the old loop and connect the new one.
  VPCallbackILV CallbackILV(ILV);
  VPTransformState State{BestVF, BestUF, LI, DT, ILV.Builder,
                         ILV.VectorLoopValueMap, &ILV, CallbackILV};
  State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton();

  //===------------------------------------------------===//
  //
  // Notice: any optimization or new instruction that goes
  // into the code below should also be implemented in
  // the cost-model.
  //
  //===------------------------------------------------===//

  // 2. Copy and widen instructions from the old loop into the new loop.
  assert(VPlans.size() == 1 && "Not a single VPlan to execute.");
  VPlans.front()->execute(&State);

  // 3. Fix the vectorized code: take care of header phi's, live-outs,
  //    predication, updating analyses.
  ILV.fixVectorizedLoop();
}

void LoopVectorizationPlanner::collectTriviallyDeadInstructions(
    SmallPtrSetImpl<Instruction *> &DeadInstructions) {
  BasicBlock *Latch = OrigLoop->getLoopLatch();

  // We create new control-flow for the vectorized loop, so the original
  // condition will be dead after vectorization if it's only used by the
  // branch.
  auto *Cmp = dyn_cast<Instruction>(Latch->getTerminator()->getOperand(0));
  if (Cmp && Cmp->hasOneUse())
    DeadInstructions.insert(Cmp);

  // We create new "steps" for induction variable updates to which the
  // original induction variables map. An original update instruction will be
  // dead if all its users except the induction variable are dead.
  for (auto &Induction : *Legal->getInductionVars()) {
    PHINode *Ind = Induction.first;
    auto *IndUpdate = cast<Instruction>(Ind->getIncomingValueForBlock(Latch));
    if (llvm::all_of(IndUpdate->users(), [&](User *U) -> bool {
          return U == Ind || DeadInstructions.count(cast<Instruction>(U));
        }))
      DeadInstructions.insert(IndUpdate);

    // We record as "Dead" also the type-casting instructions we had identified
    // during induction analysis. We don't need any handling for them in the
    // vectorized loop because we have proven that, under a proper runtime
    // test guarding the vectorized loop, the value of the phi, and the casted
    // value of the phi, are the same. The last instruction in this casting
    // chain will get its scalar/vector/widened def from the
    // scalar/vector/widened def of the respective phi node. Any other casts in
    // the induction def-use chain have no other uses outside the phi update
    // chain, and will be ignored.
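    // For instance, with the canonical update "%i.next = add %i, 1" whose
    // only user is the induction phi itself, the all_of check above holds
    // and %i.next is recorded as dead: the vectorized loop emits its own
    // induction steps instead.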
InductionDescriptor &IndDes = Induction.second; const SmallVectorImpl &Casts = IndDes.getCastInsts(); DeadInstructions.insert(Casts.begin(), Casts.end()); } } Value *InnerLoopUnroller::reverseVector(Value *Vec) { return Vec; } Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) { return V; } Value *InnerLoopUnroller::getStepVector(Value *Val, int StartIdx, Value *Step, Instruction::BinaryOps BinOp) { // When unrolling and the VF is 1, we only need to add a simple scalar. Type *Ty = Val->getType(); assert(!Ty->isVectorTy() && "Val must be a scalar"); if (Ty->isFloatingPointTy()) { Constant *C = ConstantFP::get(Ty, (double)StartIdx); // Floating point operations had to be 'fast' to enable the unrolling. Value *MulOp = addFastMathFlag(Builder.CreateFMul(C, Step)); return addFastMathFlag(Builder.CreateBinOp(BinOp, Val, MulOp)); } Constant *C = ConstantInt::get(Ty, StartIdx); return Builder.CreateAdd(Val, Builder.CreateMul(C, Step), "induction"); } static void AddRuntimeUnrollDisableMetaData(Loop *L) { SmallVector MDs; // Reserve first location for self reference to the LoopID metadata node. MDs.push_back(nullptr); bool IsUnrollMetadata = false; MDNode *LoopID = L->getLoopID(); if (LoopID) { // First find existing loop unrolling disable metadata. for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { auto *MD = dyn_cast(LoopID->getOperand(i)); if (MD) { const auto *S = dyn_cast(MD->getOperand(0)); IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll.disable"); } MDs.push_back(LoopID->getOperand(i)); } } if (!IsUnrollMetadata) { // Add runtime unroll disable metadata. LLVMContext &Context = L->getHeader()->getContext(); SmallVector DisableOperands; DisableOperands.push_back( MDString::get(Context, "llvm.loop.unroll.runtime.disable")); MDNode *DisableNode = MDNode::get(Context, DisableOperands); MDs.push_back(DisableNode); MDNode *NewLoopID = MDNode::get(Context, MDs); // Set operand 0 to refer to the loop id itself. NewLoopID->replaceOperandWith(0, NewLoopID); L->setLoopID(NewLoopID); } } bool LoopVectorizationPlanner::getDecisionAndClampRange( const std::function &Predicate, VFRange &Range) { assert(Range.End > Range.Start && "Trying to test an empty VF range."); bool PredicateAtRangeStart = Predicate(Range.Start); for (unsigned TmpVF = Range.Start * 2; TmpVF < Range.End; TmpVF *= 2) if (Predicate(TmpVF) != PredicateAtRangeStart) { Range.End = TmpVF; break; } return PredicateAtRangeStart; } /// Build VPlans for the full range of feasible VF's = {\p MinVF, 2 * \p MinVF, /// 4 * \p MinVF, ..., \p MaxVF} by repeatedly building a VPlan for a sub-range /// of VF's starting at a given VF and extending it as much as possible. Each /// vectorization decision can potentially shorten this sub-range during /// buildVPlan(). void LoopVectorizationPlanner::buildVPlans(unsigned MinVF, unsigned MaxVF) { // Collect conditions feeding internal conditional branches; they need to be // represented in VPlan for it to model masking. 
SmallPtrSet NeedDef; auto *Latch = OrigLoop->getLoopLatch(); for (BasicBlock *BB : OrigLoop->blocks()) { if (BB == Latch) continue; BranchInst *Branch = dyn_cast(BB->getTerminator()); if (Branch && Branch->isConditional()) NeedDef.insert(Branch->getCondition()); } for (unsigned VF = MinVF; VF < MaxVF + 1;) { VFRange SubRange = {VF, MaxVF + 1}; VPlans.push_back(buildVPlan(SubRange, NeedDef)); VF = SubRange.End; } } VPValue *LoopVectorizationPlanner::createEdgeMask(BasicBlock *Src, BasicBlock *Dst, VPlanPtr &Plan) { assert(is_contained(predecessors(Dst), Src) && "Invalid edge"); // Look for cached value. std::pair Edge(Src, Dst); EdgeMaskCacheTy::iterator ECEntryIt = EdgeMaskCache.find(Edge); if (ECEntryIt != EdgeMaskCache.end()) return ECEntryIt->second; VPValue *SrcMask = createBlockInMask(Src, Plan); // The terminator has to be a branch inst! BranchInst *BI = dyn_cast(Src->getTerminator()); assert(BI && "Unexpected terminator found"); if (!BI->isConditional()) return EdgeMaskCache[Edge] = SrcMask; VPValue *EdgeMask = Plan->getVPValue(BI->getCondition()); assert(EdgeMask && "No Edge Mask found for condition"); if (BI->getSuccessor(0) != Dst) EdgeMask = Builder.createNot(EdgeMask); if (SrcMask) // Otherwise block in-mask is all-one, no need to AND. EdgeMask = Builder.createAnd(EdgeMask, SrcMask); return EdgeMaskCache[Edge] = EdgeMask; } VPValue *LoopVectorizationPlanner::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) { assert(OrigLoop->contains(BB) && "Block is not a part of a loop"); // Look for cached value. BlockMaskCacheTy::iterator BCEntryIt = BlockMaskCache.find(BB); if (BCEntryIt != BlockMaskCache.end()) return BCEntryIt->second; // All-one mask is modelled as no-mask following the convention for masked // load/store/gather/scatter. Initialize BlockMask to no-mask. VPValue *BlockMask = nullptr; // Loop incoming mask is all-one. if (OrigLoop->getHeader() == BB) return BlockMaskCache[BB] = BlockMask; // This is the block mask. We OR all incoming edges. for (auto *Predecessor : predecessors(BB)) { VPValue *EdgeMask = createEdgeMask(Predecessor, BB, Plan); if (!EdgeMask) // Mask of predecessor is all-one so mask of block is too. return BlockMaskCache[BB] = EdgeMask; if (!BlockMask) { // BlockMask has its initialized nullptr value. BlockMask = EdgeMask; continue; } BlockMask = Builder.createOr(BlockMask, EdgeMask); } return BlockMaskCache[BB] = BlockMask; } VPInterleaveRecipe * LoopVectorizationPlanner::tryToInterleaveMemory(Instruction *I, VFRange &Range) { const InterleaveGroup *IG = Legal->getInterleavedAccessGroup(I); if (!IG) return nullptr; // Now check if IG is relevant for VF's in the given range. auto isIGMember = [&](Instruction *I) -> std::function { return [=](unsigned VF) -> bool { return (VF >= 2 && // Query is illegal for VF == 1 CM.getWideningDecision(I, VF) == LoopVectorizationCostModel::CM_Interleave); }; }; if (!getDecisionAndClampRange(isIGMember(I), Range)) return nullptr; // I is a member of an InterleaveGroup for VF's in the (possibly trimmed) // range. If it's the primary member of the IG construct a VPInterleaveRecipe. // Otherwise, it's an adjunct member of the IG, do not construct any Recipe. 
assert(I == IG->getInsertPos() && "Generating a recipe for an adjunct member of an interleave group"); return new VPInterleaveRecipe(IG); } VPWidenMemoryInstructionRecipe * LoopVectorizationPlanner::tryToWidenMemory(Instruction *I, VFRange &Range, VPlanPtr &Plan) { if (!isa(I) && !isa(I)) return nullptr; auto willWiden = [&](unsigned VF) -> bool { if (VF == 1) return false; if (CM.isScalarAfterVectorization(I, VF) || CM.isProfitableToScalarize(I, VF)) return false; LoopVectorizationCostModel::InstWidening Decision = CM.getWideningDecision(I, VF); assert(Decision != LoopVectorizationCostModel::CM_Unknown && "CM decision should be taken at this point."); assert(Decision != LoopVectorizationCostModel::CM_Interleave && "Interleave memory opportunity should be caught earlier."); return Decision != LoopVectorizationCostModel::CM_Scalarize; }; if (!getDecisionAndClampRange(willWiden, Range)) return nullptr; VPValue *Mask = nullptr; if (Legal->isMaskRequired(I)) Mask = createBlockInMask(I->getParent(), Plan); return new VPWidenMemoryInstructionRecipe(*I, Mask); } VPWidenIntOrFpInductionRecipe * LoopVectorizationPlanner::tryToOptimizeInduction(Instruction *I, VFRange &Range) { if (PHINode *Phi = dyn_cast(I)) { // Check if this is an integer or fp induction. If so, build the recipe that // produces its scalar and vector values. InductionDescriptor II = Legal->getInductionVars()->lookup(Phi); if (II.getKind() == InductionDescriptor::IK_IntInduction || II.getKind() == InductionDescriptor::IK_FpInduction) return new VPWidenIntOrFpInductionRecipe(Phi); return nullptr; } // Optimize the special case where the source is a constant integer // induction variable. Notice that we can only optimize the 'trunc' case // because (a) FP conversions lose precision, (b) sext/zext may wrap, and // (c) other casts depend on pointer size. // Determine whether \p K is a truncation based on an induction variable that // can be optimized. auto isOptimizableIVTruncate = [&](Instruction *K) -> std::function { return [=](unsigned VF) -> bool { return CM.isOptimizableIVTruncate(K, VF); }; }; if (isa(I) && getDecisionAndClampRange(isOptimizableIVTruncate(I), Range)) return new VPWidenIntOrFpInductionRecipe(cast(I->getOperand(0)), cast(I)); return nullptr; } VPBlendRecipe * LoopVectorizationPlanner::tryToBlend(Instruction *I, VPlanPtr &Plan) { PHINode *Phi = dyn_cast(I); if (!Phi || Phi->getParent() == OrigLoop->getHeader()) return nullptr; // We know that all PHIs in non-header blocks are converted into selects, so // we don't have to worry about the insertion order and we can just use the // builder. At this point we generate the predication tree. There may be // duplications since this is a simple recursive scan, but future // optimizations will clean it up. 
SmallVector Masks; unsigned NumIncoming = Phi->getNumIncomingValues(); for (unsigned In = 0; In < NumIncoming; In++) { VPValue *EdgeMask = createEdgeMask(Phi->getIncomingBlock(In), Phi->getParent(), Plan); assert((EdgeMask || NumIncoming == 1) && "Multiple predecessors with one having a full mask"); if (EdgeMask) Masks.push_back(EdgeMask); } return new VPBlendRecipe(Phi, Masks); } bool LoopVectorizationPlanner::tryToWiden(Instruction *I, VPBasicBlock *VPBB, VFRange &Range) { if (Legal->isScalarWithPredication(I)) return false; auto IsVectorizableOpcode = [](unsigned Opcode) { switch (Opcode) { case Instruction::Add: case Instruction::And: case Instruction::AShr: case Instruction::BitCast: case Instruction::Br: case Instruction::Call: case Instruction::FAdd: case Instruction::FCmp: case Instruction::FDiv: case Instruction::FMul: case Instruction::FPExt: case Instruction::FPToSI: case Instruction::FPToUI: case Instruction::FPTrunc: case Instruction::FRem: case Instruction::FSub: case Instruction::GetElementPtr: case Instruction::ICmp: case Instruction::IntToPtr: case Instruction::Load: case Instruction::LShr: case Instruction::Mul: case Instruction::Or: case Instruction::PHI: case Instruction::PtrToInt: case Instruction::SDiv: case Instruction::Select: case Instruction::SExt: case Instruction::Shl: case Instruction::SIToFP: case Instruction::SRem: case Instruction::Store: case Instruction::Sub: case Instruction::Trunc: case Instruction::UDiv: case Instruction::UIToFP: case Instruction::URem: case Instruction::Xor: case Instruction::ZExt: return true; } return false; }; if (!IsVectorizableOpcode(I->getOpcode())) return false; if (CallInst *CI = dyn_cast(I)) { Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); if (ID && (ID == Intrinsic::assume || ID == Intrinsic::lifetime_end || ID == Intrinsic::lifetime_start || ID == Intrinsic::sideeffect)) return false; } auto willWiden = [&](unsigned VF) -> bool { if (!isa(I) && (CM.isScalarAfterVectorization(I, VF) || CM.isProfitableToScalarize(I, VF))) return false; if (CallInst *CI = dyn_cast(I)) { Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); // The following case may be scalarized depending on the VF. // The flag shows whether we use Intrinsic or a usual Call for vectorized // version of the instruction. // Is it beneficial to perform intrinsic call compared to lib call? bool NeedToScalarize; unsigned CallCost = getVectorCallCost(CI, VF, *TTI, TLI, NeedToScalarize); bool UseVectorIntrinsic = ID && getVectorIntrinsicCost(CI, VF, *TTI, TLI) <= CallCost; return UseVectorIntrinsic || !NeedToScalarize; } if (isa(I) || isa(I)) { assert(CM.getWideningDecision(I, VF) == LoopVectorizationCostModel::CM_Scalarize && "Memory widening decisions should have been taken care by now"); return false; } return true; }; if (!getDecisionAndClampRange(willWiden, Range)) return false; // Success: widen this instruction. We optimize the common case where // consecutive instructions can be represented by a single recipe. 
if (!VPBB->empty()) { VPWidenRecipe *LastWidenRecipe = dyn_cast(&VPBB->back()); if (LastWidenRecipe && LastWidenRecipe->appendInstruction(I)) return true; } VPBB->appendRecipe(new VPWidenRecipe(I)); return true; } VPBasicBlock *LoopVectorizationPlanner::handleReplication( Instruction *I, VFRange &Range, VPBasicBlock *VPBB, DenseMap &PredInst2Recipe, VPlanPtr &Plan) { bool IsUniform = getDecisionAndClampRange( [&](unsigned VF) { return CM.isUniformAfterVectorization(I, VF); }, Range); bool IsPredicated = Legal->isScalarWithPredication(I); auto *Recipe = new VPReplicateRecipe(I, IsUniform, IsPredicated); // Find if I uses a predicated instruction. If so, it will use its scalar // value. Avoid hoisting the insert-element which packs the scalar value into // a vector value, as that happens iff all users use the vector value. for (auto &Op : I->operands()) if (auto *PredInst = dyn_cast(Op)) if (PredInst2Recipe.find(PredInst) != PredInst2Recipe.end()) PredInst2Recipe[PredInst]->setAlsoPack(false); // Finalize the recipe for Instr, first if it is not predicated. if (!IsPredicated) { DEBUG(dbgs() << "LV: Scalarizing:" << *I << "\n"); VPBB->appendRecipe(Recipe); return VPBB; } DEBUG(dbgs() << "LV: Scalarizing and predicating:" << *I << "\n"); assert(VPBB->getSuccessors().empty() && "VPBB has successors when handling predicated replication."); // Record predicated instructions for above packing optimizations. PredInst2Recipe[I] = Recipe; VPBlockBase *Region = VPBB->setOneSuccessor(createReplicateRegion(I, Recipe, Plan)); return cast(Region->setOneSuccessor(new VPBasicBlock())); } VPRegionBlock * LoopVectorizationPlanner::createReplicateRegion(Instruction *Instr, VPRecipeBase *PredRecipe, VPlanPtr &Plan) { // Instructions marked for predication are replicated and placed under an // if-then construct to prevent side-effects. // Generate recipes to compute the block mask for this region. VPValue *BlockInMask = createBlockInMask(Instr->getParent(), Plan); // Build the triangular if-then region. std::string RegionName = (Twine("pred.") + Instr->getOpcodeName()).str(); assert(Instr->getParent() && "Predicated instruction not in any basic block"); auto *BOMRecipe = new VPBranchOnMaskRecipe(BlockInMask); auto *Entry = new VPBasicBlock(Twine(RegionName) + ".entry", BOMRecipe); auto *PHIRecipe = Instr->getType()->isVoidTy() ? nullptr : new VPPredInstPHIRecipe(Instr); auto *Exit = new VPBasicBlock(Twine(RegionName) + ".continue", PHIRecipe); auto *Pred = new VPBasicBlock(Twine(RegionName) + ".if", PredRecipe); VPRegionBlock *Region = new VPRegionBlock(Entry, Exit, RegionName, true); // Note: first set Entry as region entry and then connect successors starting // from it in order, to propagate the "parent" of each VPBasicBlock. Entry->setTwoSuccessors(Pred, Exit); Pred->setOneSuccessor(Exit); return Region; } LoopVectorizationPlanner::VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range, const SmallPtrSetImpl &NeedDef) { EdgeMaskCache.clear(); BlockMaskCache.clear(); DenseMap &SinkAfter = Legal->getSinkAfter(); DenseMap SinkAfterInverse; // Collect instructions from the original loop that will become trivially dead // in the vectorized loop. We don't need to vectorize these instructions. For // example, original induction update instructions can become dead because we // separately emit induction "steps" when generating code for the new loop. // Similarly, we create a new latch condition when setting up the structure // of the new loop, so the old one can become dead. 
SmallPtrSet DeadInstructions; collectTriviallyDeadInstructions(DeadInstructions); // Hold a mapping from predicated instructions to their recipes, in order to // fix their AlsoPack behavior if a user is determined to replicate and use a // scalar instead of vector value. DenseMap PredInst2Recipe; // Create a dummy pre-entry VPBasicBlock to start building the VPlan. VPBasicBlock *VPBB = new VPBasicBlock("Pre-Entry"); auto Plan = llvm::make_unique(VPBB); // Represent values that will have defs inside VPlan. for (Value *V : NeedDef) Plan->addVPValue(V); // Scan the body of the loop in a topological order to visit each basic block // after having visited its predecessor basic blocks. LoopBlocksDFS DFS(OrigLoop); DFS.perform(LI); for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) { // Relevant instructions from basic block BB will be grouped into VPRecipe // ingredients and fill a new VPBasicBlock. unsigned VPBBsForBB = 0; auto *FirstVPBBForBB = new VPBasicBlock(BB->getName()); VPBB->setOneSuccessor(FirstVPBBForBB); VPBB = FirstVPBBForBB; Builder.setInsertPoint(VPBB); std::vector Ingredients; // Organize the ingredients to vectorize from current basic block in the // right order. for (Instruction &I : *BB) { Instruction *Instr = &I; // First filter out irrelevant instructions, to ensure no recipes are // built for them. if (isa(Instr) || isa(Instr) || DeadInstructions.count(Instr)) continue; // I is a member of an InterleaveGroup for Range.Start. If it's an adjunct // member of the IG, do not construct any Recipe for it. const InterleaveGroup *IG = Legal->getInterleavedAccessGroup(Instr); if (IG && Instr != IG->getInsertPos() && Range.Start >= 2 && // Query is illegal for VF == 1 CM.getWideningDecision(Instr, Range.Start) == LoopVectorizationCostModel::CM_Interleave) { if (SinkAfterInverse.count(Instr)) Ingredients.push_back(SinkAfterInverse.find(Instr)->second); continue; } // Move instructions to handle first-order recurrences, step 1: avoid // handling this instruction until after we've handled the instruction it // should follow. auto SAIt = SinkAfter.find(Instr); if (SAIt != SinkAfter.end()) { DEBUG(dbgs() << "Sinking" << *SAIt->first << " after" << *SAIt->second << " to vectorize a 1st order recurrence.\n"); SinkAfterInverse[SAIt->second] = Instr; continue; } Ingredients.push_back(Instr); // Move instructions to handle first-order recurrences, step 2: push the // instruction to be sunk at its insertion point. auto SAInvIt = SinkAfterInverse.find(Instr); if (SAInvIt != SinkAfterInverse.end()) Ingredients.push_back(SAInvIt->second); } // Introduce each ingredient into VPlan. for (Instruction *Instr : Ingredients) { VPRecipeBase *Recipe = nullptr; // Check if Instr should belong to an interleave memory recipe, or already // does. In the latter case Instr is irrelevant. if ((Recipe = tryToInterleaveMemory(Instr, Range))) { VPBB->appendRecipe(Recipe); continue; } // Check if Instr is a memory operation that should be widened. if ((Recipe = tryToWidenMemory(Instr, Range, Plan))) { VPBB->appendRecipe(Recipe); continue; } // Check if Instr should form some PHI recipe. 
      if ((Recipe = tryToOptimizeInduction(Instr, Range))) {
        VPBB->appendRecipe(Recipe);
        continue;
      }
      if ((Recipe = tryToBlend(Instr, Plan))) {
        VPBB->appendRecipe(Recipe);
        continue;
      }
      if (PHINode *Phi = dyn_cast<PHINode>(Instr)) {
        VPBB->appendRecipe(new VPWidenPHIRecipe(Phi));
        continue;
      }

      // Check if Instr is to be widened by a general VPWidenRecipe, after
      // having first checked for specific widening recipes that deal with
      // Interleave Groups, Inductions and Phi nodes.
      if (tryToWiden(Instr, VPBB, Range))
        continue;

      // Otherwise, if all widening options failed, Instruction is to be
      // replicated. This may create a successor for VPBB.
      VPBasicBlock *NextVPBB =
          handleReplication(Instr, Range, VPBB, PredInst2Recipe, Plan);
      if (NextVPBB != VPBB) {
        VPBB = NextVPBB;
        VPBB->setName(BB->hasName() ? BB->getName() + "." + Twine(VPBBsForBB++)
                                    : "");
      }
    }
  }

  // Discard empty dummy pre-entry VPBasicBlock. Note that other VPBasicBlocks
  // may also be empty, such as the last one VPBB, reflecting original
  // basic-blocks with no recipes.
  VPBasicBlock *PreEntry = cast<VPBasicBlock>(Plan->getEntry());
  assert(PreEntry->empty() && "Expecting empty pre-entry block.");
  VPBlockBase *Entry = Plan->setEntry(PreEntry->getSingleSuccessor());
  PreEntry->disconnectSuccessor(Entry);
  delete PreEntry;

  std::string PlanName;
  raw_string_ostream RSO(PlanName);
  unsigned VF = Range.Start;
  Plan->addVF(VF);
  RSO << "Initial VPlan for VF={" << VF;
  for (VF *= 2; VF < Range.End; VF *= 2) {
    Plan->addVF(VF);
    RSO << "," << VF;
  }
  RSO << "},UF>=1";
  RSO.flush();
  Plan->setName(PlanName);

  return Plan;
}

void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent) const {
  O << " +\n"
    << Indent << "\"INTERLEAVE-GROUP with factor " << IG->getFactor() << " at ";
  IG->getInsertPos()->printAsOperand(O, false);
  O << "\\l\"";
  for (unsigned i = 0; i < IG->getFactor(); ++i)
    if (Instruction *I = IG->getMember(i))
      O << " +\n"
        << Indent << "\" " << VPlanIngredient(I) << " " << i << "\\l\"";
}

void VPWidenRecipe::execute(VPTransformState &State) {
  for (auto &Instr : make_range(Begin, End))
    State.ILV->widenInstruction(Instr);
}

void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
  assert(!State.Instance && "Int or FP induction being replicated.");
  State.ILV->widenIntOrFpInduction(IV, Trunc);
}

void VPWidenPHIRecipe::execute(VPTransformState &State) {
  State.ILV->widenPHIInstruction(Phi, State.UF, State.VF);
}

void VPBlendRecipe::execute(VPTransformState &State) {
  State.ILV->setDebugLocFromInst(State.Builder, Phi);
  // We know that all PHIs in non-header blocks are converted into
  // selects, so we don't have to worry about the insertion order and we
  // can just use the builder.
  // At this point we generate the predication tree. There may be
  // duplications since this is a simple recursive scan, but future
  // optimizations will clean it up.

  unsigned NumIncoming = Phi->getNumIncomingValues();
  assert((User || NumIncoming == 1) &&
         "Multiple predecessors with predecessors having a full mask");

  // Generate a sequence of selects of the form:
  // SELECT(Mask3, In3,
  //        SELECT(Mask2, In2,
  //               ( ...)))
  InnerLoopVectorizer::VectorParts Entry(State.UF);
  for (unsigned In = 0; In < NumIncoming; ++In) {
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      // We might have single edge PHIs (blocks) - use an identity
      // 'select' for the first PHI operand.
      Value *In0 =
          State.ILV->getOrCreateVectorValue(Phi->getIncomingValue(In), Part);
      if (In == 0)
        Entry[Part] = In0; // Initialize with the first incoming value.
      else {
        // Select between the current value and the previous incoming edge
        // based on the incoming mask.
        Value *Cond = State.get(User->getOperand(In), Part);
        Entry[Part] =
            State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi");
      }
    }
  }
  for (unsigned Part = 0; Part < State.UF; ++Part)
    State.ValueMap.setVectorValue(Phi, Part, Entry[Part]);
}

void VPInterleaveRecipe::execute(VPTransformState &State) {
  assert(!State.Instance && "Interleave group being replicated.");
  State.ILV->vectorizeInterleaveGroup(IG->getInsertPos());
}

void VPReplicateRecipe::execute(VPTransformState &State) {
  if (State.Instance) { // Generate a single instance.
    State.ILV->scalarizeInstruction(Ingredient, *State.Instance, IsPredicated);
    // Insert scalar instance packing it into a vector.
    if (AlsoPack && State.VF > 1) {
      // If we're constructing lane 0, initialize to start from undef.
      if (State.Instance->Lane == 0) {
        Value *Undef =
            UndefValue::get(VectorType::get(Ingredient->getType(), State.VF));
        State.ValueMap.setVectorValue(Ingredient, State.Instance->Part, Undef);
      }
      State.ILV->packScalarIntoVectorValue(Ingredient, *State.Instance);
    }
    return;
  }

  // Generate scalar instances for all VF lanes of all UF parts, unless the
  // instruction is uniform, in which case generate only the first lane for
  // each of the UF parts.
  unsigned EndLane = IsUniform ? 1 : State.VF;
  for (unsigned Part = 0; Part < State.UF; ++Part)
    for (unsigned Lane = 0; Lane < EndLane; ++Lane)
      State.ILV->scalarizeInstruction(Ingredient, {Part, Lane}, IsPredicated);
}

void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
  assert(State.Instance && "Branch on Mask works only on single instance.");

  unsigned Part = State.Instance->Part;
  unsigned Lane = State.Instance->Lane;

  Value *ConditionBit = nullptr;
  if (!User) // Block in mask is all-one.
    ConditionBit = State.Builder.getTrue();
  else {
    VPValue *BlockInMask = User->getOperand(0);
    ConditionBit = State.get(BlockInMask, Part);
    if (ConditionBit->getType()->isVectorTy())
      ConditionBit = State.Builder.CreateExtractElement(
          ConditionBit, State.Builder.getInt32(Lane));
  }

  // Replace the temporary unreachable terminator with a new conditional
  // branch, whose two destinations will be set later when they are created.
  auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
  assert(isa<UnreachableInst>(CurrentTerminator) &&
         "Expected to replace unreachable terminator with conditional branch.");
  auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
  CondBr->setSuccessor(0, nullptr);
  ReplaceInstWithInst(CurrentTerminator, CondBr);
}

void VPPredInstPHIRecipe::execute(VPTransformState &State) {
  assert(State.Instance && "Predicated instruction PHI works per instance.");
  Instruction *ScalarPredInst = cast<Instruction>(
      State.ValueMap.getScalarValue(PredInst, *State.Instance));
  BasicBlock *PredicatedBB = ScalarPredInst->getParent();
  BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
  assert(PredicatingBB && "Predicated block has no single predecessor.");

  // By current pack/unpack logic we need to generate only a single phi node:
  // if a vector value for the predicated instruction exists at this point it
  // means the instruction has vector users only, and a phi for the vector
  // value is needed. In this case the recipe of the predicated instruction is
  // marked to also do that packing, thereby "hoisting" the insert-element
  // sequence. Otherwise, a phi node for the scalar value is needed.
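  // Illustrative sketch of the two PHI shapes (value names hypothetical,
  // editorial note):
  //   %vphi = phi <4 x i32> [ %prev.vec, %predicating ], [ %ins.elt, %predicated ]
  //   %sphi = phi i32 [ undef, %predicating ], [ %scalar.inst, %predicated ]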
  unsigned Part = State.Instance->Part;
  if (State.ValueMap.hasVectorValue(PredInst, Part)) {
    Value *VectorValue = State.ValueMap.getVectorValue(PredInst, Part);
    InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
    PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
    VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
    VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
    State.ValueMap.resetVectorValue(PredInst, Part, VPhi); // Update cache.
  } else {
    Type *PredInstType = PredInst->getType();
    PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
    Phi->addIncoming(UndefValue::get(ScalarPredInst->getType()), PredicatingBB);
    Phi->addIncoming(ScalarPredInst, PredicatedBB);
    State.ValueMap.resetScalarValue(PredInst, *State.Instance, Phi);
  }
}

void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
  if (!User)
    return State.ILV->vectorizeMemoryInstruction(&Instr);

  // Last (and currently only) operand is a mask.
  InnerLoopVectorizer::VectorParts MaskValues(State.UF);
  VPValue *Mask = User->getOperand(User->getNumOperands() - 1);
  for (unsigned Part = 0; Part < State.UF; ++Part)
    MaskValues[Part] = State.get(Mask, Part);
  State.ILV->vectorizeMemoryInstruction(&Instr, &MaskValues);
}

bool LoopVectorizePass::processLoop(Loop *L) {
  assert(L->empty() && "Only process inner loops.");

#ifndef NDEBUG
  const std::string DebugLocStr = getDebugLocString(L);
#endif /* NDEBUG */

  DEBUG(dbgs() << "\nLV: Checking a loop in \""
               << L->getHeader()->getParent()->getName() << "\" from "
               << DebugLocStr << "\n");

  LoopVectorizeHints Hints(L, DisableUnrolling, *ORE);

  DEBUG(dbgs() << "LV: Loop hints:"
               << " force="
               << (Hints.getForce() == LoopVectorizeHints::FK_Disabled
                       ? "disabled"
                       : (Hints.getForce() == LoopVectorizeHints::FK_Enabled
                              ? "enabled"
                              : "?"))
               << " width=" << Hints.getWidth()
               << " unroll=" << Hints.getInterleave() << "\n");

  // Function containing loop
  Function *F = L->getHeader()->getParent();

  // Looking at the diagnostic output is the only way to determine if a loop
  // was vectorized (other than looking at the IR or machine code), so it
  // is important to generate an optimization remark for each loop. Most of
  // these messages are generated as OptimizationRemarkAnalysis. Remarks
  // generated as OptimizationRemark and OptimizationRemarkMissed are
  // less verbose reporting vectorized loops and unvectorized loops that may
  // benefit from vectorization, respectively.

  if (!Hints.allowVectorization(F, L, AlwaysVectorize)) {
    DEBUG(dbgs() << "LV: Loop hints prevent vectorization.\n");
    return false;
  }

  PredicatedScalarEvolution PSE(*SE, *L);

  // Check if it is legal to vectorize the loop.
  LoopVectorizationRequirements Requirements(*ORE);
  LoopVectorizationLegality LVL(L, PSE, DT, TLI, AA, F, TTI, GetLAA, LI, ORE,
-                               &Requirements, &Hints);
+                               &Requirements, &Hints, DB, AC);
  if (!LVL.canVectorize()) {
    DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
    emitMissedWarning(F, L, Hints, ORE);
    return false;
  }

  // Check the function attributes to find out if this function should be
  // optimized for size.
  bool OptForSize =
      Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->optForSize();

  // Check the loop for a trip count threshold: vectorize loops with a tiny
  // trip count by optimizing for size, to minimize overheads.
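  // (E.g., a loop with a known trip count of 3 would only be vectorized under
  // the OptForSize regime set up below, where no runtime guards or scalar
  // epilogue are generated; editorial example.)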
  unsigned ExpectedTC = SE->getSmallConstantMaxTripCount(L);
  bool HasExpectedTC = (ExpectedTC > 0);

  if (!HasExpectedTC && LoopVectorizeWithBlockFrequency) {
    auto EstimatedTC = getLoopEstimatedTripCount(L);
    if (EstimatedTC) {
      ExpectedTC = *EstimatedTC;
      HasExpectedTC = true;
    }
  }

  if (HasExpectedTC && ExpectedTC < TinyTripCountVectorThreshold) {
    DEBUG(dbgs() << "LV: Found a loop with a very small trip count. "
                 << "This loop is worth vectorizing only if no scalar "
                 << "iteration overheads are incurred.");
    if (Hints.getForce() == LoopVectorizeHints::FK_Enabled)
      DEBUG(dbgs() << " But vectorizing was explicitly forced.\n");
    else {
      DEBUG(dbgs() << "\n");
      // Loops with a very small trip count are considered for vectorization
      // under OptForSize, thereby making sure the cost of their loop body is
      // dominant, free of runtime guards and scalar iteration overheads.
      OptForSize = true;
    }
  }

  // Check the function attributes to see if implicit floats are allowed.
  // FIXME: This check doesn't seem possibly correct -- what if the loop is
  // an integer loop and the vector instructions selected are purely integer
  // vector instructions?
  if (F->hasFnAttribute(Attribute::NoImplicitFloat)) {
    DEBUG(dbgs() << "LV: Can't vectorize when the NoImplicitFloat"
                    " attribute is used.\n");
    ORE->emit(createMissedAnalysis(Hints.vectorizeAnalysisPassName(),
                                   "NoImplicitFloat", L)
              << "loop not vectorized due to NoImplicitFloat attribute");
    emitMissedWarning(F, L, Hints, ORE);
    return false;
  }

  // Check if the target supports potentially unsafe FP vectorization.
  // FIXME: Add a check for the type of safety issue (denormal, signaling)
  // for the target we're vectorizing for, to make sure none of the
  // additional fp-math flags can help.
  if (Hints.isPotentiallyUnsafe() &&
      TTI->isFPVectorizationPotentiallyUnsafe()) {
    DEBUG(dbgs() << "LV: Potentially unsafe FP op prevents vectorization.\n");
    ORE->emit(
        createMissedAnalysis(Hints.vectorizeAnalysisPassName(), "UnsafeFP", L)
        << "loop not vectorized due to unsafe FP support.");
    emitMissedWarning(F, L, Hints, ORE);
    return false;
  }

  // Use the cost model.
  LoopVectorizationCostModel CM(L, PSE, LI, &LVL, *TTI, TLI, DB, AC, ORE, F,
                                &Hints);
  CM.collectValuesToIgnore();

  // Use the planner for vectorization.
  LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM);

  // Get user vectorization factor.
  unsigned UserVF = Hints.getWidth();

  // Plan how to best vectorize, return the best VF and its cost.
  LoopVectorizationCostModel::VectorizationFactor VF =
      LVP.plan(OptForSize, UserVF);

  // Select the interleave count.
  unsigned IC = CM.selectInterleaveCount(OptForSize, VF.Width, VF.Cost);

  // Get user interleave count.
  unsigned UserIC = Hints.getInterleave();

  // Identify the diagnostic messages that should be produced.
  std::pair<StringRef, std::string> VecDiagMsg, IntDiagMsg;
  bool VectorizeLoop = true, InterleaveLoop = true;
  if (Requirements.doesNotMeet(F, L, Hints)) {
    DEBUG(dbgs() << "LV: Not vectorizing: loop did not meet vectorization "
                    "requirements.\n");
    emitMissedWarning(F, L, Hints, ORE);
    return false;
  }

  if (VF.Width == 1) {
    DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
    VecDiagMsg = std::make_pair(
        "VectorizationNotBeneficial",
        "the cost-model indicates that vectorization is not beneficial");
    VectorizeLoop = false;
  }

  if (IC == 1 && UserIC <= 1) {
    // Tell the user interleaving is not beneficial.
DEBUG(dbgs() << "LV: Interleaving is not beneficial.\n"); IntDiagMsg = std::make_pair( "InterleavingNotBeneficial", "the cost-model indicates that interleaving is not beneficial"); InterleaveLoop = false; if (UserIC == 1) { IntDiagMsg.first = "InterleavingNotBeneficialAndDisabled"; IntDiagMsg.second += " and is explicitly disabled or interleave count is set to 1"; } } else if (IC > 1 && UserIC == 1) { // Tell the user interleaving is beneficial, but it explicitly disabled. DEBUG(dbgs() << "LV: Interleaving is beneficial but is explicitly disabled."); IntDiagMsg = std::make_pair( "InterleavingBeneficialButDisabled", "the cost-model indicates that interleaving is beneficial " "but is explicitly disabled or interleave count is set to 1"); InterleaveLoop = false; } // Override IC if user provided an interleave count. IC = UserIC > 0 ? UserIC : IC; // Emit diagnostic messages, if any. const char *VAPassName = Hints.vectorizeAnalysisPassName(); if (!VectorizeLoop && !InterleaveLoop) { // Do not vectorize or interleaving the loop. ORE->emit([&]() { return OptimizationRemarkMissed(VAPassName, VecDiagMsg.first, L->getStartLoc(), L->getHeader()) << VecDiagMsg.second; }); ORE->emit([&]() { return OptimizationRemarkMissed(LV_NAME, IntDiagMsg.first, L->getStartLoc(), L->getHeader()) << IntDiagMsg.second; }); return false; } else if (!VectorizeLoop && InterleaveLoop) { DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n'); ORE->emit([&]() { return OptimizationRemarkAnalysis(VAPassName, VecDiagMsg.first, L->getStartLoc(), L->getHeader()) << VecDiagMsg.second; }); } else if (VectorizeLoop && !InterleaveLoop) { DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in " << DebugLocStr << '\n'); ORE->emit([&]() { return OptimizationRemarkAnalysis(LV_NAME, IntDiagMsg.first, L->getStartLoc(), L->getHeader()) << IntDiagMsg.second; }); } else if (VectorizeLoop && InterleaveLoop) { DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in " << DebugLocStr << '\n'); DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n'); } LVP.setBestPlan(VF.Width, IC); using namespace ore; if (!VectorizeLoop) { assert(IC > 1 && "interleave count should not be 1 or 0"); // If we decided that it is not legal to vectorize the loop, then // interleave it. InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL, &CM); LVP.executePlan(Unroller, DT); ORE->emit([&]() { return OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(), L->getHeader()) << "interleaved loop (interleaved count: " << NV("InterleaveCount", IC) << ")"; }); } else { // If we decided that it is *legal* to vectorize the loop, then do it. InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, IC, &LVL, &CM); LVP.executePlan(LB, DT); ++LoopsVectorized; // Add metadata to disable runtime unrolling a scalar loop when there are // no runtime checks about strides and memory. A scalar loop that is // rarely used is not worth unrolling. if (!LB.areSafetyChecksAdded()) AddRuntimeUnrollDisableMetaData(L); // Report the vectorization decision. ORE->emit([&]() { return OptimizationRemark(LV_NAME, "Vectorized", L->getStartLoc(), L->getHeader()) << "vectorized loop (vectorization width: " << NV("VectorizationFactor", VF.Width) << ", interleaved count: " << NV("InterleaveCount", IC) << ")"; }); } // Mark the loop as already vectorized to avoid vectorizing again. 
  Hints.setAlreadyVectorized();

  DEBUG(verifyFunction(*L->getHeader()->getParent()));
  return true;
}

bool LoopVectorizePass::runImpl(
    Function &F, ScalarEvolution &SE_, LoopInfo &LI_, TargetTransformInfo &TTI_,
    DominatorTree &DT_, BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_,
    DemandedBits &DB_, AliasAnalysis &AA_, AssumptionCache &AC_,
    std::function<const LoopAccessInfo &(Loop &)> &GetLAA_,
    OptimizationRemarkEmitter &ORE_) {
  SE = &SE_;
  LI = &LI_;
  TTI = &TTI_;
  DT = &DT_;
  BFI = &BFI_;
  TLI = TLI_;
  AA = &AA_;
  AC = &AC_;
  GetLAA = &GetLAA_;
  DB = &DB_;
  ORE = &ORE_;

  // Don't attempt if
  // 1. the target claims to have no vector registers, and
  // 2. interleaving won't help ILP.
  //
  // The second condition is necessary because, even if the target has no
  // vector registers, loop vectorization may still enable scalar
  // interleaving.
  if (!TTI->getNumberOfRegisters(true) && TTI->getMaxInterleaveFactor(1) < 2)
    return false;

  bool Changed = false;

  // The vectorizer requires loops to be in simplified form.
  // Since simplification may add new inner loops, it has to run before the
  // legality and profitability checks. This means running the loop vectorizer
  // will simplify all loops, regardless of whether anything ends up being
  // vectorized.
  for (auto &L : *LI)
    Changed |= simplifyLoop(L, DT, LI, SE, AC, false /* PreserveLCSSA */);

  // Build up a worklist of inner-loops to vectorize. This is necessary as
  // the act of vectorizing or partially unrolling a loop creates new loops
  // and can invalidate iterators across the loops.
  SmallVector<Loop *, 8> Worklist;

  for (Loop *L : *LI)
    addAcyclicInnerLoop(*L, Worklist);

  LoopsAnalyzed += Worklist.size();

  // Now walk the identified inner loops.
  while (!Worklist.empty()) {
    Loop *L = Worklist.pop_back_val();

    // For the inner loops we actually process, form LCSSA to simplify the
    // transform.
    Changed |= formLCSSARecursively(*L, *DT, LI, SE);

    Changed |= processLoop(L);
  }

  // Process each loop nest in the function.
  return Changed;
}

PreservedAnalyses LoopVectorizePass::run(Function &F,
                                         FunctionAnalysisManager &AM) {
  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
  auto &LI = AM.getResult<LoopAnalysis>(F);
  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &BFI = AM.getResult<BlockFrequencyAnalysis>(F);
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  auto &AA = AM.getResult<AAManager>(F);
  auto &AC = AM.getResult<AssumptionAnalysis>(F);
  auto &DB = AM.getResult<DemandedBitsAnalysis>(F);
  auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);

  auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
  std::function<const LoopAccessInfo &(Loop &)> GetLAA =
      [&](Loop &L) -> const LoopAccessInfo & {
    LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, nullptr};
    return LAM.getResult<LoopAccessAnalysis>(L, AR);
  };
  bool Changed =
      runImpl(F, SE, LI, TTI, DT, BFI, &TLI, DB, AA, AC, GetLAA, ORE);
  if (!Changed)
    return PreservedAnalyses::all();
  PreservedAnalyses PA;
  PA.preserve<LoopAnalysis>();
  PA.preserve<DominatorTreeAnalysis>();
  PA.preserve<BasicAA>();
  PA.preserve<GlobalsAA>();
  return PA;
}
Index: head/contrib/llvm/tools/clang/lib/Basic/Targets/X86.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/Basic/Targets/X86.cpp	(revision 329982)
+++ head/contrib/llvm/tools/clang/lib/Basic/Targets/X86.cpp	(revision 329983)
@@ -1,1602 +1,1603 @@
//===--- X86.cpp - Implement X86 target feature support -------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements X86 TargetInfo objects.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"

namespace clang {
namespace targets {

const Builtin::Info BuiltinInfoX86[] = {
#define BUILTIN(ID, TYPE, ATTRS)                                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
#define TARGET_HEADER_BUILTIN(ID, TYPE, ATTRS, HEADER, LANGS, FEATURE)         \
  {#ID, TYPE, ATTRS, HEADER, LANGS, FEATURE},
#include "clang/Basic/BuiltinsX86.def"

#define BUILTIN(ID, TYPE, ATTRS)                                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
#define TARGET_HEADER_BUILTIN(ID, TYPE, ATTRS, HEADER, LANGS, FEATURE)         \
  {#ID, TYPE, ATTRS, HEADER, LANGS, FEATURE},
#include "clang/Basic/BuiltinsX86_64.def"
};

static const char *const GCCRegNames[] = {
    "ax",    "dx",    "cx",    "bx",    "si",    "di",    "bp",    "sp",
    "st",    "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)",
    "argp",  "flags", "fpcr",  "fpsr",  "dirflag", "frame",
    "xmm0",  "xmm1",  "xmm2",  "xmm3",  "xmm4",  "xmm5",  "xmm6",  "xmm7",
    "mm0",   "mm1",   "mm2",   "mm3",   "mm4",   "mm5",   "mm6",   "mm7",
    "r8",    "r9",    "r10",   "r11",   "r12",   "r13",   "r14",   "r15",
    "xmm8",  "xmm9",  "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15",
    "ymm0",  "ymm1",  "ymm2",  "ymm3",  "ymm4",  "ymm5",  "ymm6",  "ymm7",
    "ymm8",  "ymm9",  "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15",
    "xmm16", "xmm17", "xmm18", "xmm19", "xmm20", "xmm21", "xmm22", "xmm23",
    "xmm24", "xmm25", "xmm26", "xmm27", "xmm28", "xmm29", "xmm30", "xmm31",
    "ymm16", "ymm17", "ymm18", "ymm19", "ymm20", "ymm21", "ymm22", "ymm23",
    "ymm24", "ymm25", "ymm26", "ymm27", "ymm28", "ymm29", "ymm30", "ymm31",
    "zmm0",  "zmm1",  "zmm2",  "zmm3",  "zmm4",  "zmm5",  "zmm6",  "zmm7",
    "zmm8",  "zmm9",  "zmm10", "zmm11", "zmm12", "zmm13", "zmm14", "zmm15",
    "zmm16", "zmm17", "zmm18", "zmm19", "zmm20", "zmm21", "zmm22", "zmm23",
    "zmm24", "zmm25", "zmm26", "zmm27", "zmm28", "zmm29", "zmm30", "zmm31",
    "k0",    "k1",    "k2",    "k3",    "k4",    "k5",    "k6",    "k7",
    "cr0",   "cr2",   "cr3",   "cr4",   "cr8",
    "dr0",   "dr1",   "dr2",   "dr3",   "dr6",   "dr7",
    "bnd0",  "bnd1",  "bnd2",  "bnd3",
};

const TargetInfo::AddlRegName AddlRegNames[] = {
    {{"al", "ah", "eax", "rax"}, 0},
    {{"bl", "bh", "ebx", "rbx"}, 3},
    {{"cl", "ch", "ecx", "rcx"}, 2},
    {{"dl", "dh", "edx", "rdx"}, 1},
    {{"esi", "rsi"}, 4},
    {{"edi", "rdi"}, 5},
    {{"esp", "rsp"}, 7},
    {{"ebp", "rbp"}, 6},
    {{"r8d", "r8w", "r8b"}, 38},
    {{"r9d", "r9w", "r9b"}, 39},
    {{"r10d", "r10w", "r10b"}, 40},
    {{"r11d", "r11w", "r11b"}, 41},
    {{"r12d", "r12w", "r12b"}, 42},
    {{"r13d", "r13w", "r13b"}, 43},
    {{"r14d", "r14w", "r14b"}, 44},
    {{"r15d", "r15w", "r15b"}, 45},
};

} // namespace targets
} // namespace clang

using namespace clang;
using namespace clang::targets;

bool X86TargetInfo::setFPMath(StringRef Name) {
  if (Name == "387") {
    FPMath = FP_387;
    return true;
  }
  if (Name == "sse") {
    FPMath = FP_SSE;
    return true;
  }
  return false;
}

bool X86TargetInfo::initFeatureMap(
    llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
    const std::vector<std::string> &FeaturesVec) const {
  // FIXME: This *really* should not be here.
  // X86_64 always has SSE2.
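  // (Editorial example: even `-mno-sse2` reaches this point with "sse2" set
  // for x86_64 triples; the explicit user feature list in FeaturesVec is only
  // applied by the TargetInfo::initFeatureMap call further down.)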
if (getTriple().getArch() == llvm::Triple::x86_64) setFeatureEnabledImpl(Features, "sse2", true); const CPUKind Kind = getCPUKind(CPU); // Enable X87 for all X86 processors but Lakemont. if (Kind != CK_Lakemont) setFeatureEnabledImpl(Features, "x87", true); switch (Kind) { case CK_Generic: case CK_i386: case CK_i486: case CK_i586: case CK_Pentium: case CK_PentiumPro: case CK_Lakemont: break; case CK_PentiumMMX: case CK_Pentium2: case CK_K6: case CK_WinChipC6: setFeatureEnabledImpl(Features, "mmx", true); break; case CK_Icelake: setFeatureEnabledImpl(Features, "vaes", true); setFeatureEnabledImpl(Features, "gfni", true); setFeatureEnabledImpl(Features, "vpclmulqdq", true); setFeatureEnabledImpl(Features, "avx512bitalg", true); setFeatureEnabledImpl(Features, "avx512vnni", true); setFeatureEnabledImpl(Features, "avx512vbmi2", true); setFeatureEnabledImpl(Features, "avx512vpopcntdq", true); LLVM_FALLTHROUGH; case CK_Cannonlake: setFeatureEnabledImpl(Features, "avx512ifma", true); setFeatureEnabledImpl(Features, "avx512vbmi", true); setFeatureEnabledImpl(Features, "sha", true); LLVM_FALLTHROUGH; case CK_SkylakeServer: setFeatureEnabledImpl(Features, "avx512f", true); setFeatureEnabledImpl(Features, "avx512cd", true); setFeatureEnabledImpl(Features, "avx512dq", true); setFeatureEnabledImpl(Features, "avx512bw", true); setFeatureEnabledImpl(Features, "avx512vl", true); setFeatureEnabledImpl(Features, "pku", true); - setFeatureEnabledImpl(Features, "clwb", true); + if (Kind != CK_Cannonlake) // CNL inherits all SKX features, except CLWB + setFeatureEnabledImpl(Features, "clwb", true); LLVM_FALLTHROUGH; case CK_SkylakeClient: setFeatureEnabledImpl(Features, "xsavec", true); setFeatureEnabledImpl(Features, "xsaves", true); setFeatureEnabledImpl(Features, "mpx", true); setFeatureEnabledImpl(Features, "sgx", true); setFeatureEnabledImpl(Features, "clflushopt", true); setFeatureEnabledImpl(Features, "rtm", true); LLVM_FALLTHROUGH; case CK_Broadwell: setFeatureEnabledImpl(Features, "rdseed", true); setFeatureEnabledImpl(Features, "adx", true); setFeatureEnabledImpl(Features, "prfchw", true); LLVM_FALLTHROUGH; case CK_Haswell: setFeatureEnabledImpl(Features, "avx2", true); setFeatureEnabledImpl(Features, "lzcnt", true); setFeatureEnabledImpl(Features, "bmi", true); setFeatureEnabledImpl(Features, "bmi2", true); setFeatureEnabledImpl(Features, "fma", true); setFeatureEnabledImpl(Features, "movbe", true); LLVM_FALLTHROUGH; case CK_IvyBridge: setFeatureEnabledImpl(Features, "rdrnd", true); setFeatureEnabledImpl(Features, "f16c", true); setFeatureEnabledImpl(Features, "fsgsbase", true); LLVM_FALLTHROUGH; case CK_SandyBridge: setFeatureEnabledImpl(Features, "avx", true); setFeatureEnabledImpl(Features, "xsave", true); setFeatureEnabledImpl(Features, "xsaveopt", true); LLVM_FALLTHROUGH; case CK_Westmere: setFeatureEnabledImpl(Features, "aes", true); setFeatureEnabledImpl(Features, "pclmul", true); LLVM_FALLTHROUGH; case CK_Nehalem: setFeatureEnabledImpl(Features, "sse4.2", true); LLVM_FALLTHROUGH; case CK_Penryn: setFeatureEnabledImpl(Features, "sse4.1", true); LLVM_FALLTHROUGH; case CK_Core2: setFeatureEnabledImpl(Features, "ssse3", true); LLVM_FALLTHROUGH; case CK_Yonah: case CK_Prescott: case CK_Nocona: setFeatureEnabledImpl(Features, "sse3", true); setFeatureEnabledImpl(Features, "cx16", true); LLVM_FALLTHROUGH; case CK_PentiumM: case CK_Pentium4: case CK_x86_64: setFeatureEnabledImpl(Features, "sse2", true); LLVM_FALLTHROUGH; case CK_Pentium3: case CK_C3_2: setFeatureEnabledImpl(Features, "sse", true); 
setFeatureEnabledImpl(Features, "fxsr", true); break; case CK_Goldmont: setFeatureEnabledImpl(Features, "sha", true); setFeatureEnabledImpl(Features, "rdrnd", true); setFeatureEnabledImpl(Features, "rdseed", true); setFeatureEnabledImpl(Features, "xsave", true); setFeatureEnabledImpl(Features, "xsaveopt", true); setFeatureEnabledImpl(Features, "xsavec", true); setFeatureEnabledImpl(Features, "xsaves", true); setFeatureEnabledImpl(Features, "clflushopt", true); setFeatureEnabledImpl(Features, "mpx", true); setFeatureEnabledImpl(Features, "fsgsbase", true); LLVM_FALLTHROUGH; case CK_Silvermont: setFeatureEnabledImpl(Features, "aes", true); setFeatureEnabledImpl(Features, "pclmul", true); setFeatureEnabledImpl(Features, "sse4.2", true); setFeatureEnabledImpl(Features, "prfchw", true); LLVM_FALLTHROUGH; case CK_Bonnell: setFeatureEnabledImpl(Features, "movbe", true); setFeatureEnabledImpl(Features, "ssse3", true); setFeatureEnabledImpl(Features, "fxsr", true); setFeatureEnabledImpl(Features, "cx16", true); break; case CK_KNM: // TODO: Add avx5124fmaps/avx5124vnniw. setFeatureEnabledImpl(Features, "avx512vpopcntdq", true); LLVM_FALLTHROUGH; case CK_KNL: setFeatureEnabledImpl(Features, "avx512f", true); setFeatureEnabledImpl(Features, "avx512cd", true); setFeatureEnabledImpl(Features, "avx512er", true); setFeatureEnabledImpl(Features, "avx512pf", true); setFeatureEnabledImpl(Features, "prfchw", true); setFeatureEnabledImpl(Features, "prefetchwt1", true); setFeatureEnabledImpl(Features, "fxsr", true); setFeatureEnabledImpl(Features, "rdseed", true); setFeatureEnabledImpl(Features, "adx", true); setFeatureEnabledImpl(Features, "lzcnt", true); setFeatureEnabledImpl(Features, "bmi", true); setFeatureEnabledImpl(Features, "bmi2", true); setFeatureEnabledImpl(Features, "rtm", true); setFeatureEnabledImpl(Features, "fma", true); setFeatureEnabledImpl(Features, "rdrnd", true); setFeatureEnabledImpl(Features, "f16c", true); setFeatureEnabledImpl(Features, "fsgsbase", true); setFeatureEnabledImpl(Features, "aes", true); setFeatureEnabledImpl(Features, "pclmul", true); setFeatureEnabledImpl(Features, "cx16", true); setFeatureEnabledImpl(Features, "xsaveopt", true); setFeatureEnabledImpl(Features, "xsave", true); setFeatureEnabledImpl(Features, "movbe", true); break; case CK_K6_2: case CK_K6_3: case CK_WinChip2: case CK_C3: setFeatureEnabledImpl(Features, "3dnow", true); break; case CK_AMDFAM10: setFeatureEnabledImpl(Features, "sse4a", true); setFeatureEnabledImpl(Features, "lzcnt", true); setFeatureEnabledImpl(Features, "popcnt", true); LLVM_FALLTHROUGH; case CK_K8SSE3: setFeatureEnabledImpl(Features, "sse3", true); LLVM_FALLTHROUGH; case CK_K8: setFeatureEnabledImpl(Features, "sse2", true); LLVM_FALLTHROUGH; case CK_AthlonXP: setFeatureEnabledImpl(Features, "sse", true); setFeatureEnabledImpl(Features, "fxsr", true); LLVM_FALLTHROUGH; case CK_Athlon: case CK_Geode: setFeatureEnabledImpl(Features, "3dnowa", true); break; case CK_BTVER2: setFeatureEnabledImpl(Features, "avx", true); setFeatureEnabledImpl(Features, "aes", true); setFeatureEnabledImpl(Features, "pclmul", true); setFeatureEnabledImpl(Features, "bmi", true); setFeatureEnabledImpl(Features, "f16c", true); setFeatureEnabledImpl(Features, "xsaveopt", true); setFeatureEnabledImpl(Features, "movbe", true); LLVM_FALLTHROUGH; case CK_BTVER1: setFeatureEnabledImpl(Features, "ssse3", true); setFeatureEnabledImpl(Features, "sse4a", true); setFeatureEnabledImpl(Features, "lzcnt", true); setFeatureEnabledImpl(Features, "popcnt", true); 
setFeatureEnabledImpl(Features, "prfchw", true); setFeatureEnabledImpl(Features, "cx16", true); setFeatureEnabledImpl(Features, "fxsr", true); break; case CK_ZNVER1: setFeatureEnabledImpl(Features, "adx", true); setFeatureEnabledImpl(Features, "aes", true); setFeatureEnabledImpl(Features, "avx2", true); setFeatureEnabledImpl(Features, "bmi", true); setFeatureEnabledImpl(Features, "bmi2", true); setFeatureEnabledImpl(Features, "clflushopt", true); setFeatureEnabledImpl(Features, "clzero", true); setFeatureEnabledImpl(Features, "cx16", true); setFeatureEnabledImpl(Features, "f16c", true); setFeatureEnabledImpl(Features, "fma", true); setFeatureEnabledImpl(Features, "fsgsbase", true); setFeatureEnabledImpl(Features, "fxsr", true); setFeatureEnabledImpl(Features, "lzcnt", true); setFeatureEnabledImpl(Features, "mwaitx", true); setFeatureEnabledImpl(Features, "movbe", true); setFeatureEnabledImpl(Features, "pclmul", true); setFeatureEnabledImpl(Features, "popcnt", true); setFeatureEnabledImpl(Features, "prfchw", true); setFeatureEnabledImpl(Features, "rdrnd", true); setFeatureEnabledImpl(Features, "rdseed", true); setFeatureEnabledImpl(Features, "sha", true); setFeatureEnabledImpl(Features, "sse4a", true); setFeatureEnabledImpl(Features, "xsave", true); setFeatureEnabledImpl(Features, "xsavec", true); setFeatureEnabledImpl(Features, "xsaveopt", true); setFeatureEnabledImpl(Features, "xsaves", true); break; case CK_BDVER4: setFeatureEnabledImpl(Features, "avx2", true); setFeatureEnabledImpl(Features, "bmi2", true); setFeatureEnabledImpl(Features, "mwaitx", true); LLVM_FALLTHROUGH; case CK_BDVER3: setFeatureEnabledImpl(Features, "fsgsbase", true); setFeatureEnabledImpl(Features, "xsaveopt", true); LLVM_FALLTHROUGH; case CK_BDVER2: setFeatureEnabledImpl(Features, "bmi", true); setFeatureEnabledImpl(Features, "fma", true); setFeatureEnabledImpl(Features, "f16c", true); setFeatureEnabledImpl(Features, "tbm", true); LLVM_FALLTHROUGH; case CK_BDVER1: // xop implies avx, sse4a and fma4. setFeatureEnabledImpl(Features, "xop", true); setFeatureEnabledImpl(Features, "lwp", true); setFeatureEnabledImpl(Features, "lzcnt", true); setFeatureEnabledImpl(Features, "aes", true); setFeatureEnabledImpl(Features, "pclmul", true); setFeatureEnabledImpl(Features, "prfchw", true); setFeatureEnabledImpl(Features, "cx16", true); setFeatureEnabledImpl(Features, "fxsr", true); setFeatureEnabledImpl(Features, "xsave", true); break; } if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec)) return false; // Can't do this earlier because we need to be able to explicitly enable // or disable these features and the things that they depend upon. // Enable popcnt if sse4.2 is enabled and popcnt is not explicitly disabled. auto I = Features.find("sse4.2"); if (I != Features.end() && I->getValue() && std::find(FeaturesVec.begin(), FeaturesVec.end(), "-popcnt") == FeaturesVec.end()) Features["popcnt"] = true; // Enable prfchw if 3DNow! is enabled and prfchw is not explicitly disabled. I = Features.find("3dnow"); if (I != Features.end() && I->getValue() && std::find(FeaturesVec.begin(), FeaturesVec.end(), "-prfchw") == FeaturesVec.end()) Features["prfchw"] = true; // Additionally, if SSE is enabled and mmx is not explicitly disabled, // then enable MMX. 
I = Features.find("sse"); if (I != Features.end() && I->getValue() && std::find(FeaturesVec.begin(), FeaturesVec.end(), "-mmx") == FeaturesVec.end()) Features["mmx"] = true; return true; } void X86TargetInfo::setSSELevel(llvm::StringMap &Features, X86SSEEnum Level, bool Enabled) { if (Enabled) { switch (Level) { case AVX512F: Features["avx512f"] = Features["fma"] = Features["f16c"] = true; LLVM_FALLTHROUGH; case AVX2: Features["avx2"] = true; LLVM_FALLTHROUGH; case AVX: Features["avx"] = true; Features["xsave"] = true; LLVM_FALLTHROUGH; case SSE42: Features["sse4.2"] = true; LLVM_FALLTHROUGH; case SSE41: Features["sse4.1"] = true; LLVM_FALLTHROUGH; case SSSE3: Features["ssse3"] = true; LLVM_FALLTHROUGH; case SSE3: Features["sse3"] = true; LLVM_FALLTHROUGH; case SSE2: Features["sse2"] = true; LLVM_FALLTHROUGH; case SSE1: Features["sse"] = true; LLVM_FALLTHROUGH; case NoSSE: break; } return; } switch (Level) { case NoSSE: case SSE1: Features["sse"] = false; LLVM_FALLTHROUGH; case SSE2: Features["sse2"] = Features["pclmul"] = Features["aes"] = Features["sha"] = Features["gfni"] = false; LLVM_FALLTHROUGH; case SSE3: Features["sse3"] = false; setXOPLevel(Features, NoXOP, false); LLVM_FALLTHROUGH; case SSSE3: Features["ssse3"] = false; LLVM_FALLTHROUGH; case SSE41: Features["sse4.1"] = false; LLVM_FALLTHROUGH; case SSE42: Features["sse4.2"] = false; LLVM_FALLTHROUGH; case AVX: Features["fma"] = Features["avx"] = Features["f16c"] = Features["xsave"] = Features["xsaveopt"] = Features["vaes"] = Features["vpclmulqdq"] = false; setXOPLevel(Features, FMA4, false); LLVM_FALLTHROUGH; case AVX2: Features["avx2"] = false; LLVM_FALLTHROUGH; case AVX512F: Features["avx512f"] = Features["avx512cd"] = Features["avx512er"] = Features["avx512pf"] = Features["avx512dq"] = Features["avx512bw"] = Features["avx512vl"] = Features["avx512vbmi"] = Features["avx512ifma"] = Features["avx512vpopcntdq"] = Features["avx512bitalg"] = Features["avx512vnni"] = Features["avx512vbmi2"] = false; break; } } void X86TargetInfo::setMMXLevel(llvm::StringMap &Features, MMX3DNowEnum Level, bool Enabled) { if (Enabled) { switch (Level) { case AMD3DNowAthlon: Features["3dnowa"] = true; LLVM_FALLTHROUGH; case AMD3DNow: Features["3dnow"] = true; LLVM_FALLTHROUGH; case MMX: Features["mmx"] = true; LLVM_FALLTHROUGH; case NoMMX3DNow: break; } return; } switch (Level) { case NoMMX3DNow: case MMX: Features["mmx"] = false; LLVM_FALLTHROUGH; case AMD3DNow: Features["3dnow"] = false; LLVM_FALLTHROUGH; case AMD3DNowAthlon: Features["3dnowa"] = false; break; } } void X86TargetInfo::setXOPLevel(llvm::StringMap &Features, XOPEnum Level, bool Enabled) { if (Enabled) { switch (Level) { case XOP: Features["xop"] = true; LLVM_FALLTHROUGH; case FMA4: Features["fma4"] = true; setSSELevel(Features, AVX, true); LLVM_FALLTHROUGH; case SSE4A: Features["sse4a"] = true; setSSELevel(Features, SSE3, true); LLVM_FALLTHROUGH; case NoXOP: break; } return; } switch (Level) { case NoXOP: case SSE4A: Features["sse4a"] = false; LLVM_FALLTHROUGH; case FMA4: Features["fma4"] = false; LLVM_FALLTHROUGH; case XOP: Features["xop"] = false; break; } } void X86TargetInfo::setFeatureEnabledImpl(llvm::StringMap &Features, StringRef Name, bool Enabled) { // This is a bit of a hack to deal with the sse4 target feature when used // as part of the target attribute. We handle sse4 correctly everywhere // else. See below for more information on how we handle the sse4 options. 
if (Name != "sse4") Features[Name] = Enabled; if (Name == "mmx") { setMMXLevel(Features, MMX, Enabled); } else if (Name == "sse") { setSSELevel(Features, SSE1, Enabled); } else if (Name == "sse2") { setSSELevel(Features, SSE2, Enabled); } else if (Name == "sse3") { setSSELevel(Features, SSE3, Enabled); } else if (Name == "ssse3") { setSSELevel(Features, SSSE3, Enabled); } else if (Name == "sse4.2") { setSSELevel(Features, SSE42, Enabled); } else if (Name == "sse4.1") { setSSELevel(Features, SSE41, Enabled); } else if (Name == "3dnow") { setMMXLevel(Features, AMD3DNow, Enabled); } else if (Name == "3dnowa") { setMMXLevel(Features, AMD3DNowAthlon, Enabled); } else if (Name == "aes") { if (Enabled) setSSELevel(Features, SSE2, Enabled); else Features["vaes"] = false; } else if (Name == "vaes") { if (Enabled) { setSSELevel(Features, AVX, Enabled); Features["aes"] = true; } } else if (Name == "pclmul") { if (Enabled) setSSELevel(Features, SSE2, Enabled); else Features["vpclmulqdq"] = false; } else if (Name == "vpclmulqdq") { if (Enabled) { setSSELevel(Features, AVX, Enabled); Features["pclmul"] = true; } } else if (Name == "gfni") { if (Enabled) setSSELevel(Features, SSE2, Enabled); } else if (Name == "avx") { setSSELevel(Features, AVX, Enabled); } else if (Name == "avx2") { setSSELevel(Features, AVX2, Enabled); } else if (Name == "avx512f") { setSSELevel(Features, AVX512F, Enabled); } else if (Name == "avx512cd" || Name == "avx512er" || Name == "avx512pf" || Name == "avx512dq" || Name == "avx512bw" || Name == "avx512vl" || Name == "avx512vbmi" || Name == "avx512ifma" || Name == "avx512vpopcntdq" || Name == "avx512bitalg" || Name == "avx512vnni" || Name == "avx512vbmi2") { if (Enabled) setSSELevel(Features, AVX512F, Enabled); // Enable BWI instruction if VBMI/VBMI2/BITALG is being enabled. if ((Name.startswith("avx512vbmi") || Name == "avx512bitalg") && Enabled) Features["avx512bw"] = true; // Also disable VBMI/VBMI2/BITALG if BWI is being disabled. if (Name == "avx512bw" && !Enabled) Features["avx512vbmi"] = Features["avx512vbmi2"] = Features["avx512bitalg"] = false; } else if (Name == "fma") { if (Enabled) setSSELevel(Features, AVX, Enabled); else setSSELevel(Features, AVX512F, Enabled); } else if (Name == "fma4") { setXOPLevel(Features, FMA4, Enabled); } else if (Name == "xop") { setXOPLevel(Features, XOP, Enabled); } else if (Name == "sse4a") { setXOPLevel(Features, SSE4A, Enabled); } else if (Name == "f16c") { if (Enabled) setSSELevel(Features, AVX, Enabled); else setSSELevel(Features, AVX512F, Enabled); } else if (Name == "sha") { if (Enabled) setSSELevel(Features, SSE2, Enabled); } else if (Name == "sse4") { // We can get here via the __target__ attribute since that's not controlled // via the -msse4/-mno-sse4 command line alias. Handle this the same way // here - turn on the sse4.2 if enabled, turn off the sse4.1 level if // disabled. if (Enabled) setSSELevel(Features, SSE42, Enabled); else setSSELevel(Features, SSE41, Enabled); } else if (Name == "xsave") { if (!Enabled) Features["xsaveopt"] = false; } else if (Name == "xsaveopt" || Name == "xsavec" || Name == "xsaves") { if (Enabled) Features["xsave"] = true; } } /// handleTargetFeatures - Perform initialization based on the user /// configured set of features. 
bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
                                         DiagnosticsEngine &Diags) {
  for (const auto &Feature : Features) {
    if (Feature[0] != '+')
      continue;

    if (Feature == "+aes") {
      HasAES = true;
    } else if (Feature == "+vaes") {
      HasVAES = true;
    } else if (Feature == "+pclmul") {
      HasPCLMUL = true;
    } else if (Feature == "+vpclmulqdq") {
      HasVPCLMULQDQ = true;
    } else if (Feature == "+lzcnt") {
      HasLZCNT = true;
    } else if (Feature == "+rdrnd") {
      HasRDRND = true;
    } else if (Feature == "+fsgsbase") {
      HasFSGSBASE = true;
    } else if (Feature == "+bmi") {
      HasBMI = true;
    } else if (Feature == "+bmi2") {
      HasBMI2 = true;
    } else if (Feature == "+popcnt") {
      HasPOPCNT = true;
    } else if (Feature == "+rtm") {
      HasRTM = true;
    } else if (Feature == "+prfchw") {
      HasPRFCHW = true;
    } else if (Feature == "+rdseed") {
      HasRDSEED = true;
    } else if (Feature == "+adx") {
      HasADX = true;
    } else if (Feature == "+tbm") {
      HasTBM = true;
    } else if (Feature == "+lwp") {
      HasLWP = true;
    } else if (Feature == "+fma") {
      HasFMA = true;
    } else if (Feature == "+f16c") {
      HasF16C = true;
    } else if (Feature == "+gfni") {
      HasGFNI = true;
    } else if (Feature == "+avx512cd") {
      HasAVX512CD = true;
    } else if (Feature == "+avx512vpopcntdq") {
      HasAVX512VPOPCNTDQ = true;
    } else if (Feature == "+avx512vnni") {
      HasAVX512VNNI = true;
    } else if (Feature == "+avx512er") {
      HasAVX512ER = true;
    } else if (Feature == "+avx512pf") {
      HasAVX512PF = true;
    } else if (Feature == "+avx512dq") {
      HasAVX512DQ = true;
    } else if (Feature == "+avx512bitalg") {
      HasAVX512BITALG = true;
    } else if (Feature == "+avx512bw") {
      HasAVX512BW = true;
    } else if (Feature == "+avx512vl") {
      HasAVX512VL = true;
    } else if (Feature == "+avx512vbmi") {
      HasAVX512VBMI = true;
    } else if (Feature == "+avx512vbmi2") {
      HasAVX512VBMI2 = true;
    } else if (Feature == "+avx512ifma") {
      HasAVX512IFMA = true;
    } else if (Feature == "+sha") {
      HasSHA = true;
    } else if (Feature == "+mpx") {
      HasMPX = true;
    } else if (Feature == "+shstk") {
      HasSHSTK = true;
    } else if (Feature == "+ibt") {
      HasIBT = true;
    } else if (Feature == "+movbe") {
      HasMOVBE = true;
    } else if (Feature == "+sgx") {
      HasSGX = true;
    } else if (Feature == "+cx16") {
      HasCX16 = true;
    } else if (Feature == "+fxsr") {
      HasFXSR = true;
    } else if (Feature == "+xsave") {
      HasXSAVE = true;
    } else if (Feature == "+xsaveopt") {
      HasXSAVEOPT = true;
    } else if (Feature == "+xsavec") {
      HasXSAVEC = true;
    } else if (Feature == "+xsaves") {
      HasXSAVES = true;
    } else if (Feature == "+mwaitx") {
      HasMWAITX = true;
    } else if (Feature == "+pku") {
      HasPKU = true;
    } else if (Feature == "+clflushopt") {
      HasCLFLUSHOPT = true;
    } else if (Feature == "+clwb") {
      HasCLWB = true;
    } else if (Feature == "+prefetchwt1") {
      HasPREFETCHWT1 = true;
    } else if (Feature == "+clzero") {
      HasCLZERO = true;
    } else if (Feature == "+retpoline") {
      HasRetpoline = true;
    } else if (Feature == "+retpoline-external-thunk") {
      HasRetpolineExternalThunk = true;
    }

    X86SSEEnum Level = llvm::StringSwitch<X86SSEEnum>(Feature)
                           .Case("+avx512f", AVX512F)
                           .Case("+avx2", AVX2)
                           .Case("+avx", AVX)
                           .Case("+sse4.2", SSE42)
                           .Case("+sse4.1", SSE41)
                           .Case("+ssse3", SSSE3)
                           .Case("+sse3", SSE3)
                           .Case("+sse2", SSE2)
                           .Case("+sse", SSE1)
                           .Default(NoSSE);
    SSELevel = std::max(SSELevel, Level);

    MMX3DNowEnum ThreeDNowLevel = llvm::StringSwitch<MMX3DNowEnum>(Feature)
                                      .Case("+3dnowa", AMD3DNowAthlon)
                                      .Case("+3dnow", AMD3DNow)
                                      .Case("+mmx", MMX)
                                      .Default(NoMMX3DNow);
    MMX3DNowLevel = std::max(MMX3DNowLevel, ThreeDNowLevel);

    XOPEnum XLevel = llvm::StringSwitch<XOPEnum>(Feature)
                         .Case("+xop", XOP)
                         .Case("+fma4", FMA4)
                         .Case("+sse4a", SSE4A)
.Default(NoXOP); XOPLevel = std::max(XOPLevel, XLevel); } // LLVM doesn't have a separate switch for fpmath, so only accept it if it // matches the selected sse level. if ((FPMath == FP_SSE && SSELevel < SSE1) || (FPMath == FP_387 && SSELevel >= SSE1)) { Diags.Report(diag::err_target_unsupported_fpmath) << (FPMath == FP_SSE ? "sse" : "387"); return false; } SimdDefaultAlign = hasFeature("avx512f") ? 512 : hasFeature("avx") ? 256 : 128; return true; } /// X86TargetInfo::getTargetDefines - Return the set of the X86-specific macro /// definitions for this particular subtarget. void X86TargetInfo::getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const { // Target identification. if (getTriple().getArch() == llvm::Triple::x86_64) { Builder.defineMacro("__amd64__"); Builder.defineMacro("__amd64"); Builder.defineMacro("__x86_64"); Builder.defineMacro("__x86_64__"); if (getTriple().getArchName() == "x86_64h") { Builder.defineMacro("__x86_64h"); Builder.defineMacro("__x86_64h__"); } } else { DefineStd(Builder, "i386", Opts); } // Subtarget options. // FIXME: We are hard-coding the tune parameters based on the CPU, but they // truly should be based on -mtune options. switch (CPU) { case CK_Generic: break; case CK_i386: // The rest are coming from the i386 define above. Builder.defineMacro("__tune_i386__"); break; case CK_i486: case CK_WinChipC6: case CK_WinChip2: case CK_C3: defineCPUMacros(Builder, "i486"); break; case CK_PentiumMMX: Builder.defineMacro("__pentium_mmx__"); Builder.defineMacro("__tune_pentium_mmx__"); LLVM_FALLTHROUGH; case CK_i586: case CK_Pentium: defineCPUMacros(Builder, "i586"); defineCPUMacros(Builder, "pentium"); break; case CK_Pentium3: case CK_PentiumM: Builder.defineMacro("__tune_pentium3__"); LLVM_FALLTHROUGH; case CK_Pentium2: case CK_C3_2: Builder.defineMacro("__tune_pentium2__"); LLVM_FALLTHROUGH; case CK_PentiumPro: defineCPUMacros(Builder, "i686"); defineCPUMacros(Builder, "pentiumpro"); break; case CK_Pentium4: defineCPUMacros(Builder, "pentium4"); break; case CK_Yonah: case CK_Prescott: case CK_Nocona: defineCPUMacros(Builder, "nocona"); break; case CK_Core2: case CK_Penryn: defineCPUMacros(Builder, "core2"); break; case CK_Bonnell: defineCPUMacros(Builder, "atom"); break; case CK_Silvermont: defineCPUMacros(Builder, "slm"); break; case CK_Goldmont: defineCPUMacros(Builder, "goldmont"); break; case CK_Nehalem: case CK_Westmere: case CK_SandyBridge: case CK_IvyBridge: case CK_Haswell: case CK_Broadwell: case CK_SkylakeClient: case CK_SkylakeServer: case CK_Cannonlake: case CK_Icelake: // FIXME: Historically, we defined this legacy name, it would be nice to // remove it at some point. We've never exposed fine-grained names for // recent primary x86 CPUs, and we should keep it that way. defineCPUMacros(Builder, "corei7"); break; case CK_KNL: defineCPUMacros(Builder, "knl"); break; case CK_KNM: break; case CK_Lakemont: defineCPUMacros(Builder, "i586", /*Tuning*/false); defineCPUMacros(Builder, "pentium", /*Tuning*/false); Builder.defineMacro("__tune_lakemont__"); break; case CK_K6_2: Builder.defineMacro("__k6_2__"); Builder.defineMacro("__tune_k6_2__"); LLVM_FALLTHROUGH; case CK_K6_3: if (CPU != CK_K6_2) { // In case of fallthrough // FIXME: GCC may be enabling these in cases where some other k6 // architecture is specified but -m3dnow is explicitly provided. The // exact semantics need to be determined and emulated here. 
Builder.defineMacro("__k6_3__"); Builder.defineMacro("__tune_k6_3__"); } LLVM_FALLTHROUGH; case CK_K6: defineCPUMacros(Builder, "k6"); break; case CK_Athlon: case CK_AthlonXP: defineCPUMacros(Builder, "athlon"); if (SSELevel != NoSSE) { Builder.defineMacro("__athlon_sse__"); Builder.defineMacro("__tune_athlon_sse__"); } break; case CK_K8: case CK_K8SSE3: case CK_x86_64: defineCPUMacros(Builder, "k8"); break; case CK_AMDFAM10: defineCPUMacros(Builder, "amdfam10"); break; case CK_BTVER1: defineCPUMacros(Builder, "btver1"); break; case CK_BTVER2: defineCPUMacros(Builder, "btver2"); break; case CK_BDVER1: defineCPUMacros(Builder, "bdver1"); break; case CK_BDVER2: defineCPUMacros(Builder, "bdver2"); break; case CK_BDVER3: defineCPUMacros(Builder, "bdver3"); break; case CK_BDVER4: defineCPUMacros(Builder, "bdver4"); break; case CK_ZNVER1: defineCPUMacros(Builder, "znver1"); break; case CK_Geode: defineCPUMacros(Builder, "geode"); break; } // Target properties. Builder.defineMacro("__REGISTER_PREFIX__", ""); // Define __NO_MATH_INLINES on linux/x86 so that we don't get inline // functions in glibc header files that use FP Stack inline asm which the // backend can't deal with (PR879). Builder.defineMacro("__NO_MATH_INLINES"); if (HasAES) Builder.defineMacro("__AES__"); if (HasVAES) Builder.defineMacro("__VAES__"); if (HasPCLMUL) Builder.defineMacro("__PCLMUL__"); if (HasVPCLMULQDQ) Builder.defineMacro("__VPCLMULQDQ__"); if (HasLZCNT) Builder.defineMacro("__LZCNT__"); if (HasRDRND) Builder.defineMacro("__RDRND__"); if (HasFSGSBASE) Builder.defineMacro("__FSGSBASE__"); if (HasBMI) Builder.defineMacro("__BMI__"); if (HasBMI2) Builder.defineMacro("__BMI2__"); if (HasPOPCNT) Builder.defineMacro("__POPCNT__"); if (HasRTM) Builder.defineMacro("__RTM__"); if (HasPRFCHW) Builder.defineMacro("__PRFCHW__"); if (HasRDSEED) Builder.defineMacro("__RDSEED__"); if (HasADX) Builder.defineMacro("__ADX__"); if (HasTBM) Builder.defineMacro("__TBM__"); if (HasLWP) Builder.defineMacro("__LWP__"); if (HasMWAITX) Builder.defineMacro("__MWAITX__"); switch (XOPLevel) { case XOP: Builder.defineMacro("__XOP__"); LLVM_FALLTHROUGH; case FMA4: Builder.defineMacro("__FMA4__"); LLVM_FALLTHROUGH; case SSE4A: Builder.defineMacro("__SSE4A__"); LLVM_FALLTHROUGH; case NoXOP: break; } if (HasFMA) Builder.defineMacro("__FMA__"); if (HasF16C) Builder.defineMacro("__F16C__"); if (HasGFNI) Builder.defineMacro("__GFNI__"); if (HasAVX512CD) Builder.defineMacro("__AVX512CD__"); if (HasAVX512VPOPCNTDQ) Builder.defineMacro("__AVX512VPOPCNTDQ__"); if (HasAVX512VNNI) Builder.defineMacro("__AVX512VNNI__"); if (HasAVX512ER) Builder.defineMacro("__AVX512ER__"); if (HasAVX512PF) Builder.defineMacro("__AVX512PF__"); if (HasAVX512DQ) Builder.defineMacro("__AVX512DQ__"); if (HasAVX512BITALG) Builder.defineMacro("__AVX512BITALG__"); if (HasAVX512BW) Builder.defineMacro("__AVX512BW__"); if (HasAVX512VL) Builder.defineMacro("__AVX512VL__"); if (HasAVX512VBMI) Builder.defineMacro("__AVX512VBMI__"); if (HasAVX512VBMI2) Builder.defineMacro("__AVX512VBMI2__"); if (HasAVX512IFMA) Builder.defineMacro("__AVX512IFMA__"); if (HasSHA) Builder.defineMacro("__SHA__"); if (HasFXSR) Builder.defineMacro("__FXSR__"); if (HasXSAVE) Builder.defineMacro("__XSAVE__"); if (HasXSAVEOPT) Builder.defineMacro("__XSAVEOPT__"); if (HasXSAVEC) Builder.defineMacro("__XSAVEC__"); if (HasXSAVES) Builder.defineMacro("__XSAVES__"); if (HasPKU) Builder.defineMacro("__PKU__"); if (HasCX16) Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16"); if (HasCLFLUSHOPT) 
Builder.defineMacro("__CLFLUSHOPT__"); if (HasCLWB) Builder.defineMacro("__CLWB__"); if (HasMPX) Builder.defineMacro("__MPX__"); if (HasSHSTK) Builder.defineMacro("__SHSTK__"); if (HasSGX) Builder.defineMacro("__SGX__"); if (HasPREFETCHWT1) Builder.defineMacro("__PREFETCHWT1__"); if (HasCLZERO) Builder.defineMacro("__CLZERO__"); // Each case falls through to the previous one here. switch (SSELevel) { case AVX512F: Builder.defineMacro("__AVX512F__"); LLVM_FALLTHROUGH; case AVX2: Builder.defineMacro("__AVX2__"); LLVM_FALLTHROUGH; case AVX: Builder.defineMacro("__AVX__"); LLVM_FALLTHROUGH; case SSE42: Builder.defineMacro("__SSE4_2__"); LLVM_FALLTHROUGH; case SSE41: Builder.defineMacro("__SSE4_1__"); LLVM_FALLTHROUGH; case SSSE3: Builder.defineMacro("__SSSE3__"); LLVM_FALLTHROUGH; case SSE3: Builder.defineMacro("__SSE3__"); LLVM_FALLTHROUGH; case SSE2: Builder.defineMacro("__SSE2__"); Builder.defineMacro("__SSE2_MATH__"); // -mfp-math=sse always implied. LLVM_FALLTHROUGH; case SSE1: Builder.defineMacro("__SSE__"); Builder.defineMacro("__SSE_MATH__"); // -mfp-math=sse always implied. LLVM_FALLTHROUGH; case NoSSE: break; } if (Opts.MicrosoftExt && getTriple().getArch() == llvm::Triple::x86) { switch (SSELevel) { case AVX512F: case AVX2: case AVX: case SSE42: case SSE41: case SSSE3: case SSE3: case SSE2: Builder.defineMacro("_M_IX86_FP", Twine(2)); break; case SSE1: Builder.defineMacro("_M_IX86_FP", Twine(1)); break; default: Builder.defineMacro("_M_IX86_FP", Twine(0)); break; } } // Each case falls through to the previous one here. switch (MMX3DNowLevel) { case AMD3DNowAthlon: Builder.defineMacro("__3dNOW_A__"); LLVM_FALLTHROUGH; case AMD3DNow: Builder.defineMacro("__3dNOW__"); LLVM_FALLTHROUGH; case MMX: Builder.defineMacro("__MMX__"); LLVM_FALLTHROUGH; case NoMMX3DNow: break; } if (CPU >= CK_i486) { Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1"); Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2"); Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4"); } if (CPU >= CK_i586) Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8"); if (HasFloat128) Builder.defineMacro("__SIZEOF_FLOAT128__", "16"); } bool X86TargetInfo::isValidFeatureName(StringRef Name) const { return llvm::StringSwitch(Name) .Case("3dnow", true) .Case("3dnowa", true) .Case("adx", true) .Case("aes", true) .Case("avx", true) .Case("avx2", true) .Case("avx512f", true) .Case("avx512cd", true) .Case("avx512vpopcntdq", true) .Case("avx512vnni", true) .Case("avx512er", true) .Case("avx512pf", true) .Case("avx512dq", true) .Case("avx512bitalg", true) .Case("avx512bw", true) .Case("avx512vl", true) .Case("avx512vbmi", true) .Case("avx512vbmi2", true) .Case("avx512ifma", true) .Case("bmi", true) .Case("bmi2", true) .Case("clflushopt", true) .Case("clwb", true) .Case("clzero", true) .Case("cx16", true) .Case("f16c", true) .Case("fma", true) .Case("fma4", true) .Case("fsgsbase", true) .Case("fxsr", true) .Case("gfni", true) .Case("lwp", true) .Case("lzcnt", true) .Case("mmx", true) .Case("movbe", true) .Case("mpx", true) .Case("mwaitx", true) .Case("pclmul", true) .Case("pku", true) .Case("popcnt", true) .Case("prefetchwt1", true) .Case("prfchw", true) .Case("rdrnd", true) .Case("rdseed", true) .Case("rtm", true) .Case("sgx", true) .Case("sha", true) .Case("shstk", true) .Case("sse", true) .Case("sse2", true) .Case("sse3", true) .Case("ssse3", true) .Case("sse4", true) .Case("sse4.1", true) .Case("sse4.2", true) .Case("sse4a", true) .Case("tbm", true) .Case("vaes", true) .Case("vpclmulqdq", true) .Case("x87", true) 
.Case("xop", true) .Case("xsave", true) .Case("xsavec", true) .Case("xsaves", true) .Case("xsaveopt", true) .Default(false); } bool X86TargetInfo::hasFeature(StringRef Feature) const { return llvm::StringSwitch(Feature) .Case("adx", HasADX) .Case("aes", HasAES) .Case("avx", SSELevel >= AVX) .Case("avx2", SSELevel >= AVX2) .Case("avx512f", SSELevel >= AVX512F) .Case("avx512cd", HasAVX512CD) .Case("avx512vpopcntdq", HasAVX512VPOPCNTDQ) .Case("avx512vnni", HasAVX512VNNI) .Case("avx512er", HasAVX512ER) .Case("avx512pf", HasAVX512PF) .Case("avx512dq", HasAVX512DQ) .Case("avx512bitalg", HasAVX512BITALG) .Case("avx512bw", HasAVX512BW) .Case("avx512vl", HasAVX512VL) .Case("avx512vbmi", HasAVX512VBMI) .Case("avx512vbmi2", HasAVX512VBMI2) .Case("avx512ifma", HasAVX512IFMA) .Case("bmi", HasBMI) .Case("bmi2", HasBMI2) .Case("clflushopt", HasCLFLUSHOPT) .Case("clwb", HasCLWB) .Case("clzero", HasCLZERO) .Case("cx16", HasCX16) .Case("f16c", HasF16C) .Case("fma", HasFMA) .Case("fma4", XOPLevel >= FMA4) .Case("fsgsbase", HasFSGSBASE) .Case("fxsr", HasFXSR) .Case("gfni", HasGFNI) .Case("ibt", HasIBT) .Case("lwp", HasLWP) .Case("lzcnt", HasLZCNT) .Case("mm3dnow", MMX3DNowLevel >= AMD3DNow) .Case("mm3dnowa", MMX3DNowLevel >= AMD3DNowAthlon) .Case("mmx", MMX3DNowLevel >= MMX) .Case("movbe", HasMOVBE) .Case("mpx", HasMPX) .Case("mwaitx", HasMWAITX) .Case("pclmul", HasPCLMUL) .Case("pku", HasPKU) .Case("popcnt", HasPOPCNT) .Case("prefetchwt1", HasPREFETCHWT1) .Case("prfchw", HasPRFCHW) .Case("rdrnd", HasRDRND) .Case("rdseed", HasRDSEED) .Case("retpoline", HasRetpoline) .Case("retpoline-external-thunk", HasRetpolineExternalThunk) .Case("rtm", HasRTM) .Case("sgx", HasSGX) .Case("sha", HasSHA) .Case("shstk", HasSHSTK) .Case("sse", SSELevel >= SSE1) .Case("sse2", SSELevel >= SSE2) .Case("sse3", SSELevel >= SSE3) .Case("ssse3", SSELevel >= SSSE3) .Case("sse4.1", SSELevel >= SSE41) .Case("sse4.2", SSELevel >= SSE42) .Case("sse4a", XOPLevel >= SSE4A) .Case("tbm", HasTBM) .Case("vaes", HasVAES) .Case("vpclmulqdq", HasVPCLMULQDQ) .Case("x86", true) .Case("x86_32", getTriple().getArch() == llvm::Triple::x86) .Case("x86_64", getTriple().getArch() == llvm::Triple::x86_64) .Case("xop", XOPLevel >= XOP) .Case("xsave", HasXSAVE) .Case("xsavec", HasXSAVEC) .Case("xsaves", HasXSAVES) .Case("xsaveopt", HasXSAVEOPT) .Default(false); } // We can't use a generic validation scheme for the features accepted here // versus subtarget features accepted in the target attribute because the // bitfield structure that's initialized in the runtime only supports the // below currently rather than the full range of subtarget features. (See // X86TargetInfo::hasFeature for a somewhat comprehensive list). bool X86TargetInfo::validateCpuSupports(StringRef FeatureStr) const { return llvm::StringSwitch(FeatureStr) #define X86_FEATURE_COMPAT(VAL, ENUM, STR) .Case(STR, true) #include "llvm/Support/X86TargetParser.def" .Default(false); } // We can't use a generic validation scheme for the cpus accepted here // versus subtarget cpus accepted in the target attribute because the // variables intitialized by the runtime only support the below currently // rather than the full range of cpus. 
bool X86TargetInfo::validateCpuIs(StringRef FeatureStr) const {
  return llvm::StringSwitch<bool>(FeatureStr)
#define X86_VENDOR(ENUM, STRING) .Case(STRING, true)
#define X86_CPU_TYPE_COMPAT_WITH_ALIAS(ARCHNAME, ENUM, STR, ALIAS)            \
  .Cases(STR, ALIAS, true)
#define X86_CPU_TYPE_COMPAT(ARCHNAME, ENUM, STR) .Case(STR, true)
#define X86_CPU_SUBTYPE_COMPAT(ARCHNAME, ENUM, STR) .Case(STR, true)
#include "llvm/Support/X86TargetParser.def"
      .Default(false);
}

bool X86TargetInfo::validateAsmConstraint(
    const char *&Name, TargetInfo::ConstraintInfo &Info) const {
  switch (*Name) {
  default:
    return false;
  // Constant constraints.
  case 'e': // 32-bit signed integer constant for use with sign-extending
            // x86_64 instructions.
  case 'Z': // 32-bit unsigned integer constant for use with zero-extending
            // x86_64 instructions.
  case 's':
    Info.setRequiresImmediate();
    return true;
  case 'I':
    Info.setRequiresImmediate(0, 31);
    return true;
  case 'J':
    Info.setRequiresImmediate(0, 63);
    return true;
  case 'K':
    Info.setRequiresImmediate(-128, 127);
    return true;
  case 'L':
    Info.setRequiresImmediate({int(0xff), int(0xffff), int(0xffffffff)});
    return true;
  case 'M':
    Info.setRequiresImmediate(0, 3);
    return true;
  case 'N':
    Info.setRequiresImmediate(0, 255);
    return true;
  case 'O':
    Info.setRequiresImmediate(0, 127);
    return true;
  // Register constraints.
  case 'Y': // 'Y' is the first character for several 2-character constraints.
    // Shift the pointer to the second character of the constraint.
    Name++;
    switch (*Name) {
    default:
      return false;
    case 'z':
    case '0': // First SSE register.
    case '2':
    case 't': // Any SSE register, when SSE2 is enabled.
    case 'i': // Any SSE register, when SSE2 and inter-unit moves enabled.
    case 'm': // Any MMX register, when inter-unit moves enabled.
    case 'k': // AVX512 arch mask registers: k1-k7.
      Info.setAllowsRegister();
      return true;
    }
  case 'f': // Any x87 floating point stack register.
    // Constraint 'f' cannot be used for output operands.
    if (Info.ConstraintStr[0] == '=')
      return false;
    Info.setAllowsRegister();
    return true;
  case 'a': // eax.
  case 'b': // ebx.
  case 'c': // ecx.
  case 'd': // edx.
  case 'S': // esi.
  case 'D': // edi.
  case 'A': // edx:eax.
  case 't': // Top of floating point stack.
  case 'u': // Second from top of floating point stack.
  case 'q': // Any register accessible as [r]l: a, b, c, and d.
  case 'y': // Any MMX register.
  case 'v': // Any {X,Y,Z}MM register (Arch & context dependent)
  case 'x': // Any SSE register.
  case 'k': // Any AVX512 mask register (same as Yk, additionally allows k0
            // for intermediate k reg operations).
  case 'Q': // Any register accessible as [r]h: a, b, c, and d.
  case 'R': // "Legacy" registers: ax, bx, cx, dx, di, si, sp, bp.
  case 'l': // "Index" registers: any general register that can be used as an
            // index in a base+index memory access.
    Info.setAllowsRegister();
    return true;
  // Floating point constant constraints.
  case 'C': // SSE floating point constant.
  case 'G': // x87 floating point constant.
    return true;
  }
}

bool X86TargetInfo::validateOutputSize(StringRef Constraint,
                                       unsigned Size) const {
  // Strip off constraint modifiers.
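  // For illustration (hypothetical user code, not part of this file): an
  // inline-asm output such as
  //
  //   asm("..." : "=&r"(Out));
  //
  // arrives here as "=&r"; the '=' (output) and '&' (early-clobber)
  // modifiers are stripped below so only the register class "r" is
  // size-checked.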
while (Constraint[0] == '=' || Constraint[0] == '+' || Constraint[0] == '&') Constraint = Constraint.substr(1); return validateOperandSize(Constraint, Size); } bool X86TargetInfo::validateInputSize(StringRef Constraint, unsigned Size) const { return validateOperandSize(Constraint, Size); } bool X86TargetInfo::validateOperandSize(StringRef Constraint, unsigned Size) const { switch (Constraint[0]) { default: break; case 'k': // Registers k0-k7 (AVX512) size limit is 64 bit. case 'y': return Size <= 64; case 'f': case 't': case 'u': return Size <= 128; case 'Y': // 'Y' is the first character for several 2-character constraints. switch (Constraint[1]) { default: return false; case 'm': // 'Ym' is synonymous with 'y'. case 'k': return Size <= 64; case 'z': case '0': // XMM0 if (SSELevel >= SSE1) return Size <= 128U; return false; case 'i': case 't': case '2': // 'Yi','Yt','Y2' are synonymous with 'x' when SSE2 is enabled. if (SSELevel < SSE2) return false; break; } case 'v': case 'x': if (SSELevel >= AVX512F) // 512-bit zmm registers can be used if target supports AVX512F. return Size <= 512U; else if (SSELevel >= AVX) // 256-bit ymm registers can be used if target supports AVX. return Size <= 256U; return Size <= 128U; } return true; } std::string X86TargetInfo::convertConstraint(const char *&Constraint) const { switch (*Constraint) { case 'a': return std::string("{ax}"); case 'b': return std::string("{bx}"); case 'c': return std::string("{cx}"); case 'd': return std::string("{dx}"); case 'S': return std::string("{si}"); case 'D': return std::string("{di}"); case 'p': // address return std::string("im"); case 't': // top of floating point stack. return std::string("{st}"); case 'u': // second from top of floating point stack. return std::string("{st(1)}"); // second from top of floating point stack. case 'Y': switch (Constraint[1]) { default: // Break from inner switch and fall through (copy single char), // continue parsing after copying the current constraint into // the return string. break; case 'k': case 'm': case 'i': case 't': case 'z': case '0': case '2': // "^" hints llvm that this is a 2 letter constraint. // "Constraint++" is used to promote the string iterator // to the next constraint. return std::string("^") + std::string(Constraint++, 2); } LLVM_FALLTHROUGH; default: return std::string(1, *Constraint); } } bool X86TargetInfo::checkCPUKind(CPUKind Kind) const { // Perform any per-CPU checks necessary to determine if this CPU is // acceptable. // FIXME: This results in terrible diagnostics. Clang just says the CPU is // invalid without explaining *why*. switch (Kind) { case CK_Generic: // No processor selected! 
return false; #define PROC(ENUM, STRING, IS64BIT) \ case CK_##ENUM: \ return IS64BIT || getTriple().getArch() == llvm::Triple::x86; #include "clang/Basic/X86Target.def" } llvm_unreachable("Unhandled CPU kind"); } X86TargetInfo::CPUKind X86TargetInfo::getCPUKind(StringRef CPU) const { return llvm::StringSwitch(CPU) #define PROC(ENUM, STRING, IS64BIT) .Case(STRING, CK_##ENUM) #define PROC_ALIAS(ENUM, ALIAS) .Case(ALIAS, CK_##ENUM) #include "clang/Basic/X86Target.def" .Default(CK_Generic); } ArrayRef X86TargetInfo::getGCCRegNames() const { return llvm::makeArrayRef(GCCRegNames); } ArrayRef X86TargetInfo::getGCCAddlRegNames() const { return llvm::makeArrayRef(AddlRegNames); } ArrayRef X86_32TargetInfo::getTargetBuiltins() const { return llvm::makeArrayRef(BuiltinInfoX86, clang::X86::LastX86CommonBuiltin - Builtin::FirstTSBuiltin + 1); } ArrayRef X86_64TargetInfo::getTargetBuiltins() const { return llvm::makeArrayRef(BuiltinInfoX86, X86::LastTSBuiltin - Builtin::FirstTSBuiltin); } Index: head/contrib/llvm/tools/clang/lib/Sema/SemaExpr.cpp =================================================================== --- head/contrib/llvm/tools/clang/lib/Sema/SemaExpr.cpp (revision 329982) +++ head/contrib/llvm/tools/clang/lib/Sema/SemaExpr.cpp (revision 329983) @@ -1,16099 +1,16100 @@ //===--- SemaExpr.cpp - Semantic Analysis for Expressions -----------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements semantic analysis for expressions. // //===----------------------------------------------------------------------===// #include "TreeTransform.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/ASTContext.h" #include "clang/AST/ASTLambda.h" #include "clang/AST/ASTMutationListener.h" #include "clang/AST/CXXInheritance.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/EvaluatedExprVisitor.h" #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/ExprObjC.h" #include "clang/AST/ExprOpenMP.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/TypeLoc.h" #include "clang/Basic/PartialDiagnostic.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" #include "clang/Lex/LiteralSupport.h" #include "clang/Lex/Preprocessor.h" #include "clang/Sema/AnalysisBasedWarnings.h" #include "clang/Sema/DeclSpec.h" #include "clang/Sema/DelayedDiagnostic.h" #include "clang/Sema/Designator.h" #include "clang/Sema/Initialization.h" #include "clang/Sema/Lookup.h" #include "clang/Sema/ParsedTemplate.h" #include "clang/Sema/Scope.h" #include "clang/Sema/ScopeInfo.h" #include "clang/Sema/SemaFixItUtils.h" #include "clang/Sema/SemaInternal.h" #include "clang/Sema/Template.h" #include "llvm/Support/ConvertUTF.h" using namespace clang; using namespace sema; /// \brief Determine whether the use of this declaration is valid, without /// emitting diagnostics. bool Sema::CanUseDecl(NamedDecl *D, bool TreatUnavailableAsInvalid) { // See if this is an auto-typed variable whose initializer we are parsing. if (ParsingInitForAutoVars.count(D)) return false; // See if this is a deleted function. if (FunctionDecl *FD = dyn_cast(D)) { if (FD->isDeleted()) return false; // If the function has a deduced return type, and we can't deduce it, // then we can't use it either. 
if (getLangOpts().CPlusPlus14 && FD->getReturnType()->isUndeducedType() && DeduceReturnType(FD, SourceLocation(), /*Diagnose*/ false)) return false; } // See if this function is unavailable. if (TreatUnavailableAsInvalid && D->getAvailability() == AR_Unavailable && cast(CurContext)->getAvailability() != AR_Unavailable) return false; return true; } static void DiagnoseUnusedOfDecl(Sema &S, NamedDecl *D, SourceLocation Loc) { // Warn if this is used but marked unused. if (const auto *A = D->getAttr()) { // [[maybe_unused]] should not diagnose uses, but __attribute__((unused)) // should diagnose them. if (A->getSemanticSpelling() != UnusedAttr::CXX11_maybe_unused && A->getSemanticSpelling() != UnusedAttr::C2x_maybe_unused) { const Decl *DC = cast_or_null(S.getCurObjCLexicalContext()); if (DC && !DC->hasAttr()) S.Diag(Loc, diag::warn_used_but_marked_unused) << D->getDeclName(); } } } /// \brief Emit a note explaining that this function is deleted. void Sema::NoteDeletedFunction(FunctionDecl *Decl) { assert(Decl->isDeleted()); CXXMethodDecl *Method = dyn_cast(Decl); if (Method && Method->isDeleted() && Method->isDefaulted()) { // If the method was explicitly defaulted, point at that declaration. if (!Method->isImplicit()) Diag(Decl->getLocation(), diag::note_implicitly_deleted); // Try to diagnose why this special member function was implicitly // deleted. This might fail, if that reason no longer applies. CXXSpecialMember CSM = getSpecialMember(Method); if (CSM != CXXInvalid) ShouldDeleteSpecialMember(Method, CSM, nullptr, /*Diagnose=*/true); return; } auto *Ctor = dyn_cast(Decl); if (Ctor && Ctor->isInheritingConstructor()) return NoteDeletedInheritingConstructor(Ctor); Diag(Decl->getLocation(), diag::note_availability_specified_here) << Decl << true; } /// \brief Determine whether a FunctionDecl was ever declared with an /// explicit storage class. static bool hasAnyExplicitStorageClass(const FunctionDecl *D) { for (auto I : D->redecls()) { if (I->getStorageClass() != SC_None) return true; } return false; } /// \brief Check whether we're in an extern inline function and referring to a /// variable or function with internal linkage (C11 6.7.4p3). /// /// This is only a warning because we used to silently accept this code, but /// in many cases it will not behave correctly. This is not enabled in C++ mode /// because the restriction language is a bit weaker (C++11 [basic.def.odr]p6) /// and so while there may still be user mistakes, most of the time we can't /// prove that there are errors. static void diagnoseUseOfInternalDeclInInlineFunction(Sema &S, const NamedDecl *D, SourceLocation Loc) { // This is disabled under C++; there are too many ways for this to fire in // contexts where the warning is a false positive, or where it is technically // correct but benign. if (S.getLangOpts().CPlusPlus) return; // Check if this is an inlined function or method. FunctionDecl *Current = S.getCurFunctionDecl(); if (!Current) return; if (!Current->isInlined()) return; if (!Current->isExternallyVisible()) return; // Check if the decl has internal linkage. if (D->getFormalLinkage() != InternalLinkage) return; // Downgrade from ExtWarn to Extension if // (1) the supposedly external inline function is in the main file, // and probably won't be included anywhere else. // (2) the thing we're referencing is a pure function. // (3) the thing we're referencing is another inline function. // This last can give us false negatives, but it's better than warning on // wrappers for simple C library functions. 
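// For illustration (hypothetical C code, not part of this file), the
// diagnosed pattern looks like:
//
//   static int helper(void) { return 42; }     // internal linkage
//   inline int api(void) { return helper(); }  // C99/C11 extern inline
//
// If 'api' is inlined into another translation unit, the reference to
// 'helper' violates C11 6.7.4p3.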
const FunctionDecl *UsedFn = dyn_cast(D); bool DowngradeWarning = S.getSourceManager().isInMainFile(Loc); if (!DowngradeWarning && UsedFn) DowngradeWarning = UsedFn->isInlined() || UsedFn->hasAttr(); S.Diag(Loc, DowngradeWarning ? diag::ext_internal_in_extern_inline_quiet : diag::ext_internal_in_extern_inline) << /*IsVar=*/!UsedFn << D; S.MaybeSuggestAddingStaticToDecl(Current); S.Diag(D->getCanonicalDecl()->getLocation(), diag::note_entity_declared_at) << D; } void Sema::MaybeSuggestAddingStaticToDecl(const FunctionDecl *Cur) { const FunctionDecl *First = Cur->getFirstDecl(); // Suggest "static" on the function, if possible. if (!hasAnyExplicitStorageClass(First)) { SourceLocation DeclBegin = First->getSourceRange().getBegin(); Diag(DeclBegin, diag::note_convert_inline_to_static) << Cur << FixItHint::CreateInsertion(DeclBegin, "static "); } } /// \brief Determine whether the use of this declaration is valid, and /// emit any corresponding diagnostics. /// /// This routine diagnoses various problems with referencing /// declarations that can occur when using a declaration. For example, /// it might warn if a deprecated or unavailable declaration is being /// used, or produce an error (and return true) if a C++0x deleted /// function is being used. /// /// \returns true if there was an error (this declaration cannot be /// referenced), false otherwise. /// bool Sema::DiagnoseUseOfDecl(NamedDecl *D, SourceLocation Loc, const ObjCInterfaceDecl *UnknownObjCClass, bool ObjCPropertyAccess, bool AvoidPartialAvailabilityChecks) { if (getLangOpts().CPlusPlus && isa(D)) { // If there were any diagnostics suppressed by template argument deduction, // emit them now. auto Pos = SuppressedDiagnostics.find(D->getCanonicalDecl()); if (Pos != SuppressedDiagnostics.end()) { for (const PartialDiagnosticAt &Suppressed : Pos->second) Diag(Suppressed.first, Suppressed.second); // Clear out the list of suppressed diagnostics, so that we don't emit // them again for this specialization. However, we don't obsolete this // entry from the table, because we want to avoid ever emitting these // diagnostics again. Pos->second.clear(); } // C++ [basic.start.main]p3: // The function 'main' shall not be used within a program. if (cast(D)->isMain()) Diag(Loc, diag::ext_main_used); } // See if this is an auto-typed variable whose initializer we are parsing. if (ParsingInitForAutoVars.count(D)) { if (isa(D)) { Diag(Loc, diag::err_binding_cannot_appear_in_own_initializer) << D->getDeclName(); } else { Diag(Loc, diag::err_auto_variable_cannot_appear_in_own_initializer) << D->getDeclName() << cast(D)->getType(); } return true; } // See if this is a deleted function. if (FunctionDecl *FD = dyn_cast(D)) { if (FD->isDeleted()) { auto *Ctor = dyn_cast(FD); if (Ctor && Ctor->isInheritingConstructor()) Diag(Loc, diag::err_deleted_inherited_ctor_use) << Ctor->getParent() << Ctor->getInheritedConstructor().getConstructor()->getParent(); else Diag(Loc, diag::err_deleted_function_use); NoteDeletedFunction(FD); return true; } // If the function has a deduced return type, and we can't deduce it, // then we can't use it either. 
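// For illustration (hypothetical C++14 code, not part of this file):
//
//   auto f();                // declared; return type not yet deduced
//   int g() { return f(); }  // error: 'f' used before deduction succeeds
//   auto f() { return 0; }   // definition deduces 'int'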
if (getLangOpts().CPlusPlus14 && FD->getReturnType()->isUndeducedType() && DeduceReturnType(FD, Loc)) return true; if (getLangOpts().CUDA && !CheckCUDACall(Loc, FD)) return true; } auto getReferencedObjCProp = [](const NamedDecl *D) -> const ObjCPropertyDecl * { if (const auto *MD = dyn_cast(D)) return MD->findPropertyDecl(); return nullptr; }; if (const ObjCPropertyDecl *ObjCPDecl = getReferencedObjCProp(D)) { if (diagnoseArgIndependentDiagnoseIfAttrs(ObjCPDecl, Loc)) return true; } else if (diagnoseArgIndependentDiagnoseIfAttrs(D, Loc)) { return true; } // [OpenMP 4.0], 2.15 declare reduction Directive, Restrictions // Only the variables omp_in and omp_out are allowed in the combiner. // Only the variables omp_priv and omp_orig are allowed in the // initializer-clause. auto *DRD = dyn_cast(CurContext); if (LangOpts.OpenMP && DRD && !CurContext->containsDecl(D) && isa(D)) { Diag(Loc, diag::err_omp_wrong_var_in_declare_reduction) << getCurFunction()->HasOMPDeclareReductionCombiner; Diag(D->getLocation(), diag::note_entity_declared_at) << D; return true; } DiagnoseAvailabilityOfDecl(D, Loc, UnknownObjCClass, ObjCPropertyAccess, AvoidPartialAvailabilityChecks); DiagnoseUnusedOfDecl(*this, D, Loc); diagnoseUseOfInternalDeclInInlineFunction(*this, D, Loc); return false; } /// \brief Retrieve the message suffix that should be added to a /// diagnostic complaining about the given function being deleted or /// unavailable. std::string Sema::getDeletedOrUnavailableSuffix(const FunctionDecl *FD) { std::string Message; if (FD->getAvailability(&Message)) return ": " + Message; return std::string(); } /// DiagnoseSentinelCalls - This routine checks whether a call or /// message-send is to a declaration with the sentinel attribute, and /// if so, it checks that the requirements of the sentinel are /// satisfied. void Sema::DiagnoseSentinelCalls(NamedDecl *D, SourceLocation Loc, ArrayRef Args) { const SentinelAttr *attr = D->getAttr(); if (!attr) return; // The number of formal parameters of the declaration. unsigned numFormalParams; // The kind of declaration. This is also an index into a %select in // the diagnostic. enum CalleeType { CT_Function, CT_Method, CT_Block } calleeType; if (ObjCMethodDecl *MD = dyn_cast(D)) { numFormalParams = MD->param_size(); calleeType = CT_Method; } else if (FunctionDecl *FD = dyn_cast(D)) { numFormalParams = FD->param_size(); calleeType = CT_Function; } else if (isa(D)) { QualType type = cast(D)->getType(); const FunctionType *fn = nullptr; if (const PointerType *ptr = type->getAs()) { fn = ptr->getPointeeType()->getAs(); if (!fn) return; calleeType = CT_Function; } else if (const BlockPointerType *ptr = type->getAs()) { fn = ptr->getPointeeType()->castAs(); calleeType = CT_Block; } else { return; } if (const FunctionProtoType *proto = dyn_cast(fn)) { numFormalParams = proto->getNumParams(); } else { numFormalParams = 0; } } else { return; } // "nullPos" is the number of formal parameters at the end which // effectively count as part of the variadic arguments. This is // useful if you would prefer to not have *any* formal parameters, // but the language forces you to have at least one. unsigned nullPos = attr->getNullPos(); assert((nullPos == 0 || nullPos == 1) && "invalid null position on sentinel"); numFormalParams = (nullPos > numFormalParams ? 0 : numFormalParams - nullPos); // The number of arguments which should follow the sentinel. 
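// For illustration (hypothetical user code, not part of this file): with
//
//   void run(const char *first, ...) __attribute__((sentinel));
//
//   run("a", "b", (char *)0);  // ok: trailing null sentinel
//   run("a", "b");             // warns: missing sentinel
//
// With sentinel(N), the null is instead expected N positions before the end
// of the argument list.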
unsigned numArgsAfterSentinel = attr->getSentinel(); // If there aren't enough arguments for all the formal parameters, // the sentinel, and the args after the sentinel, complain. if (Args.size() < numFormalParams + numArgsAfterSentinel + 1) { Diag(Loc, diag::warn_not_enough_argument) << D->getDeclName(); Diag(D->getLocation(), diag::note_sentinel_here) << int(calleeType); return; } // Otherwise, find the sentinel expression. Expr *sentinelExpr = Args[Args.size() - numArgsAfterSentinel - 1]; if (!sentinelExpr) return; if (sentinelExpr->isValueDependent()) return; if (Context.isSentinelNullExpr(sentinelExpr)) return; // Pick a reasonable string to insert. Optimistically use 'nil', 'nullptr', // or 'NULL' if those are actually defined in the context. Only use // 'nil' for ObjC methods, where it's much more likely that the // variadic arguments form a list of object pointers. SourceLocation MissingNilLoc = getLocForEndOfToken(sentinelExpr->getLocEnd()); std::string NullValue; if (calleeType == CT_Method && PP.isMacroDefined("nil")) NullValue = "nil"; else if (getLangOpts().CPlusPlus11) NullValue = "nullptr"; else if (PP.isMacroDefined("NULL")) NullValue = "NULL"; else NullValue = "(void*) 0"; if (MissingNilLoc.isInvalid()) Diag(Loc, diag::warn_missing_sentinel) << int(calleeType); else Diag(MissingNilLoc, diag::warn_missing_sentinel) << int(calleeType) << FixItHint::CreateInsertion(MissingNilLoc, ", " + NullValue); Diag(D->getLocation(), diag::note_sentinel_here) << int(calleeType); } SourceRange Sema::getExprRange(Expr *E) const { return E ? E->getSourceRange() : SourceRange(); } //===----------------------------------------------------------------------===// // Standard Promotions and Conversions //===----------------------------------------------------------------------===// /// DefaultFunctionArrayConversion (C99 6.3.2.1p3, C99 6.3.2.1p4). ExprResult Sema::DefaultFunctionArrayConversion(Expr *E, bool Diagnose) { // Handle any placeholder expressions which made it here. if (E->getType()->isPlaceholderType()) { ExprResult result = CheckPlaceholderExpr(E); if (result.isInvalid()) return ExprError(); E = result.get(); } QualType Ty = E->getType(); assert(!Ty.isNull() && "DefaultFunctionArrayConversion - missing type"); if (Ty->isFunctionType()) { if (auto *DRE = dyn_cast(E->IgnoreParenCasts())) if (auto *FD = dyn_cast(DRE->getDecl())) if (!checkAddressOfFunctionIsAvailable(FD, Diagnose, E->getExprLoc())) return ExprError(); E = ImpCastExprToType(E, Context.getPointerType(Ty), CK_FunctionToPointerDecay).get(); } else if (Ty->isArrayType()) { // In C90 mode, arrays only promote to pointers if the array expression is // an lvalue. The relevant legalese is C90 6.2.2.1p3: "an lvalue that has // type 'array of type' is converted to an expression that has type 'pointer // to type'...". In C99 this was changed to: C99 6.3.2.1p3: "an expression // that has type 'array of type' ...". The relevant change is "an lvalue" // (C90) to "an expression" (C99). // // C++ 4.2p1: // An lvalue or rvalue of type "array of N T" or "array of unknown bound of // T" can be converted to an rvalue of type "pointer to T". // if (getLangOpts().C99 || getLangOpts().CPlusPlus || E->isLValue()) E = ImpCastExprToType(E, Context.getArrayDecayedType(Ty), CK_ArrayToPointerDecay).get(); } return E; } static void CheckForNullPointerDereference(Sema &S, Expr *E) { // Check to see if we are dereferencing a null pointer. 
// If so, and if not volatile-qualified, this is undefined behavior that the
// optimizer will delete, so warn about it. People sometimes try to use this
// to get a deterministic trap and are surprised by clang's behavior. This
// only handles the pattern "*null", which is a very syntactic check.
  if (UnaryOperator *UO = dyn_cast<UnaryOperator>(E->IgnoreParenCasts()))
    if (UO->getOpcode() == UO_Deref &&
        UO->getSubExpr()->IgnoreParenCasts()->
          isNullPointerConstant(S.Context, Expr::NPC_ValueDependentIsNotNull) &&
        !UO->getType().isVolatileQualified()) {
    S.DiagRuntimeBehavior(UO->getOperatorLoc(), UO,
                          S.PDiag(diag::warn_indirection_through_null)
                            << UO->getSubExpr()->getSourceRange());
    S.DiagRuntimeBehavior(UO->getOperatorLoc(), UO,
                          S.PDiag(diag::note_indirection_through_null));
  }
}

static void DiagnoseDirectIsaAccess(Sema &S, const ObjCIvarRefExpr *OIRE,
                                    SourceLocation AssignLoc,
                                    const Expr *RHS) {
  const ObjCIvarDecl *IV = OIRE->getDecl();
  if (!IV)
    return;

  DeclarationName MemberName = IV->getDeclName();
  IdentifierInfo *Member = MemberName.getAsIdentifierInfo();
  if (!Member || !Member->isStr("isa"))
    return;

  const Expr *Base = OIRE->getBase();
  QualType BaseType = Base->getType();
  if (OIRE->isArrow())
    BaseType = BaseType->getPointeeType();
  if (const ObjCObjectType *OTy = BaseType->getAs<ObjCObjectType>())
    if (ObjCInterfaceDecl *IDecl = OTy->getInterface()) {
      ObjCInterfaceDecl *ClassDeclared = nullptr;
      ObjCIvarDecl *IV = IDecl->lookupInstanceVariable(Member, ClassDeclared);
      if (!ClassDeclared->getSuperClass() &&
          (*ClassDeclared->ivar_begin()) == IV) {
        if (RHS) {
          NamedDecl *ObjectSetClass =
              S.LookupSingleName(S.TUScope,
                                 &S.Context.Idents.get("object_setClass"),
                                 SourceLocation(), S.LookupOrdinaryName);
          if (ObjectSetClass) {
            SourceLocation RHSLocEnd = S.getLocForEndOfToken(RHS->getLocEnd());
            S.Diag(OIRE->getExprLoc(), diag::warn_objc_isa_assign)
                << FixItHint::CreateInsertion(OIRE->getLocStart(),
                                              "object_setClass(")
                << FixItHint::CreateReplacement(
                       SourceRange(OIRE->getOpLoc(), AssignLoc), ",")
                << FixItHint::CreateInsertion(RHSLocEnd, ")");
          } else
            S.Diag(OIRE->getLocation(), diag::warn_objc_isa_assign);
        } else {
          NamedDecl *ObjectGetClass =
              S.LookupSingleName(S.TUScope,
                                 &S.Context.Idents.get("object_getClass"),
                                 SourceLocation(), S.LookupOrdinaryName);
          if (ObjectGetClass)
            S.Diag(OIRE->getExprLoc(), diag::warn_objc_isa_use)
                << FixItHint::CreateInsertion(OIRE->getLocStart(),
                                              "object_getClass(")
                << FixItHint::CreateReplacement(
                       SourceRange(OIRE->getOpLoc(), OIRE->getLocEnd()), ")");
          else
            S.Diag(OIRE->getLocation(), diag::warn_objc_isa_use);
        }
        S.Diag(IV->getLocation(), diag::note_ivar_decl);
      }
    }
}

ExprResult Sema::DefaultLvalueConversion(Expr *E) {
  // Handle any placeholder expressions which made it here.
  if (E->getType()->isPlaceholderType()) {
    ExprResult result = CheckPlaceholderExpr(E);
    if (result.isInvalid()) return ExprError();
    E = result.get();
  }

  // C++ [conv.lval]p1:
  //   A glvalue of a non-function, non-array type T can be
  //   converted to a prvalue.
  if (!E->isGLValue()) return E;

  QualType T = E->getType();
  assert(!T.isNull() && "r-value conversion on typeless expression?");

  // We don't want to throw lvalue-to-rvalue casts on top of
  // expressions of certain types in C++.
  if (getLangOpts().CPlusPlus &&
      (E->getType() == Context.OverloadTy ||
       T->isDependentType() ||
       T->isRecordType()))
    return E;

  // The C standard is actually really unclear on this point, and
  // DR106 tells us what the result should be but not why. It's
  // generally best to say that void types just don't undergo
  // lvalue-to-rvalue at all.
Note that expressions of unqualified // 'void' type are never l-values, but qualified void can be. if (T->isVoidType()) return E; // OpenCL usually rejects direct accesses to values of 'half' type. if (getLangOpts().OpenCL && !getOpenCLOptions().isEnabled("cl_khr_fp16") && T->isHalfType()) { Diag(E->getExprLoc(), diag::err_opencl_half_load_store) << 0 << T; return ExprError(); } CheckForNullPointerDereference(*this, E); if (const ObjCIsaExpr *OISA = dyn_cast(E->IgnoreParenCasts())) { NamedDecl *ObjectGetClass = LookupSingleName(TUScope, &Context.Idents.get("object_getClass"), SourceLocation(), LookupOrdinaryName); if (ObjectGetClass) Diag(E->getExprLoc(), diag::warn_objc_isa_use) << FixItHint::CreateInsertion(OISA->getLocStart(), "object_getClass(") << FixItHint::CreateReplacement( SourceRange(OISA->getOpLoc(), OISA->getIsaMemberLoc()), ")"); else Diag(E->getExprLoc(), diag::warn_objc_isa_use); } else if (const ObjCIvarRefExpr *OIRE = dyn_cast(E->IgnoreParenCasts())) DiagnoseDirectIsaAccess(*this, OIRE, SourceLocation(), /* Expr*/nullptr); // C++ [conv.lval]p1: // [...] If T is a non-class type, the type of the prvalue is the // cv-unqualified version of T. Otherwise, the type of the // rvalue is T. // // C99 6.3.2.1p2: // If the lvalue has qualified type, the value has the unqualified // version of the type of the lvalue; otherwise, the value has the // type of the lvalue. if (T.hasQualifiers()) T = T.getUnqualifiedType(); // Under the MS ABI, lock down the inheritance model now. if (T->isMemberPointerType() && Context.getTargetInfo().getCXXABI().isMicrosoft()) (void)isCompleteType(E->getExprLoc(), T); UpdateMarkingForLValueToRValue(E); // Loading a __weak object implicitly retains the value, so we need a cleanup to // balance that. if (E->getType().getObjCLifetime() == Qualifiers::OCL_Weak) Cleanup.setExprNeedsCleanups(true); ExprResult Res = ImplicitCastExpr::Create(Context, T, CK_LValueToRValue, E, nullptr, VK_RValue); // C11 6.3.2.1p2: // ... if the lvalue has atomic type, the value has the non-atomic version // of the type of the lvalue ... if (const AtomicType *Atomic = T->getAs()) { T = Atomic->getValueType().getUnqualifiedType(); Res = ImplicitCastExpr::Create(Context, T, CK_AtomicToNonAtomic, Res.get(), nullptr, VK_RValue); } return Res; } ExprResult Sema::DefaultFunctionArrayLvalueConversion(Expr *E, bool Diagnose) { ExprResult Res = DefaultFunctionArrayConversion(E, Diagnose); if (Res.isInvalid()) return ExprError(); Res = DefaultLvalueConversion(Res.get()); if (Res.isInvalid()) return ExprError(); return Res; } /// CallExprUnaryConversions - a special case of an unary conversion /// performed on a function designator of a call expression. ExprResult Sema::CallExprUnaryConversions(Expr *E) { QualType Ty = E->getType(); ExprResult Res = E; // Only do implicit cast for a function type, but not for a pointer // to function type. if (Ty->isFunctionType()) { Res = ImpCastExprToType(E, Context.getPointerType(Ty), CK_FunctionToPointerDecay).get(); if (Res.isInvalid()) return ExprError(); } Res = DefaultLvalueConversion(Res.get()); if (Res.isInvalid()) return ExprError(); return Res.get(); } /// UsualUnaryConversions - Performs various conversions that are common to most /// operators (C99 6.3). The conversions of array and function types are /// sometimes suppressed. For example, the array->pointer conversion doesn't /// apply if the array is an argument to the sizeof or address (&) operators. /// In these instances, this routine should *not* be called. 
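// For illustration (hypothetical C code, not part of this file), the decay
// this routine performs, and the contexts in which it must be suppressed:
//
//   int a[8];
//   sizeof(a)   // 8 * sizeof(int): no array-to-pointer decay
//   &a          // type 'int (*)[8]': address-of also suppresses decay
//   a + 1       // decay applies; 'a' converts to 'int *'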
ExprResult Sema::UsualUnaryConversions(Expr *E) {
  // First, convert to an r-value.
  ExprResult Res = DefaultFunctionArrayLvalueConversion(E);
  if (Res.isInvalid())
    return ExprError();
  E = Res.get();

  QualType Ty = E->getType();
  assert(!Ty.isNull() && "UsualUnaryConversions - missing type");

  // Half FP has to be promoted to float unless it is natively supported.
  if (Ty->isHalfType() && !getLangOpts().NativeHalfType)
    return ImpCastExprToType(Res.get(), Context.FloatTy, CK_FloatingCast);

  // Try to perform integral promotions if the object has a theoretically
  // promotable type.
  if (Ty->isIntegralOrUnscopedEnumerationType()) {
    // C99 6.3.1.1p2:
    //
    //   The following may be used in an expression wherever an int or
    //   unsigned int may be used:
    //     - an object or expression with an integer type whose integer
    //       conversion rank is less than or equal to the rank of int
    //       and unsigned int.
    //     - A bit-field of type _Bool, int, signed int, or unsigned int.
    //
    //   If an int can represent all values of the original type, the
    //   value is converted to an int; otherwise, it is converted to an
    //   unsigned int. These are called the integer promotions. All
    //   other types are unchanged by the integer promotions.

    QualType PTy = Context.isPromotableBitField(E);
    if (!PTy.isNull()) {
      E = ImpCastExprToType(E, PTy, CK_IntegralCast).get();
      return E;
    }
    if (Ty->isPromotableIntegerType()) {
      QualType PT = Context.getPromotedIntegerType(Ty);
      E = ImpCastExprToType(E, PT, CK_IntegralCast).get();
      return E;
    }
  }
  return E;
}

/// DefaultArgumentPromotion (C99 6.5.2.2p6). Used for function calls that
/// do not have a prototype. Arguments that have type float or __fp16
/// are promoted to double. All other argument types are converted by
/// UsualUnaryConversions().
ExprResult Sema::DefaultArgumentPromotion(Expr *E) {
  QualType Ty = E->getType();
  assert(!Ty.isNull() && "DefaultArgumentPromotion - missing type");

  ExprResult Res = UsualUnaryConversions(E);
  if (Res.isInvalid())
    return ExprError();
  E = Res.get();

  // If this is a 'float' or '__fp16' (CVR qualified or typedef), promote to
  // double. Note that default argument promotion applies only to float (and
  // half/fp16); it does not apply to _Float16.
  const BuiltinType *BTy = Ty->getAs<BuiltinType>();
  if (BTy && (BTy->getKind() == BuiltinType::Half ||
              BTy->getKind() == BuiltinType::Float)) {
    if (getLangOpts().OpenCL &&
        !getOpenCLOptions().isEnabled("cl_khr_fp64")) {
      if (BTy->getKind() == BuiltinType::Half) {
        E = ImpCastExprToType(E, Context.FloatTy, CK_FloatingCast).get();
      }
    } else {
      E = ImpCastExprToType(E, Context.DoubleTy, CK_FloatingCast).get();
    }
  }

  // C++ performs lvalue-to-rvalue conversion as a default argument
  // promotion, even on class types, but note:
  //   C++11 [conv.lval]p2:
  //     When an lvalue-to-rvalue conversion occurs in an unevaluated
  //     operand or a subexpression thereof the value contained in the
  //     referenced object is not accessed. Otherwise, if the glvalue
  //     has a class type, the conversion copy-initializes a temporary
  //     of type T from the glvalue and the result of the conversion
  //     is a prvalue for the temporary.
  // FIXME: add some way to gate this entire thing for correctness in
  // potentially potentially-evaluated contexts.
  if (getLangOpts().CPlusPlus && E->isGLValue() && !isUnevaluatedContext()) {
    ExprResult Temp = PerformCopyInitialization(
        InitializedEntity::InitializeTemporary(E->getType()),
        E->getExprLoc(), E);
    if (Temp.isInvalid())
      return ExprError();
    E = Temp.get();
  }

  return E;
}

/// Determine the degree of POD-ness for an expression.
/// Incomplete types are considered POD, since this check can be performed /// when we're in an unevaluated context. Sema::VarArgKind Sema::isValidVarArgType(const QualType &Ty) { if (Ty->isIncompleteType()) { // C++11 [expr.call]p7: // After these conversions, if the argument does not have arithmetic, // enumeration, pointer, pointer to member, or class type, the program // is ill-formed. // // Since we've already performed array-to-pointer and function-to-pointer // decay, the only such type in C++ is cv void. This also handles // initializer lists as variadic arguments. if (Ty->isVoidType()) return VAK_Invalid; if (Ty->isObjCObjectType()) return VAK_Invalid; return VAK_Valid; } if (Ty.isCXX98PODType(Context)) return VAK_Valid; // C++11 [expr.call]p7: // Passing a potentially-evaluated argument of class type (Clause 9) // having a non-trivial copy constructor, a non-trivial move constructor, // or a non-trivial destructor, with no corresponding parameter, // is conditionally-supported with implementation-defined semantics. if (getLangOpts().CPlusPlus11 && !Ty->isDependentType()) if (CXXRecordDecl *Record = Ty->getAsCXXRecordDecl()) if (!Record->hasNonTrivialCopyConstructor() && !Record->hasNonTrivialMoveConstructor() && !Record->hasNonTrivialDestructor()) return VAK_ValidInCXX11; if (getLangOpts().ObjCAutoRefCount && Ty->isObjCLifetimeType()) return VAK_Valid; if (Ty->isObjCObjectType()) return VAK_Invalid; if (getLangOpts().MSVCCompat) return VAK_MSVCUndefined; // FIXME: In C++11, these cases are conditionally-supported, meaning we're // permitted to reject them. We should consider doing so. return VAK_Undefined; } void Sema::checkVariadicArgument(const Expr *E, VariadicCallType CT) { // Don't allow one to pass an Objective-C interface to a vararg. const QualType &Ty = E->getType(); VarArgKind VAK = isValidVarArgType(Ty); // Complain about passing non-POD types through varargs. switch (VAK) { case VAK_ValidInCXX11: DiagRuntimeBehavior( E->getLocStart(), nullptr, PDiag(diag::warn_cxx98_compat_pass_non_pod_arg_to_vararg) << Ty << CT); LLVM_FALLTHROUGH; case VAK_Valid: if (Ty->isRecordType()) { // This is unlikely to be what the user intended. If the class has a // 'c_str' member function, the user probably meant to call that. DiagRuntimeBehavior(E->getLocStart(), nullptr, PDiag(diag::warn_pass_class_arg_to_vararg) << Ty << CT << hasCStrMethod(E) << ".c_str()"); } break; case VAK_Undefined: case VAK_MSVCUndefined: DiagRuntimeBehavior( E->getLocStart(), nullptr, PDiag(diag::warn_cannot_pass_non_pod_arg_to_vararg) << getLangOpts().CPlusPlus11 << Ty << CT); break; case VAK_Invalid: if (Ty->isObjCObjectType()) DiagRuntimeBehavior( E->getLocStart(), nullptr, PDiag(diag::err_cannot_pass_objc_interface_to_vararg) << Ty << CT); else Diag(E->getLocStart(), diag::err_cannot_pass_to_vararg) << isa(E) << Ty << CT; break; } } /// DefaultVariadicArgumentPromotion - Like DefaultArgumentPromotion, but /// will create a trap if the resulting type is not a POD type. ExprResult Sema::DefaultVariadicArgumentPromotion(Expr *E, VariadicCallType CT, FunctionDecl *FDecl) { if (const BuiltinType *PlaceholderTy = E->getType()->getAsPlaceholderType()) { // Strip the unbridged-cast placeholder expression off, if applicable. if (PlaceholderTy->getKind() == BuiltinType::ARCUnbridgedCast && (CT == VariadicMethod || (FDecl && FDecl->hasAttr()))) { E = stripARCUnbridgedCast(E); // Otherwise, do normal placeholder checking. 
    } else {
      ExprResult ExprRes = CheckPlaceholderExpr(E);
      if (ExprRes.isInvalid())
        return ExprError();
      E = ExprRes.get();
    }
  }

  ExprResult ExprRes = DefaultArgumentPromotion(E);
  if (ExprRes.isInvalid())
    return ExprError();
  E = ExprRes.get();

  // Diagnostics regarding non-POD argument types are
  // emitted along with format string checking in Sema::CheckFunctionCall().
  if (isValidVarArgType(E->getType()) == VAK_Undefined) {
    // Turn this into a trap.
    CXXScopeSpec SS;
    SourceLocation TemplateKWLoc;
    UnqualifiedId Name;
    Name.setIdentifier(PP.getIdentifierInfo("__builtin_trap"),
                       E->getLocStart());
    ExprResult TrapFn = ActOnIdExpression(TUScope, SS, TemplateKWLoc,
                                          Name, true, false);
    if (TrapFn.isInvalid())
      return ExprError();
    ExprResult Call = ActOnCallExpr(TUScope, TrapFn.get(),
                                    E->getLocStart(), None, E->getLocEnd());
    if (Call.isInvalid())
      return ExprError();
    ExprResult Comma = ActOnBinOp(TUScope, E->getLocStart(), tok::comma,
                                  Call.get(), E);
    if (Comma.isInvalid())
      return ExprError();
    return Comma.get();
  }

  if (!getLangOpts().CPlusPlus &&
      RequireCompleteType(E->getExprLoc(), E->getType(),
                          diag::err_call_incomplete_argument))
    return ExprError();

  return E;
}

/// \brief Converts an integer to complex float type. Helper function of
/// UsualArithmeticConversions()
///
/// \return false if the integer expression is an integer type and is
/// successfully converted to the complex type.
static bool handleIntegerToComplexFloatConversion(Sema &S,
                                                  ExprResult &IntExpr,
                                                  ExprResult &ComplexExpr,
                                                  QualType IntTy,
                                                  QualType ComplexTy,
                                                  bool SkipCast) {
  if (IntTy->isComplexType() || IntTy->isRealFloatingType()) return true;
  if (SkipCast) return false;
  if (IntTy->isIntegerType()) {
    QualType fpTy = cast<ComplexType>(ComplexTy)->getElementType();
    IntExpr = S.ImpCastExprToType(IntExpr.get(), fpTy, CK_IntegralToFloating);
    IntExpr = S.ImpCastExprToType(IntExpr.get(), ComplexTy,
                                  CK_FloatingRealToComplex);
  } else {
    assert(IntTy->isComplexIntegerType());
    IntExpr = S.ImpCastExprToType(IntExpr.get(), ComplexTy,
                                  CK_IntegralComplexToFloatingComplex);
  }
  return false;
}

/// \brief Handle arithmetic conversion with complex types. Helper function of
/// UsualArithmeticConversions()
static QualType handleComplexFloatConversion(Sema &S, ExprResult &LHS,
                                             ExprResult &RHS, QualType LHSType,
                                             QualType RHSType,
                                             bool IsCompAssign) {
  // If we have an integer operand, the result is the complex type.
  if (!handleIntegerToComplexFloatConversion(S, RHS, LHS, RHSType, LHSType,
                                             /*skipCast*/false))
    return LHSType;
  if (!handleIntegerToComplexFloatConversion(S, LHS, RHS, LHSType, RHSType,
                                             /*skipCast*/IsCompAssign))
    return RHSType;

  // This handles complex/complex, complex/float, or float/complex.
  // When both operands are complex, the shorter operand is converted to the
  // type of the longer, and that is the type of the result. This corresponds
  // to what is done when combining two real floating-point operands.
  // The fun begins when size promotion occurs across type domains.
  // From H&S 6.3.4: When one operand is complex and the other is a real
  // floating-point type, the less precise type is converted, within its
  // real or complex domain, to the precision of the other type. For example,
  // when combining a "long double" with a "double _Complex", the
  // "double _Complex" is promoted to "long double _Complex".

  // Compute the rank of the two types, regardless of whether they are complex.
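  // For illustration (hypothetical operand pairs, not part of this file),
  // this rank comparison yields:
  //
  //   float _Complex + double        -> double _Complex
  //   long double + double _Complex  -> long double _Complex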
  int Order = S.Context.getFloatingTypeOrder(LHSType, RHSType);

  auto *LHSComplexType = dyn_cast<ComplexType>(LHSType);
  auto *RHSComplexType = dyn_cast<ComplexType>(RHSType);
  QualType LHSElementType =
      LHSComplexType ? LHSComplexType->getElementType() : LHSType;
  QualType RHSElementType =
      RHSComplexType ? RHSComplexType->getElementType() : RHSType;

  QualType ResultType = S.Context.getComplexType(LHSElementType);
  if (Order < 0) {
    // Promote the precision of the LHS if not an assignment.
    ResultType = S.Context.getComplexType(RHSElementType);
    if (!IsCompAssign) {
      if (LHSComplexType)
        LHS = S.ImpCastExprToType(LHS.get(), ResultType,
                                  CK_FloatingComplexCast);
      else
        LHS = S.ImpCastExprToType(LHS.get(), RHSElementType, CK_FloatingCast);
    }
  } else if (Order > 0) {
    // Promote the precision of the RHS.
    if (RHSComplexType)
      RHS = S.ImpCastExprToType(RHS.get(), ResultType, CK_FloatingComplexCast);
    else
      RHS = S.ImpCastExprToType(RHS.get(), LHSElementType, CK_FloatingCast);
  }
  return ResultType;
}

/// \brief Handle arithmetic conversion from integer to float. Helper function
/// of UsualArithmeticConversions()
static QualType handleIntToFloatConversion(Sema &S, ExprResult &FloatExpr,
                                           ExprResult &IntExpr,
                                           QualType FloatTy, QualType IntTy,
                                           bool ConvertFloat, bool ConvertInt) {
  if (IntTy->isIntegerType()) {
    if (ConvertInt)
      // Convert intExpr to the lhs floating point type.
      IntExpr = S.ImpCastExprToType(IntExpr.get(), FloatTy,
                                    CK_IntegralToFloating);
    return FloatTy;
  }

  // Convert both sides to the appropriate complex float.
  assert(IntTy->isComplexIntegerType());
  QualType result = S.Context.getComplexType(FloatTy);

  // _Complex int -> _Complex float
  if (ConvertInt)
    IntExpr = S.ImpCastExprToType(IntExpr.get(), result,
                                  CK_IntegralComplexToFloatingComplex);

  // float -> _Complex float
  if (ConvertFloat)
    FloatExpr = S.ImpCastExprToType(FloatExpr.get(), result,
                                    CK_FloatingRealToComplex);

  return result;
}

/// \brief Handle arithmetic conversion with floating point types. Helper
/// function of UsualArithmeticConversions()
static QualType handleFloatConversion(Sema &S, ExprResult &LHS,
                                      ExprResult &RHS, QualType LHSType,
                                      QualType RHSType, bool IsCompAssign) {
  bool LHSFloat = LHSType->isRealFloatingType();
  bool RHSFloat = RHSType->isRealFloatingType();

  // If we have two real floating types, convert the smaller operand
  // to the bigger result.
  if (LHSFloat && RHSFloat) {
    int order = S.Context.getFloatingTypeOrder(LHSType, RHSType);
    if (order > 0) {
      RHS = S.ImpCastExprToType(RHS.get(), LHSType, CK_FloatingCast);
      return LHSType;
    }

    assert(order < 0 && "illegal float comparison");
    if (!IsCompAssign)
      LHS = S.ImpCastExprToType(LHS.get(), RHSType, CK_FloatingCast);
    return RHSType;
  }

  if (LHSFloat) {
    // Half FP has to be promoted to float unless it is natively supported.
    if (LHSType->isHalfType() && !S.getLangOpts().NativeHalfType)
      LHSType = S.Context.FloatTy;

    return handleIntToFloatConversion(S, LHS, RHS, LHSType, RHSType,
                                      /*convertFloat=*/!IsCompAssign,
                                      /*convertInt=*/ true);
  }
  assert(RHSFloat);
  return handleIntToFloatConversion(S, RHS, LHS, RHSType, LHSType,
                                    /*convertInt=*/ true,
                                    /*convertFloat=*/!IsCompAssign);
}

/// \brief Diagnose attempts to convert between __float128 and long double if
/// there is no support for such conversion. Helper function of
/// UsualArithmeticConversions().
static bool unsupportedTypeConversion(const Sema &S, QualType LHSType,
                                      QualType RHSType) {
  /* No issue converting if at least one of the types is not a floating point
     type or the two types have the same rank.
*/ if (!LHSType->isFloatingType() || !RHSType->isFloatingType() || S.Context.getFloatingTypeOrder(LHSType, RHSType) == 0) return false; assert(LHSType->isFloatingType() && RHSType->isFloatingType() && "The remaining types must be floating point types."); auto *LHSComplex = LHSType->getAs(); auto *RHSComplex = RHSType->getAs(); QualType LHSElemType = LHSComplex ? LHSComplex->getElementType() : LHSType; QualType RHSElemType = RHSComplex ? RHSComplex->getElementType() : RHSType; // No issue if the two types have the same representation if (&S.Context.getFloatTypeSemantics(LHSElemType) == &S.Context.getFloatTypeSemantics(RHSElemType)) return false; bool Float128AndLongDouble = (LHSElemType == S.Context.Float128Ty && RHSElemType == S.Context.LongDoubleTy); Float128AndLongDouble |= (LHSElemType == S.Context.LongDoubleTy && RHSElemType == S.Context.Float128Ty); /* We've handled the situation where __float128 and long double have the same representation. The only other allowable conversion is if long double is really just double. */ return Float128AndLongDouble && (&S.Context.getFloatTypeSemantics(S.Context.LongDoubleTy) != &llvm::APFloat::IEEEdouble()); } typedef ExprResult PerformCastFn(Sema &S, Expr *operand, QualType toType); namespace { /// These helper callbacks are placed in an anonymous namespace to /// permit their use as function template parameters. ExprResult doIntegralCast(Sema &S, Expr *op, QualType toType) { return S.ImpCastExprToType(op, toType, CK_IntegralCast); } ExprResult doComplexIntegralCast(Sema &S, Expr *op, QualType toType) { return S.ImpCastExprToType(op, S.Context.getComplexType(toType), CK_IntegralComplexCast); } } /// \brief Handle integer arithmetic conversions. Helper function of /// UsualArithmeticConversions() template static QualType handleIntegerConversion(Sema &S, ExprResult &LHS, ExprResult &RHS, QualType LHSType, QualType RHSType, bool IsCompAssign) { // The rules for this case are in C99 6.3.1.8 int order = S.Context.getIntegerTypeOrder(LHSType, RHSType); bool LHSSigned = LHSType->hasSignedIntegerRepresentation(); bool RHSSigned = RHSType->hasSignedIntegerRepresentation(); if (LHSSigned == RHSSigned) { // Same signedness; use the higher-ranked type if (order >= 0) { RHS = (*doRHSCast)(S, RHS.get(), LHSType); return LHSType; } else if (!IsCompAssign) LHS = (*doLHSCast)(S, LHS.get(), RHSType); return RHSType; } else if (order != (LHSSigned ? 1 : -1)) { // The unsigned type has greater than or equal rank to the // signed type, so use the unsigned type if (RHSSigned) { RHS = (*doRHSCast)(S, RHS.get(), LHSType); return LHSType; } else if (!IsCompAssign) LHS = (*doLHSCast)(S, LHS.get(), RHSType); return RHSType; } else if (S.Context.getIntWidth(LHSType) != S.Context.getIntWidth(RHSType)) { // The two types are different widths; if we are here, that // means the signed type is larger than the unsigned type, so // use the signed type. if (LHSSigned) { RHS = (*doRHSCast)(S, RHS.get(), LHSType); return LHSType; } else if (!IsCompAssign) LHS = (*doLHSCast)(S, LHS.get(), RHSType); return RHSType; } else { // The signed type is higher-ranked than the unsigned type, // but isn't actually any bigger (like unsigned int and long // on most 32-bit systems). Use the unsigned type corresponding // to the signed type. QualType result = S.Context.getCorrespondingUnsignedType(LHSSigned ? 
LHSType : RHSType); RHS = (*doRHSCast)(S, RHS.get(), result); if (!IsCompAssign) LHS = (*doLHSCast)(S, LHS.get(), result); return result; } } /// \brief Handle conversions with GCC complex int extension. Helper function /// of UsualArithmeticConversions() static QualType handleComplexIntConversion(Sema &S, ExprResult &LHS, ExprResult &RHS, QualType LHSType, QualType RHSType, bool IsCompAssign) { const ComplexType *LHSComplexInt = LHSType->getAsComplexIntegerType(); const ComplexType *RHSComplexInt = RHSType->getAsComplexIntegerType(); if (LHSComplexInt && RHSComplexInt) { QualType LHSEltType = LHSComplexInt->getElementType(); QualType RHSEltType = RHSComplexInt->getElementType(); QualType ScalarType = handleIntegerConversion (S, LHS, RHS, LHSEltType, RHSEltType, IsCompAssign); return S.Context.getComplexType(ScalarType); } if (LHSComplexInt) { QualType LHSEltType = LHSComplexInt->getElementType(); QualType ScalarType = handleIntegerConversion (S, LHS, RHS, LHSEltType, RHSType, IsCompAssign); QualType ComplexType = S.Context.getComplexType(ScalarType); RHS = S.ImpCastExprToType(RHS.get(), ComplexType, CK_IntegralRealToComplex); return ComplexType; } assert(RHSComplexInt); QualType RHSEltType = RHSComplexInt->getElementType(); QualType ScalarType = handleIntegerConversion (S, LHS, RHS, LHSType, RHSEltType, IsCompAssign); QualType ComplexType = S.Context.getComplexType(ScalarType); if (!IsCompAssign) LHS = S.ImpCastExprToType(LHS.get(), ComplexType, CK_IntegralRealToComplex); return ComplexType; } /// UsualArithmeticConversions - Performs various conversions that are common to /// binary operators (C99 6.3.1.8). If both operands aren't arithmetic, this /// routine returns the first non-arithmetic type found. The client is /// responsible for emitting appropriate error diagnostics. QualType Sema::UsualArithmeticConversions(ExprResult &LHS, ExprResult &RHS, bool IsCompAssign) { if (!IsCompAssign) { LHS = UsualUnaryConversions(LHS.get()); if (LHS.isInvalid()) return QualType(); } RHS = UsualUnaryConversions(RHS.get()); if (RHS.isInvalid()) return QualType(); // For conversion purposes, we ignore any qualifiers. // For example, "const float" and "float" are equivalent. QualType LHSType = Context.getCanonicalType(LHS.get()->getType()).getUnqualifiedType(); QualType RHSType = Context.getCanonicalType(RHS.get()->getType()).getUnqualifiedType(); // For conversion purposes, we ignore any atomic qualifier on the LHS. if (const AtomicType *AtomicLHS = LHSType->getAs()) LHSType = AtomicLHS->getValueType(); // If both types are identical, no conversion is needed. if (LHSType == RHSType) return LHSType; // If either side is a non-arithmetic type (e.g. a pointer), we are done. // The caller can deal with this (e.g. pointer + int). if (!LHSType->isArithmeticType() || !RHSType->isArithmeticType()) return QualType(); // Apply unary and bitfield promotions to the LHS's type. QualType LHSUnpromotedType = LHSType; if (LHSType->isPromotableIntegerType()) LHSType = Context.getPromotedIntegerType(LHSType); QualType LHSBitfieldPromoteTy = Context.isPromotableBitField(LHS.get()); if (!LHSBitfieldPromoteTy.isNull()) LHSType = LHSBitfieldPromoteTy; if (LHSType != LHSUnpromotedType && !IsCompAssign) LHS = ImpCastExprToType(LHS.get(), LHSType, CK_IntegralCast); // If both types are identical, no conversion is needed. if (LHSType == RHSType) return LHSType; // At this point, we have two different arithmetic types. 
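  // For illustration (hypothetical operand pairs, not part of this file),
  // the dispatch below yields:
  //
  //   int + float           -> float           (real floating conversion)
  //   int + double _Complex -> double _Complex (complex conversion)
  //   unsigned int + int    -> unsigned int    (integer conversion,
  //                                             C99 6.3.1.8)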
// Diagnose attempts to convert between __float128 and long double where // such conversions currently can't be handled. if (unsupportedTypeConversion(*this, LHSType, RHSType)) return QualType(); // Handle complex types first (C99 6.3.1.8p1). if (LHSType->isComplexType() || RHSType->isComplexType()) return handleComplexFloatConversion(*this, LHS, RHS, LHSType, RHSType, IsCompAssign); // Now handle "real" floating types (i.e. float, double, long double). if (LHSType->isRealFloatingType() || RHSType->isRealFloatingType()) return handleFloatConversion(*this, LHS, RHS, LHSType, RHSType, IsCompAssign); // Handle GCC complex int extension. if (LHSType->isComplexIntegerType() || RHSType->isComplexIntegerType()) return handleComplexIntConversion(*this, LHS, RHS, LHSType, RHSType, IsCompAssign); // Finally, we have two differing integer types. return handleIntegerConversion (*this, LHS, RHS, LHSType, RHSType, IsCompAssign); } //===----------------------------------------------------------------------===// // Semantic Analysis for various Expression Types //===----------------------------------------------------------------------===// ExprResult Sema::ActOnGenericSelectionExpr(SourceLocation KeyLoc, SourceLocation DefaultLoc, SourceLocation RParenLoc, Expr *ControllingExpr, ArrayRef ArgTypes, ArrayRef ArgExprs) { unsigned NumAssocs = ArgTypes.size(); assert(NumAssocs == ArgExprs.size()); TypeSourceInfo **Types = new TypeSourceInfo*[NumAssocs]; for (unsigned i = 0; i < NumAssocs; ++i) { if (ArgTypes[i]) (void) GetTypeFromParser(ArgTypes[i], &Types[i]); else Types[i] = nullptr; } ExprResult ER = CreateGenericSelectionExpr(KeyLoc, DefaultLoc, RParenLoc, ControllingExpr, llvm::makeArrayRef(Types, NumAssocs), ArgExprs); delete [] Types; return ER; } ExprResult Sema::CreateGenericSelectionExpr(SourceLocation KeyLoc, SourceLocation DefaultLoc, SourceLocation RParenLoc, Expr *ControllingExpr, ArrayRef Types, ArrayRef Exprs) { unsigned NumAssocs = Types.size(); assert(NumAssocs == Exprs.size()); // Decay and strip qualifiers for the controlling expression type, and handle // placeholder type replacement. See committee discussion from WG14 DR423. { EnterExpressionEvaluationContext Unevaluated( *this, Sema::ExpressionEvaluationContext::Unevaluated); ExprResult R = DefaultFunctionArrayLvalueConversion(ControllingExpr); if (R.isInvalid()) return ExprError(); ControllingExpr = R.get(); } // The controlling expression is an unevaluated operand, so side effects are // likely unintended. if (!inTemplateInstantiation() && ControllingExpr->HasSideEffects(Context, false)) Diag(ControllingExpr->getExprLoc(), diag::warn_side_effects_unevaluated_context); bool TypeErrorFound = false, IsResultDependent = ControllingExpr->isTypeDependent(), ContainsUnexpandedParameterPack = ControllingExpr->containsUnexpandedParameterPack(); for (unsigned i = 0; i < NumAssocs; ++i) { if (Exprs[i]->containsUnexpandedParameterPack()) ContainsUnexpandedParameterPack = true; if (Types[i]) { if (Types[i]->getType()->containsUnexpandedParameterPack()) ContainsUnexpandedParameterPack = true; if (Types[i]->getType()->isDependentType()) { IsResultDependent = true; } else { // C11 6.5.1.1p2 "The type name in a generic association shall specify a // complete object type other than a variably modified type." 
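        // For illustration (hypothetical C11 code, not part of this file),
        // the associations checked here come from uses such as:
        //
        //   #define kind(x) _Generic((x), int: "int", float: "float",
        //                            default: "?")
        //   kind(1)     // selects "int"
        //   kind(1.0f)  // selects "float"
        //
        // An association naming an incomplete type (e.g. 'struct
        // never_defined') is diagnosed below.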
unsigned D = 0; if (Types[i]->getType()->isIncompleteType()) D = diag::err_assoc_type_incomplete; else if (!Types[i]->getType()->isObjectType()) D = diag::err_assoc_type_nonobject; else if (Types[i]->getType()->isVariablyModifiedType()) D = diag::err_assoc_type_variably_modified; if (D != 0) { Diag(Types[i]->getTypeLoc().getBeginLoc(), D) << Types[i]->getTypeLoc().getSourceRange() << Types[i]->getType(); TypeErrorFound = true; } // C11 6.5.1.1p2 "No two generic associations in the same generic // selection shall specify compatible types." for (unsigned j = i+1; j < NumAssocs; ++j) if (Types[j] && !Types[j]->getType()->isDependentType() && Context.typesAreCompatible(Types[i]->getType(), Types[j]->getType())) { Diag(Types[j]->getTypeLoc().getBeginLoc(), diag::err_assoc_compatible_types) << Types[j]->getTypeLoc().getSourceRange() << Types[j]->getType() << Types[i]->getType(); Diag(Types[i]->getTypeLoc().getBeginLoc(), diag::note_compat_assoc) << Types[i]->getTypeLoc().getSourceRange() << Types[i]->getType(); TypeErrorFound = true; } } } } if (TypeErrorFound) return ExprError(); // If we determined that the generic selection is result-dependent, don't // try to compute the result expression. if (IsResultDependent) return new (Context) GenericSelectionExpr( Context, KeyLoc, ControllingExpr, Types, Exprs, DefaultLoc, RParenLoc, ContainsUnexpandedParameterPack); SmallVector CompatIndices; unsigned DefaultIndex = -1U; for (unsigned i = 0; i < NumAssocs; ++i) { if (!Types[i]) DefaultIndex = i; else if (Context.typesAreCompatible(ControllingExpr->getType(), Types[i]->getType())) CompatIndices.push_back(i); } // C11 6.5.1.1p2 "The controlling expression of a generic selection shall have // type compatible with at most one of the types named in its generic // association list." if (CompatIndices.size() > 1) { // We strip parens here because the controlling expression is typically // parenthesized in macro definitions. ControllingExpr = ControllingExpr->IgnoreParens(); Diag(ControllingExpr->getLocStart(), diag::err_generic_sel_multi_match) << ControllingExpr->getSourceRange() << ControllingExpr->getType() << (unsigned) CompatIndices.size(); for (unsigned I : CompatIndices) { Diag(Types[I]->getTypeLoc().getBeginLoc(), diag::note_compat_assoc) << Types[I]->getTypeLoc().getSourceRange() << Types[I]->getType(); } return ExprError(); } // C11 6.5.1.1p2 "If a generic selection has no default generic association, // its controlling expression shall have type compatible with exactly one of // the types named in its generic association list." if (DefaultIndex == -1U && CompatIndices.size() == 0) { // We strip parens here because the controlling expression is typically // parenthesized in macro definitions. ControllingExpr = ControllingExpr->IgnoreParens(); Diag(ControllingExpr->getLocStart(), diag::err_generic_sel_no_match) << ControllingExpr->getSourceRange() << ControllingExpr->getType(); return ExprError(); } // C11 6.5.1.1p3 "If a generic selection has a generic association with a // type name that is compatible with the type of the controlling expression, // then the result expression of the generic selection is the expression // in that generic association. Otherwise, the result expression of the // generic selection is the expression in the default generic association." unsigned ResultIndex = CompatIndices.size() ? 
CompatIndices[0] : DefaultIndex; return new (Context) GenericSelectionExpr( Context, KeyLoc, ControllingExpr, Types, Exprs, DefaultLoc, RParenLoc, ContainsUnexpandedParameterPack, ResultIndex); } /// getUDSuffixLoc - Create a SourceLocation for a ud-suffix, given the /// location of the token and the offset of the ud-suffix within it. static SourceLocation getUDSuffixLoc(Sema &S, SourceLocation TokLoc, unsigned Offset) { return Lexer::AdvanceToTokenCharacter(TokLoc, Offset, S.getSourceManager(), S.getLangOpts()); } /// BuildCookedLiteralOperatorCall - A user-defined literal was found. Look up /// the corresponding cooked (non-raw) literal operator, and build a call to it. static ExprResult BuildCookedLiteralOperatorCall(Sema &S, Scope *Scope, IdentifierInfo *UDSuffix, SourceLocation UDSuffixLoc, ArrayRef<Expr *> Args, SourceLocation LitEndLoc) { assert(Args.size() <= 2 && "too many arguments for literal operator"); QualType ArgTy[2]; for (unsigned ArgIdx = 0; ArgIdx != Args.size(); ++ArgIdx) { ArgTy[ArgIdx] = Args[ArgIdx]->getType(); if (ArgTy[ArgIdx]->isArrayType()) ArgTy[ArgIdx] = S.Context.getArrayDecayedType(ArgTy[ArgIdx]); } DeclarationName OpName = S.Context.DeclarationNames.getCXXLiteralOperatorName(UDSuffix); DeclarationNameInfo OpNameInfo(OpName, UDSuffixLoc); OpNameInfo.setCXXLiteralOperatorNameLoc(UDSuffixLoc); LookupResult R(S, OpName, UDSuffixLoc, Sema::LookupOrdinaryName); if (S.LookupLiteralOperator(Scope, R, llvm::makeArrayRef(ArgTy, Args.size()), /*AllowRaw*/ false, /*AllowTemplate*/ false, /*AllowStringTemplate*/ false, /*DiagnoseMissing*/ true) == Sema::LOLR_Error) return ExprError(); return S.BuildLiteralOperatorCall(R, OpNameInfo, Args, LitEndLoc); } /// ActOnStringLiteral - The specified tokens were lexed as pasted string /// fragments (e.g. "foo" "bar" L"baz"). The result string has to handle string /// concatenation ([C99 5.1.1.2, translation phase #6]), so it may come from /// multiple tokens. However, the common case is that StringToks points to one /// string. /// ExprResult Sema::ActOnStringLiteral(ArrayRef<Token> StringToks, Scope *UDLScope) { assert(!StringToks.empty() && "Must have at least one string!"); StringLiteralParser Literal(StringToks, PP); if (Literal.hadError) return ExprError(); SmallVector<SourceLocation, 4> StringTokLocs; for (const Token &Tok : StringToks) StringTokLocs.push_back(Tok.getLocation()); QualType CharTy = Context.CharTy; StringLiteral::StringKind Kind = StringLiteral::Ascii; if (Literal.isWide()) { CharTy = Context.getWideCharType(); Kind = StringLiteral::Wide; } else if (Literal.isUTF8()) { Kind = StringLiteral::UTF8; } else if (Literal.isUTF16()) { CharTy = Context.Char16Ty; Kind = StringLiteral::UTF16; } else if (Literal.isUTF32()) { CharTy = Context.Char32Ty; Kind = StringLiteral::UTF32; } else if (Literal.isPascal()) { CharTy = Context.UnsignedCharTy; } QualType CharTyConst = CharTy; // A C++ string literal has a const-qualified element type (C++ 2.13.4p1). if (getLangOpts().CPlusPlus || getLangOpts().ConstStrings) CharTyConst.addConst(); // Get an array type for the string, according to C99 6.4.5. This includes // the nul terminator character as well as the string length for pascal // strings. QualType StrTy = Context.getConstantArrayType(CharTyConst, llvm::APInt(32, Literal.GetNumStringChars()+1), ArrayType::Normal, 0); // OpenCL v1.1 s6.5.3: a string literal is in the constant address space. if (getLangOpts().OpenCL) { StrTy = Context.getAddrSpaceQualType(StrTy, LangAS::opencl_constant); } // Pass &StringTokLocs[0], StringTokLocs.size() to factory!
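// For example, the pasted fragments "foo" "bar" become a single literal of
// type char[7] in C and const char[7] in C++: six characters plus the nul
// terminator accounted for above.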
StringLiteral *Lit = StringLiteral::Create(Context, Literal.GetString(), Kind, Literal.Pascal, StrTy, &StringTokLocs[0], StringTokLocs.size()); if (Literal.getUDSuffix().empty()) return Lit; // We're building a user-defined literal. IdentifierInfo *UDSuffix = &Context.Idents.get(Literal.getUDSuffix()); SourceLocation UDSuffixLoc = getUDSuffixLoc(*this, StringTokLocs[Literal.getUDSuffixToken()], Literal.getUDSuffixOffset()); // Make sure we're allowed user-defined literals here. if (!UDLScope) return ExprError(Diag(UDSuffixLoc, diag::err_invalid_string_udl)); // C++11 [lex.ext]p5: The literal L is treated as a call of the form // operator "" X (str, len) QualType SizeType = Context.getSizeType(); DeclarationName OpName = Context.DeclarationNames.getCXXLiteralOperatorName(UDSuffix); DeclarationNameInfo OpNameInfo(OpName, UDSuffixLoc); OpNameInfo.setCXXLiteralOperatorNameLoc(UDSuffixLoc); QualType ArgTy[] = { Context.getArrayDecayedType(StrTy), SizeType }; LookupResult R(*this, OpName, UDSuffixLoc, LookupOrdinaryName); switch (LookupLiteralOperator(UDLScope, R, ArgTy, /*AllowRaw*/ false, /*AllowTemplate*/ false, /*AllowStringTemplate*/ true, /*DiagnoseMissing*/ true)) { case LOLR_Cooked: { llvm::APInt Len(Context.getIntWidth(SizeType), Literal.GetNumStringChars()); IntegerLiteral *LenArg = IntegerLiteral::Create(Context, Len, SizeType, StringTokLocs[0]); Expr *Args[] = { Lit, LenArg }; return BuildLiteralOperatorCall(R, OpNameInfo, Args, StringTokLocs.back()); } case LOLR_StringTemplate: { TemplateArgumentListInfo ExplicitArgs; unsigned CharBits = Context.getIntWidth(CharTy); bool CharIsUnsigned = CharTy->isUnsignedIntegerType(); llvm::APSInt Value(CharBits, CharIsUnsigned); TemplateArgument TypeArg(CharTy); TemplateArgumentLocInfo TypeArgInfo(Context.getTrivialTypeSourceInfo(CharTy)); ExplicitArgs.addArgument(TemplateArgumentLoc(TypeArg, TypeArgInfo)); for (unsigned I = 0, N = Lit->getLength(); I != N; ++I) { Value = Lit->getCodeUnit(I); TemplateArgument Arg(Context, Value, CharTy); TemplateArgumentLocInfo ArgInfo; ExplicitArgs.addArgument(TemplateArgumentLoc(Arg, ArgInfo)); } return BuildLiteralOperatorCall(R, OpNameInfo, None, StringTokLocs.back(), &ExplicitArgs); } case LOLR_Raw: case LOLR_Template: case LOLR_ErrorNoDiagnostic: llvm_unreachable("unexpected literal operator lookup result"); case LOLR_Error: return ExprError(); } llvm_unreachable("unexpected literal operator lookup result"); } ExprResult Sema::BuildDeclRefExpr(ValueDecl *D, QualType Ty, ExprValueKind VK, SourceLocation Loc, const CXXScopeSpec *SS) { DeclarationNameInfo NameInfo(D->getDeclName(), Loc); return BuildDeclRefExpr(D, Ty, VK, NameInfo, SS); } /// BuildDeclRefExpr - Build an expression that references a /// declaration that does not require a closure capture. ExprResult Sema::BuildDeclRefExpr(ValueDecl *D, QualType Ty, ExprValueKind VK, const DeclarationNameInfo &NameInfo, const CXXScopeSpec *SS, NamedDecl *FoundD, const TemplateArgumentListInfo *TemplateArgs) { bool RefersToCapturedVariable = isa<VarDecl>(D) && NeedToCaptureVariable(cast<VarDecl>(D), NameInfo.getLoc()); DeclRefExpr *E; if (isa<VarTemplateSpecializationDecl>(D)) { VarTemplateSpecializationDecl *VarSpec = cast<VarTemplateSpecializationDecl>(D); E = DeclRefExpr::Create(Context, SS ? SS->getWithLocInContext(Context) : NestedNameSpecifierLoc(), VarSpec->getTemplateKeywordLoc(), D, RefersToCapturedVariable, NameInfo.getLoc(), Ty, VK, FoundD, TemplateArgs); } else { assert(!TemplateArgs && "No template arguments for non-variable" " template specialization references"); E = DeclRefExpr::Create(Context, SS ?
SS->getWithLocInContext(Context) : NestedNameSpecifierLoc(), SourceLocation(), D, RefersToCapturedVariable, NameInfo, Ty, VK, FoundD); } MarkDeclRefReferenced(E); if (getLangOpts().ObjCWeak && isa(D) && Ty.getObjCLifetime() == Qualifiers::OCL_Weak && !Diags.isIgnored(diag::warn_arc_repeated_use_of_weak, E->getLocStart())) recordUseOfEvaluatedWeak(E); FieldDecl *FD = dyn_cast(D); if (IndirectFieldDecl *IFD = dyn_cast(D)) FD = IFD->getAnonField(); if (FD) { UnusedPrivateFields.remove(FD); // Just in case we're building an illegal pointer-to-member. if (FD->isBitField()) E->setObjectKind(OK_BitField); } // C++ [expr.prim]/8: The expression [...] is a bit-field if the identifier // designates a bit-field. if (auto *BD = dyn_cast(D)) if (auto *BE = BD->getBinding()) E->setObjectKind(BE->getObjectKind()); return E; } /// Decomposes the given name into a DeclarationNameInfo, its location, and /// possibly a list of template arguments. /// /// If this produces template arguments, it is permitted to call /// DecomposeTemplateName. /// /// This actually loses a lot of source location information for /// non-standard name kinds; we should consider preserving that in /// some way. void Sema::DecomposeUnqualifiedId(const UnqualifiedId &Id, TemplateArgumentListInfo &Buffer, DeclarationNameInfo &NameInfo, const TemplateArgumentListInfo *&TemplateArgs) { if (Id.getKind() == UnqualifiedIdKind::IK_TemplateId) { Buffer.setLAngleLoc(Id.TemplateId->LAngleLoc); Buffer.setRAngleLoc(Id.TemplateId->RAngleLoc); ASTTemplateArgsPtr TemplateArgsPtr(Id.TemplateId->getTemplateArgs(), Id.TemplateId->NumArgs); translateTemplateArguments(TemplateArgsPtr, Buffer); TemplateName TName = Id.TemplateId->Template.get(); SourceLocation TNameLoc = Id.TemplateId->TemplateNameLoc; NameInfo = Context.getNameForTemplate(TName, TNameLoc); TemplateArgs = &Buffer; } else { NameInfo = GetNameFromUnqualifiedId(Id); TemplateArgs = nullptr; } } static void emitEmptyLookupTypoDiagnostic( const TypoCorrection &TC, Sema &SemaRef, const CXXScopeSpec &SS, DeclarationName Typo, SourceLocation TypoLoc, ArrayRef Args, unsigned DiagnosticID, unsigned DiagnosticSuggestID) { DeclContext *Ctx = SS.isEmpty() ? nullptr : SemaRef.computeDeclContext(SS, false); if (!TC) { // Emit a special diagnostic for failed member lookups. // FIXME: computing the declaration context might fail here (?) if (Ctx) SemaRef.Diag(TypoLoc, diag::err_no_member) << Typo << Ctx << SS.getRange(); else SemaRef.Diag(TypoLoc, DiagnosticID) << Typo; return; } std::string CorrectedStr = TC.getAsString(SemaRef.getLangOpts()); bool DroppedSpecifier = TC.WillReplaceSpecifier() && Typo.getAsString() == CorrectedStr; unsigned NoteID = TC.getCorrectionDeclAs() ? diag::note_implicit_param_decl : diag::note_previous_decl; if (!Ctx) SemaRef.diagnoseTypo(TC, SemaRef.PDiag(DiagnosticSuggestID) << Typo, SemaRef.PDiag(NoteID)); else SemaRef.diagnoseTypo(TC, SemaRef.PDiag(diag::err_no_member_suggest) << Typo << Ctx << DroppedSpecifier << SS.getRange(), SemaRef.PDiag(NoteID)); } /// Diagnose an empty lookup. 
/// /// \return false if new lookup candidates were found bool Sema::DiagnoseEmptyLookup(Scope *S, CXXScopeSpec &SS, LookupResult &R, std::unique_ptr CCC, TemplateArgumentListInfo *ExplicitTemplateArgs, ArrayRef Args, TypoExpr **Out) { DeclarationName Name = R.getLookupName(); unsigned diagnostic = diag::err_undeclared_var_use; unsigned diagnostic_suggest = diag::err_undeclared_var_use_suggest; if (Name.getNameKind() == DeclarationName::CXXOperatorName || Name.getNameKind() == DeclarationName::CXXLiteralOperatorName || Name.getNameKind() == DeclarationName::CXXConversionFunctionName) { diagnostic = diag::err_undeclared_use; diagnostic_suggest = diag::err_undeclared_use_suggest; } // If the original lookup was an unqualified lookup, fake an // unqualified lookup. This is useful when (for example) the // original lookup would not have found something because it was a // dependent name. DeclContext *DC = SS.isEmpty() ? CurContext : nullptr; while (DC) { if (isa(DC)) { LookupQualifiedName(R, DC); if (!R.empty()) { // Don't give errors about ambiguities in this lookup. R.suppressDiagnostics(); // During a default argument instantiation the CurContext points // to a CXXMethodDecl; but we can't apply a this-> fixit inside a // function parameter list, hence add an explicit check. bool isDefaultArgument = !CodeSynthesisContexts.empty() && CodeSynthesisContexts.back().Kind == CodeSynthesisContext::DefaultFunctionArgumentInstantiation; CXXMethodDecl *CurMethod = dyn_cast(CurContext); bool isInstance = CurMethod && CurMethod->isInstance() && DC == CurMethod->getParent() && !isDefaultArgument; // Give a code modification hint to insert 'this->'. // TODO: fixit for inserting 'Base::' in the other cases. // Actually quite difficult! if (getLangOpts().MSVCCompat) diagnostic = diag::ext_found_via_dependent_bases_lookup; if (isInstance) { Diag(R.getNameLoc(), diagnostic) << Name << FixItHint::CreateInsertion(R.getNameLoc(), "this->"); CheckCXXThisCapture(R.getNameLoc()); } else { Diag(R.getNameLoc(), diagnostic) << Name; } // Do we really want to note all of these? for (NamedDecl *D : R) Diag(D->getLocation(), diag::note_dependent_var_use); // Return true if we are inside a default argument instantiation // and the found name refers to an instance member function, otherwise // the function calling DiagnoseEmptyLookup will try to create an // implicit member call and this is wrong for default argument. if (isDefaultArgument && ((*R.begin())->isCXXInstanceMember())) { Diag(R.getNameLoc(), diag::err_member_call_without_object); return true; } // Tell the callee to try to recover. return false; } R.clear(); } // In Microsoft mode, if we are performing lookup from within a friend // function definition declared at class scope then we must set // DC to the lexical parent to be able to search into the parent // class. if (getLangOpts().MSVCCompat && isa(DC) && cast(DC)->getFriendObjectKind() && DC->getLexicalParent()->isRecord()) DC = DC->getLexicalParent(); else DC = DC->getParent(); } // We didn't find anything, so try to correct for a typo. 
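// For example, given 'void foo(int);', a call written 'fooo(1);' produces an
// empty lookup; the correction machinery below can then diagnose the typo and
// suggest (or, when recovery is safe, silently adopt) 'foo'.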
TypoCorrection Corrected; if (S && Out) { SourceLocation TypoLoc = R.getNameLoc(); assert(!ExplicitTemplateArgs && "Diagnosing an empty lookup with explicit template args!"); *Out = CorrectTypoDelayed( R.getLookupNameInfo(), R.getLookupKind(), S, &SS, std::move(CCC), [=](const TypoCorrection &TC) { emitEmptyLookupTypoDiagnostic(TC, *this, SS, Name, TypoLoc, Args, diagnostic, diagnostic_suggest); }, nullptr, CTK_ErrorRecovery); if (*Out) return true; } else if (S && (Corrected = CorrectTypo(R.getLookupNameInfo(), R.getLookupKind(), S, &SS, std::move(CCC), CTK_ErrorRecovery))) { std::string CorrectedStr(Corrected.getAsString(getLangOpts())); bool DroppedSpecifier = Corrected.WillReplaceSpecifier() && Name.getAsString() == CorrectedStr; R.setLookupName(Corrected.getCorrection()); bool AcceptableWithRecovery = false; bool AcceptableWithoutRecovery = false; NamedDecl *ND = Corrected.getFoundDecl(); if (ND) { if (Corrected.isOverloaded()) { OverloadCandidateSet OCS(R.getNameLoc(), OverloadCandidateSet::CSK_Normal); OverloadCandidateSet::iterator Best; for (NamedDecl *CD : Corrected) { if (FunctionTemplateDecl *FTD = dyn_cast(CD)) AddTemplateOverloadCandidate( FTD, DeclAccessPair::make(FTD, AS_none), ExplicitTemplateArgs, Args, OCS); else if (FunctionDecl *FD = dyn_cast(CD)) if (!ExplicitTemplateArgs || ExplicitTemplateArgs->size() == 0) AddOverloadCandidate(FD, DeclAccessPair::make(FD, AS_none), Args, OCS); } switch (OCS.BestViableFunction(*this, R.getNameLoc(), Best)) { case OR_Success: ND = Best->FoundDecl; Corrected.setCorrectionDecl(ND); break; default: // FIXME: Arbitrarily pick the first declaration for the note. Corrected.setCorrectionDecl(ND); break; } } R.addDecl(ND); if (getLangOpts().CPlusPlus && ND->isCXXClassMember()) { CXXRecordDecl *Record = nullptr; if (Corrected.getCorrectionSpecifier()) { const Type *Ty = Corrected.getCorrectionSpecifier()->getAsType(); Record = Ty->getAsCXXRecordDecl(); } if (!Record) Record = cast( ND->getDeclContext()->getRedeclContext()); R.setNamingClass(Record); } auto *UnderlyingND = ND->getUnderlyingDecl(); AcceptableWithRecovery = isa(UnderlyingND) || isa(UnderlyingND); // FIXME: If we ended up with a typo for a type name or // Objective-C class name, we're in trouble because the parser // is in the wrong place to recover. Suggest the typo // correction, but don't make it a fix-it since we're not going // to recover well anyway. AcceptableWithoutRecovery = isa(UnderlyingND) || isa(UnderlyingND); } else { // FIXME: We found a keyword. Suggest it, but don't provide a fix-it // because we aren't able to recover. AcceptableWithoutRecovery = true; } if (AcceptableWithRecovery || AcceptableWithoutRecovery) { unsigned NoteID = Corrected.getCorrectionDeclAs() ? diag::note_implicit_param_decl : diag::note_previous_decl; if (SS.isEmpty()) diagnoseTypo(Corrected, PDiag(diagnostic_suggest) << Name, PDiag(NoteID), AcceptableWithRecovery); else diagnoseTypo(Corrected, PDiag(diag::err_no_member_suggest) << Name << computeDeclContext(SS, false) << DroppedSpecifier << SS.getRange(), PDiag(NoteID), AcceptableWithRecovery); // Tell the callee whether to try to recover. return !AcceptableWithRecovery; } } R.clear(); // Emit a special diagnostic for failed member lookups. // FIXME: computing the declaration context might fail here (?) if (!SS.isEmpty()) { Diag(R.getNameLoc(), diag::err_no_member) << Name << computeDeclContext(SS, false) << SS.getRange(); return true; } // Give up, we can't recover. 
Diag(R.getNameLoc(), diagnostic) << Name; return true; } /// In Microsoft mode, if we are inside a template class whose parent class has /// dependent base classes, and we can't resolve an unqualified identifier, then /// assume the identifier is a member of a dependent base class. We can only /// recover successfully in static methods, instance methods, and other contexts /// where 'this' is available. This doesn't precisely match MSVC's /// instantiation model, but it's close enough. static Expr * recoverFromMSUnqualifiedLookup(Sema &S, ASTContext &Context, DeclarationNameInfo &NameInfo, SourceLocation TemplateKWLoc, const TemplateArgumentListInfo *TemplateArgs) { // Only try to recover from lookup into dependent bases in static methods or // contexts where 'this' is available. QualType ThisType = S.getCurrentThisType(); const CXXRecordDecl *RD = nullptr; if (!ThisType.isNull()) RD = ThisType->getPointeeType()->getAsCXXRecordDecl(); else if (auto *MD = dyn_cast(S.CurContext)) RD = MD->getParent(); if (!RD || !RD->hasAnyDependentBases()) return nullptr; // Diagnose this as unqualified lookup into a dependent base class. If 'this' // is available, suggest inserting 'this->' as a fixit. SourceLocation Loc = NameInfo.getLoc(); auto DB = S.Diag(Loc, diag::ext_undeclared_unqual_id_with_dependent_base); DB << NameInfo.getName() << RD; if (!ThisType.isNull()) { DB << FixItHint::CreateInsertion(Loc, "this->"); return CXXDependentScopeMemberExpr::Create( Context, /*This=*/nullptr, ThisType, /*IsArrow=*/true, /*Op=*/SourceLocation(), NestedNameSpecifierLoc(), TemplateKWLoc, /*FirstQualifierInScope=*/nullptr, NameInfo, TemplateArgs); } // Synthesize a fake NNS that points to the derived class. This will // perform name lookup during template instantiation. CXXScopeSpec SS; auto *NNS = NestedNameSpecifier::Create(Context, nullptr, true, RD->getTypeForDecl()); SS.MakeTrivial(Context, NNS, SourceRange(Loc, Loc)); return DependentScopeDeclRefExpr::Create( Context, SS.getWithLocInContext(Context), TemplateKWLoc, NameInfo, TemplateArgs); } ExprResult Sema::ActOnIdExpression(Scope *S, CXXScopeSpec &SS, SourceLocation TemplateKWLoc, UnqualifiedId &Id, bool HasTrailingLParen, bool IsAddressOfOperand, std::unique_ptr CCC, bool IsInlineAsmIdentifier, Token *KeywordReplacement) { assert(!(IsAddressOfOperand && HasTrailingLParen) && "cannot be direct & operand and have a trailing lparen"); if (SS.isInvalid()) return ExprError(); TemplateArgumentListInfo TemplateArgsBuffer; // Decompose the UnqualifiedId into the following data. DeclarationNameInfo NameInfo; const TemplateArgumentListInfo *TemplateArgs; DecomposeUnqualifiedId(Id, TemplateArgsBuffer, NameInfo, TemplateArgs); DeclarationName Name = NameInfo.getName(); IdentifierInfo *II = Name.getAsIdentifierInfo(); SourceLocation NameLoc = NameInfo.getLoc(); if (II && II->isEditorPlaceholder()) { // FIXME: When typed placeholders are supported we can create a typed // placeholder expression node. return ExprError(); } // C++ [temp.dep.expr]p3: // An id-expression is type-dependent if it contains: // -- an identifier that was declared with a dependent type, // (note: handled after lookup) // -- a template-id that is dependent, // (note: handled in BuildTemplateIdExpr) // -- a conversion-function-id that specifies a dependent type, // -- a nested-name-specifier that contains a class-name that // names a dependent type. // Determine whether this is a member of an unknown specialization; // we need to handle these differently. 
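// For example, in
//
//   template <typename T> struct S { int f() { return T::value; } };
//
// 'T::value' names a member of the dependent type 'T'; it cannot be resolved
// here, so it is built as a dependent id-expression below and looked up again
// at instantiation time.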
bool DependentID = false; if (Name.getNameKind() == DeclarationName::CXXConversionFunctionName && Name.getCXXNameType()->isDependentType()) { DependentID = true; } else if (SS.isSet()) { if (DeclContext *DC = computeDeclContext(SS, false)) { if (RequireCompleteDeclContext(SS, DC)) return ExprError(); } else { DependentID = true; } } if (DependentID) return ActOnDependentIdExpression(SS, TemplateKWLoc, NameInfo, IsAddressOfOperand, TemplateArgs); // Perform the required lookup. LookupResult R(*this, NameInfo, (Id.getKind() == UnqualifiedIdKind::IK_ImplicitSelfParam) ? LookupObjCImplicitSelfParam : LookupOrdinaryName); if (TemplateArgs) { // Lookup the template name again to correctly establish the context in // which it was found. This is really unfortunate as we already did the // lookup to determine that it was a template name in the first place. If // this becomes a performance hit, we can work harder to preserve those // results until we get here but it's likely not worth it. bool MemberOfUnknownSpecialization; LookupTemplateName(R, S, SS, QualType(), /*EnteringContext=*/false, MemberOfUnknownSpecialization); if (MemberOfUnknownSpecialization || (R.getResultKind() == LookupResult::NotFoundInCurrentInstantiation)) return ActOnDependentIdExpression(SS, TemplateKWLoc, NameInfo, IsAddressOfOperand, TemplateArgs); } else { bool IvarLookupFollowUp = II && !SS.isSet() && getCurMethodDecl(); LookupParsedName(R, S, &SS, !IvarLookupFollowUp); // If the result might be in a dependent base class, this is a dependent // id-expression. if (R.getResultKind() == LookupResult::NotFoundInCurrentInstantiation) return ActOnDependentIdExpression(SS, TemplateKWLoc, NameInfo, IsAddressOfOperand, TemplateArgs); // If this reference is in an Objective-C method, then we need to do // some special Objective-C lookup, too. if (IvarLookupFollowUp) { ExprResult E(LookupInObjCMethod(R, S, II, true)); if (E.isInvalid()) return ExprError(); if (Expr *Ex = E.getAs()) return Ex; } } if (R.isAmbiguous()) return ExprError(); // This could be an implicitly declared function reference (legal in C90, // extension in C99, forbidden in C++). if (R.empty() && HasTrailingLParen && II && !getLangOpts().CPlusPlus) { NamedDecl *D = ImplicitlyDefineFunction(NameLoc, *II, S); if (D) R.addDecl(D); } // Determine whether this name might be a candidate for // argument-dependent lookup. bool ADL = UseArgumentDependentLookup(SS, R, HasTrailingLParen); if (R.empty() && !ADL) { if (SS.isEmpty() && getLangOpts().MSVCCompat) { if (Expr *E = recoverFromMSUnqualifiedLookup(*this, Context, NameInfo, TemplateKWLoc, TemplateArgs)) return E; } // Don't diagnose an empty lookup for inline assembly. if (IsInlineAsmIdentifier) return ExprError(); // If this name wasn't predeclared and if this is not a function // call, diagnose the problem. TypoExpr *TE = nullptr; auto DefaultValidator = llvm::make_unique( II, SS.isValid() ? SS.getScopeRep() : nullptr); DefaultValidator->IsAddressOfOperand = IsAddressOfOperand; assert((!CCC || CCC->IsAddressOfOperand == IsAddressOfOperand) && "Typo correction callback misconfigured"); if (CCC) { // Make sure the callback knows what the typo being diagnosed is. CCC->setTypoName(II); if (SS.isValid()) CCC->setTypoNNS(SS.getScopeRep()); } if (DiagnoseEmptyLookup(S, SS, R, CCC ? 
std::move(CCC) : std::move(DefaultValidator), nullptr, None, &TE)) { if (TE && KeywordReplacement) { auto &State = getTypoExprState(TE); auto BestTC = State.Consumer->getNextCorrection(); if (BestTC.isKeyword()) { auto *II = BestTC.getCorrectionAsIdentifierInfo(); if (State.DiagHandler) State.DiagHandler(BestTC); KeywordReplacement->startToken(); KeywordReplacement->setKind(II->getTokenID()); KeywordReplacement->setIdentifierInfo(II); KeywordReplacement->setLocation(BestTC.getCorrectionRange().getBegin()); // Clean up the state associated with the TypoExpr, since it has // now been diagnosed (without a call to CorrectDelayedTyposInExpr). clearDelayedTypo(TE); // Signal that a correction to a keyword was performed by returning a // valid-but-null ExprResult. return (Expr*)nullptr; } State.Consumer->resetCorrectionStream(); } return TE ? TE : ExprError(); } assert(!R.empty() && "DiagnoseEmptyLookup returned false but added no results"); // If we found an Objective-C instance variable, let // LookupInObjCMethod build the appropriate expression to // reference the ivar. if (ObjCIvarDecl *Ivar = R.getAsSingle()) { R.clear(); ExprResult E(LookupInObjCMethod(R, S, Ivar->getIdentifier())); // In a hopelessly buggy code, Objective-C instance variable // lookup fails and no expression will be built to reference it. if (!E.isInvalid() && !E.get()) return ExprError(); return E; } } // This is guaranteed from this point on. assert(!R.empty() || ADL); // Check whether this might be a C++ implicit instance member access. // C++ [class.mfct.non-static]p3: // When an id-expression that is not part of a class member access // syntax and not used to form a pointer to member is used in the // body of a non-static member function of class X, if name lookup // resolves the name in the id-expression to a non-static non-type // member of some class C, the id-expression is transformed into a // class member access expression using (*this) as the // postfix-expression to the left of the . operator. // // But we don't actually need to do this for '&' operands if R // resolved to a function or overloaded function set, because the // expression is ill-formed if it actually works out to be a // non-static member function: // // C++ [expr.ref]p4: // Otherwise, if E1.E2 refers to a non-static member function. . . // [t]he expression can be used only as the left-hand operand of a // member function call. // // There are other safeguards against such uses, but it's important // to get this right here so that we don't end up making a // spuriously dependent expression if we're inside a dependent // instance method. if (!R.empty() && (*R.begin())->isCXXClassMember()) { bool MightBeImplicitMember; if (!IsAddressOfOperand) MightBeImplicitMember = true; else if (!SS.isEmpty()) MightBeImplicitMember = false; else if (R.isOverloadedResult()) MightBeImplicitMember = false; else if (R.isUnresolvableResult()) MightBeImplicitMember = true; else MightBeImplicitMember = isa(R.getFoundDecl()) || isa(R.getFoundDecl()) || isa(R.getFoundDecl()); if (MightBeImplicitMember) return BuildPossibleImplicitMemberExpr(SS, TemplateKWLoc, R, TemplateArgs, S); } if (TemplateArgs || TemplateKWLoc.isValid()) { // In C++1y, if this is a variable template id, then check it // in BuildTemplateIdExpr(). // The single lookup result must be a variable template declaration. 
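// For example:
//
//   template <typename T> constexpr T pi = T(3.141592653589793);
//   double d = pi<double>;  // lookup yields the single VarTemplateDecl 'pi'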
if (Id.getKind() == UnqualifiedIdKind::IK_TemplateId && Id.TemplateId && Id.TemplateId->Kind == TNK_Var_template) { assert(R.getAsSingle<VarTemplateDecl>() && "There should only be one declaration found."); } return BuildTemplateIdExpr(SS, TemplateKWLoc, R, ADL, TemplateArgs); } return BuildDeclarationNameExpr(SS, R, ADL); } /// BuildQualifiedDeclarationNameExpr - Build a C++ qualified /// declaration name, generally during template instantiation. /// There's a large number of things which don't need to be done along /// this path. ExprResult Sema::BuildQualifiedDeclarationNameExpr( CXXScopeSpec &SS, const DeclarationNameInfo &NameInfo, bool IsAddressOfOperand, const Scope *S, TypeSourceInfo **RecoveryTSI) { DeclContext *DC = computeDeclContext(SS, false); if (!DC) return BuildDependentDeclRefExpr(SS, /*TemplateKWLoc=*/SourceLocation(), NameInfo, /*TemplateArgs=*/nullptr); if (RequireCompleteDeclContext(SS, DC)) return ExprError(); LookupResult R(*this, NameInfo, LookupOrdinaryName); LookupQualifiedName(R, DC); if (R.isAmbiguous()) return ExprError(); if (R.getResultKind() == LookupResult::NotFoundInCurrentInstantiation) return BuildDependentDeclRefExpr(SS, /*TemplateKWLoc=*/SourceLocation(), NameInfo, /*TemplateArgs=*/nullptr); if (R.empty()) { Diag(NameInfo.getLoc(), diag::err_no_member) << NameInfo.getName() << DC << SS.getRange(); return ExprError(); } if (const TypeDecl *TD = R.getAsSingle<TypeDecl>()) { // Diagnose a missing typename if this resolved unambiguously to a type in // a dependent context. If we can recover with a type, downgrade this to // a warning in Microsoft compatibility mode. unsigned DiagID = diag::err_typename_missing; if (RecoveryTSI && getLangOpts().MSVCCompat) DiagID = diag::ext_typename_missing; SourceLocation Loc = SS.getBeginLoc(); auto D = Diag(Loc, DiagID); D << SS.getScopeRep() << NameInfo.getName().getAsString() << SourceRange(Loc, NameInfo.getEndLoc()); // Don't recover if the caller isn't expecting us to or if we're in a SFINAE // context. if (!RecoveryTSI) return ExprError(); // Only issue the fixit if we're prepared to recover. D << FixItHint::CreateInsertion(Loc, "typename "); // Recover by pretending this was an elaborated type. QualType Ty = Context.getTypeDeclType(TD); TypeLocBuilder TLB; TLB.pushTypeSpec(Ty).setNameLoc(NameInfo.getLoc()); QualType ET = getElaboratedType(ETK_None, SS, Ty); ElaboratedTypeLoc QTL = TLB.push<ElaboratedTypeLoc>(ET); QTL.setElaboratedKeywordLoc(SourceLocation()); QTL.setQualifierLoc(SS.getWithLocInContext(Context)); *RecoveryTSI = TLB.getTypeSourceInfo(Context, ET); return ExprEmpty(); } // Defend against this resolving to an implicit member access. We usually // won't get here if this might be a legitimate class member (we end up in // BuildMemberReferenceExpr instead), but this can be valid if we're forming // a pointer-to-member or in an unevaluated context in C++11. if (!R.empty() && (*R.begin())->isCXXClassMember() && !IsAddressOfOperand) return BuildPossibleImplicitMemberExpr(SS, /*TemplateKWLoc=*/SourceLocation(), R, /*TemplateArgs=*/nullptr, S); return BuildDeclarationNameExpr(SS, R, /* ADL */ false); } /// LookupInObjCMethod - The parser has read a name in, and Sema has /// detected that we're currently inside an ObjC method. Perform some /// additional lookup. /// /// Ideally, most of this would be done by lookup, but there's /// actually quite a lot of extra work involved. /// /// Returns a null sentinel to indicate trivial success.
ExprResult Sema::LookupInObjCMethod(LookupResult &Lookup, Scope *S, IdentifierInfo *II, bool AllowBuiltinCreation) { SourceLocation Loc = Lookup.getNameLoc(); ObjCMethodDecl *CurMethod = getCurMethodDecl(); // Check for error condition which is already reported. if (!CurMethod) return ExprError(); // There are two cases to handle here. 1) scoped lookup could have failed, // in which case we should look for an ivar. 2) scoped lookup could have // found a decl, but that decl is outside the current instance method (i.e. // a global variable). In these two cases, we do a lookup for an ivar with // this name; if the lookup succeeds, we replace it with our current decl. // If we're in a class method, we don't normally want to look for // ivars. But if we don't find anything else, and there's an // ivar, that's an error. bool IsClassMethod = CurMethod->isClassMethod(); bool LookForIvars; if (Lookup.empty()) LookForIvars = true; else if (IsClassMethod) LookForIvars = false; else LookForIvars = (Lookup.isSingleResult() && Lookup.getFoundDecl()->isDefinedOutsideFunctionOrMethod()); ObjCInterfaceDecl *IFace = nullptr; if (LookForIvars) { IFace = CurMethod->getClassInterface(); ObjCInterfaceDecl *ClassDeclared; ObjCIvarDecl *IV = nullptr; if (IFace && (IV = IFace->lookupInstanceVariable(II, ClassDeclared))) { // Diagnose using an ivar in a class method. if (IsClassMethod) return ExprError(Diag(Loc, diag::err_ivar_use_in_class_method) << IV->getDeclName()); // If we're referencing an invalid decl, just return this as a silent // error node. The error diagnostic was already emitted on the decl. if (IV->isInvalidDecl()) return ExprError(); // Check if referencing a field with __attribute__((deprecated)). if (DiagnoseUseOfDecl(IV, Loc)) return ExprError(); // Diagnose the use of an ivar outside of the declaring class. if (IV->getAccessControl() == ObjCIvarDecl::Private && !declaresSameEntity(ClassDeclared, IFace) && !getLangOpts().DebuggerSupport) Diag(Loc, diag::err_private_ivar_access) << IV->getDeclName(); // FIXME: This should use a new expr for a direct reference, don't // turn this into Self->ivar, just return a BareIVarExpr or something. IdentifierInfo &II = Context.Idents.get("self"); UnqualifiedId SelfName; SelfName.setIdentifier(&II, SourceLocation()); SelfName.setKind(UnqualifiedIdKind::IK_ImplicitSelfParam); CXXScopeSpec SelfScopeSpec; SourceLocation TemplateKWLoc; ExprResult SelfExpr = ActOnIdExpression(S, SelfScopeSpec, TemplateKWLoc, SelfName, false, false); if (SelfExpr.isInvalid()) return ExprError(); SelfExpr = DefaultLvalueConversion(SelfExpr.get()); if (SelfExpr.isInvalid()) return ExprError(); MarkAnyDeclReferenced(Loc, IV, true); ObjCMethodFamily MF = CurMethod->getMethodFamily(); if (MF != OMF_init && MF != OMF_dealloc && MF != OMF_finalize && !IvarBacksCurrentMethodAccessor(IFace, CurMethod, IV)) Diag(Loc, diag::warn_direct_ivar_access) << IV->getDeclName(); ObjCIvarRefExpr *Result = new (Context) ObjCIvarRefExpr(IV, IV->getUsageType(SelfExpr.get()->getType()), Loc, IV->getLocation(), SelfExpr.get(), true, true); if (IV->getType().getObjCLifetime() == Qualifiers::OCL_Weak) { if (!Diags.isIgnored(diag::warn_arc_repeated_use_of_weak, Loc)) recordUseOfEvaluatedWeak(Result); } if (getLangOpts().ObjCAutoRefCount) { if (CurContext->isClosure()) Diag(Loc, diag::warn_implicitly_retains_self) << FixItHint::CreateInsertion(Loc, "self->"); } return Result; } } else if (CurMethod->isInstanceMethod()) { // We should warn if a local variable hides an ivar.
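// For example:
//
//   @interface I : NSObject { int val; } @end
//   @implementation I
//   - (void)m { int val = 0; val++; }  // 'val' hides the ivar; warn below
//   @end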
if (ObjCInterfaceDecl *IFace = CurMethod->getClassInterface()) { ObjCInterfaceDecl *ClassDeclared; if (ObjCIvarDecl *IV = IFace->lookupInstanceVariable(II, ClassDeclared)) { if (IV->getAccessControl() != ObjCIvarDecl::Private || declaresSameEntity(IFace, ClassDeclared)) Diag(Loc, diag::warn_ivar_use_hidden) << IV->getDeclName(); } } } else if (Lookup.isSingleResult() && Lookup.getFoundDecl()->isDefinedOutsideFunctionOrMethod()) { // If accessing a stand-alone ivar in a class method, this is an error. if (const ObjCIvarDecl *IV = dyn_cast(Lookup.getFoundDecl())) return ExprError(Diag(Loc, diag::err_ivar_use_in_class_method) << IV->getDeclName()); } if (Lookup.empty() && II && AllowBuiltinCreation) { // FIXME. Consolidate this with similar code in LookupName. if (unsigned BuiltinID = II->getBuiltinID()) { if (!(getLangOpts().CPlusPlus && Context.BuiltinInfo.isPredefinedLibFunction(BuiltinID))) { NamedDecl *D = LazilyCreateBuiltin((IdentifierInfo *)II, BuiltinID, S, Lookup.isForRedeclaration(), Lookup.getNameLoc()); if (D) Lookup.addDecl(D); } } } // Sentinel value saying that we didn't do anything special. return ExprResult((Expr *)nullptr); } /// \brief Cast a base object to a member's actual type. /// /// Logically this happens in three phases: /// /// * First we cast from the base type to the naming class. /// The naming class is the class into which we were looking /// when we found the member; it's the qualifier type if a /// qualifier was provided, and otherwise it's the base type. /// /// * Next we cast from the naming class to the declaring class. /// If the member we found was brought into a class's scope by /// a using declaration, this is that class; otherwise it's /// the class declaring the member. /// /// * Finally we cast from the declaring class to the "true" /// declaring class of the member. This conversion does not /// obey access control. ExprResult Sema::PerformObjectMemberConversion(Expr *From, NestedNameSpecifier *Qualifier, NamedDecl *FoundDecl, NamedDecl *Member) { CXXRecordDecl *RD = dyn_cast(Member->getDeclContext()); if (!RD) return From; QualType DestRecordType; QualType DestType; QualType FromRecordType; QualType FromType = From->getType(); bool PointerConversions = false; if (isa(Member)) { DestRecordType = Context.getCanonicalType(Context.getTypeDeclType(RD)); if (FromType->getAs()) { DestType = Context.getPointerType(DestRecordType); FromRecordType = FromType->getPointeeType(); PointerConversions = true; } else { DestType = DestRecordType; FromRecordType = FromType; } } else if (CXXMethodDecl *Method = dyn_cast(Member)) { if (Method->isStatic()) return From; DestType = Method->getThisType(Context); DestRecordType = DestType->getPointeeType(); if (FromType->getAs()) { FromRecordType = FromType->getPointeeType(); PointerConversions = true; } else { FromRecordType = FromType; DestType = DestRecordType; } } else { // No conversion necessary. return From; } if (DestType->isDependentType() || FromType->isDependentType()) return From; // If the unqualified types are the same, no conversion is necessary. if (Context.hasSameUnqualifiedType(FromRecordType, DestRecordType)) return From; SourceRange FromRange = From->getSourceRange(); SourceLocation FromLoc = FromRange.getBegin(); ExprValueKind VK = From->getValueKind(); // C++ [class.member.lookup]p8: // [...] Ambiguities can often be resolved by qualifying a name with its // class name. 
// // If the member was a qualified name and the qualified referred to a // specific base subobject type, we'll cast to that intermediate type // first and then to the object in which the member is declared. That allows // one to resolve ambiguities in, e.g., a diamond-shaped hierarchy such as: // // class Base { public: int x; }; // class Derived1 : public Base { }; // class Derived2 : public Base { }; // class VeryDerived : public Derived1, public Derived2 { void f(); }; // // void VeryDerived::f() { // x = 17; // error: ambiguous base subobjects // Derived1::x = 17; // okay, pick the Base subobject of Derived1 // } if (Qualifier && Qualifier->getAsType()) { QualType QType = QualType(Qualifier->getAsType(), 0); assert(QType->isRecordType() && "lookup done with non-record type"); QualType QRecordType = QualType(QType->getAs(), 0); // In C++98, the qualifier type doesn't actually have to be a base // type of the object type, in which case we just ignore it. // Otherwise build the appropriate casts. if (IsDerivedFrom(FromLoc, FromRecordType, QRecordType)) { CXXCastPath BasePath; if (CheckDerivedToBaseConversion(FromRecordType, QRecordType, FromLoc, FromRange, &BasePath)) return ExprError(); if (PointerConversions) QType = Context.getPointerType(QType); From = ImpCastExprToType(From, QType, CK_UncheckedDerivedToBase, VK, &BasePath).get(); FromType = QType; FromRecordType = QRecordType; // If the qualifier type was the same as the destination type, // we're done. if (Context.hasSameUnqualifiedType(FromRecordType, DestRecordType)) return From; } } bool IgnoreAccess = false; // If we actually found the member through a using declaration, cast // down to the using declaration's type. // // Pointer equality is fine here because only one declaration of a // class ever has member declarations. if (FoundDecl->getDeclContext() != Member->getDeclContext()) { assert(isa(FoundDecl)); QualType URecordType = Context.getTypeDeclType( cast(FoundDecl->getDeclContext())); // We only need to do this if the naming-class to declaring-class // conversion is non-trivial. if (!Context.hasSameUnqualifiedType(FromRecordType, URecordType)) { assert(IsDerivedFrom(FromLoc, FromRecordType, URecordType)); CXXCastPath BasePath; if (CheckDerivedToBaseConversion(FromRecordType, URecordType, FromLoc, FromRange, &BasePath)) return ExprError(); QualType UType = URecordType; if (PointerConversions) UType = Context.getPointerType(UType); From = ImpCastExprToType(From, UType, CK_UncheckedDerivedToBase, VK, &BasePath).get(); FromType = UType; FromRecordType = URecordType; } // We don't do access control for the conversion from the // declaring class to the true declaring class. IgnoreAccess = true; } CXXCastPath BasePath; if (CheckDerivedToBaseConversion(FromRecordType, DestRecordType, FromLoc, FromRange, &BasePath, IgnoreAccess)) return ExprError(); return ImpCastExprToType(From, DestType, CK_UncheckedDerivedToBase, VK, &BasePath); } bool Sema::UseArgumentDependentLookup(const CXXScopeSpec &SS, const LookupResult &R, bool HasTrailingLParen) { // Only when used directly as the postfix-expression of a call. if (!HasTrailingLParen) return false; // Never if a scope specifier was provided. if (SS.isSet()) return false; // Only in C++ or ObjC++. 
if (!getLangOpts().CPlusPlus) return false; // Turn off ADL when we find certain kinds of declarations during // normal lookup: for (NamedDecl *D : R) { // C++0x [basic.lookup.argdep]p3: // -- a declaration of a class member // Since using decls preserve this property, we check this on the // original decl. if (D->isCXXClassMember()) return false; // C++0x [basic.lookup.argdep]p3: // -- a block-scope function declaration that is not a // using-declaration // NOTE: we also trigger this for function templates (in fact, we // don't check the decl type at all, since all other decl types // turn off ADL anyway). if (isa(D)) D = cast(D)->getTargetDecl(); else if (D->getLexicalDeclContext()->isFunctionOrMethod()) return false; // C++0x [basic.lookup.argdep]p3: // -- a declaration that is neither a function or a function // template // And also for builtin functions. if (isa(D)) { FunctionDecl *FDecl = cast(D); // But also builtin functions. if (FDecl->getBuiltinID() && FDecl->isImplicit()) return false; } else if (!isa(D)) return false; } return true; } /// Diagnoses obvious problems with the use of the given declaration /// as an expression. This is only actually called for lookups that /// were not overloaded, and it doesn't promise that the declaration /// will in fact be used. static bool CheckDeclInExpr(Sema &S, SourceLocation Loc, NamedDecl *D) { if (D->isInvalidDecl()) return true; if (isa(D)) { S.Diag(Loc, diag::err_unexpected_typedef) << D->getDeclName(); return true; } if (isa(D)) { S.Diag(Loc, diag::err_unexpected_interface) << D->getDeclName(); return true; } if (isa(D)) { S.Diag(Loc, diag::err_unexpected_namespace) << D->getDeclName(); return true; } return false; } ExprResult Sema::BuildDeclarationNameExpr(const CXXScopeSpec &SS, LookupResult &R, bool NeedsADL, bool AcceptInvalidDecl) { // If this is a single, fully-resolved result and we don't need ADL, // just build an ordinary singleton decl ref. if (!NeedsADL && R.isSingleResult() && !R.getAsSingle()) return BuildDeclarationNameExpr(SS, R.getLookupNameInfo(), R.getFoundDecl(), R.getRepresentativeDecl(), nullptr, AcceptInvalidDecl); // We only need to check the declaration if there's exactly one // result, because in the overloaded case the results can only be // functions and function templates. if (R.isSingleResult() && CheckDeclInExpr(*this, R.getNameLoc(), R.getFoundDecl())) return ExprError(); // Otherwise, just build an unresolved lookup expression. Suppress // any lookup-related diagnostics; we'll hash these out later, when // we've picked a target. R.suppressDiagnostics(); UnresolvedLookupExpr *ULE = UnresolvedLookupExpr::Create(Context, R.getNamingClass(), SS.getWithLocInContext(Context), R.getLookupNameInfo(), NeedsADL, R.isOverloadedResult(), R.begin(), R.end()); return ULE; } static void diagnoseUncapturableValueReference(Sema &S, SourceLocation loc, ValueDecl *var, DeclContext *DC); /// \brief Complete semantic analysis for a reference to the given declaration. 
ExprResult Sema::BuildDeclarationNameExpr( const CXXScopeSpec &SS, const DeclarationNameInfo &NameInfo, NamedDecl *D, NamedDecl *FoundD, const TemplateArgumentListInfo *TemplateArgs, bool AcceptInvalidDecl) { assert(D && "Cannot refer to a NULL declaration"); assert(!isa(D) && "Cannot refer unambiguously to a function template"); SourceLocation Loc = NameInfo.getLoc(); if (CheckDeclInExpr(*this, Loc, D)) return ExprError(); if (TemplateDecl *Template = dyn_cast(D)) { // Specifically diagnose references to class templates that are missing // a template argument list. Diag(Loc, diag::err_template_decl_ref) << (isa(D) ? 1 : 0) << Template << SS.getRange(); Diag(Template->getLocation(), diag::note_template_decl_here); return ExprError(); } // Make sure that we're referring to a value. ValueDecl *VD = dyn_cast(D); if (!VD) { Diag(Loc, diag::err_ref_non_value) << D << SS.getRange(); Diag(D->getLocation(), diag::note_declared_at); return ExprError(); } // Check whether this declaration can be used. Note that we suppress // this check when we're going to perform argument-dependent lookup // on this function name, because this might not be the function // that overload resolution actually selects. if (DiagnoseUseOfDecl(VD, Loc)) return ExprError(); // Only create DeclRefExpr's for valid Decl's. if (VD->isInvalidDecl() && !AcceptInvalidDecl) return ExprError(); // Handle members of anonymous structs and unions. If we got here, // and the reference is to a class member indirect field, then this // must be the subject of a pointer-to-member expression. if (IndirectFieldDecl *indirectField = dyn_cast(VD)) if (!indirectField->isCXXClassMember()) return BuildAnonymousStructUnionMemberReference(SS, NameInfo.getLoc(), indirectField); { QualType type = VD->getType(); if (type.isNull()) return ExprError(); if (auto *FPT = type->getAs()) { // C++ [except.spec]p17: // An exception-specification is considered to be needed when: // - in an expression, the function is the unique lookup result or // the selected member of a set of overloaded functions. ResolveExceptionSpec(Loc, FPT); type = VD->getType(); } ExprValueKind valueKind = VK_RValue; switch (D->getKind()) { // Ignore all the non-ValueDecl kinds. #define ABSTRACT_DECL(kind) #define VALUE(type, base) #define DECL(type, base) \ case Decl::type: #include "clang/AST/DeclNodes.inc" llvm_unreachable("invalid value decl kind"); // These shouldn't make it here. case Decl::ObjCAtDefsField: case Decl::ObjCIvar: llvm_unreachable("forming non-member reference to ivar?"); // Enum constants are always r-values and never references. // Unresolved using declarations are dependent. case Decl::EnumConstant: case Decl::UnresolvedUsingValue: case Decl::OMPDeclareReduction: valueKind = VK_RValue; break; // Fields and indirect fields that got here must be for // pointer-to-member expressions; we just call them l-values for // internal consistency, because this subexpression doesn't really // exist in the high-level semantics. case Decl::Field: case Decl::IndirectField: assert(getLangOpts().CPlusPlus && "building reference to field in C?"); // These can't have reference type in well-formed programs, but // for internal consistency we do this anyway. type = type.getNonReferenceType(); valueKind = VK_LValue; break; // Non-type template parameters are either l-values or r-values // depending on the type. 
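// For example:
//
//   template <int N, int &R> void f() {
//     // 'N' is a prvalue of type int (cv-qualifiers are stripped below);
//     // 'R' is an lvalue of type int.
//   }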
case Decl::NonTypeTemplateParm: { if (const ReferenceType *reftype = type->getAs()) { type = reftype->getPointeeType(); valueKind = VK_LValue; // even if the parameter is an r-value reference break; } // For non-references, we need to strip qualifiers just in case // the template parameter was declared as 'const int' or whatever. valueKind = VK_RValue; type = type.getUnqualifiedType(); break; } case Decl::Var: case Decl::VarTemplateSpecialization: case Decl::VarTemplatePartialSpecialization: case Decl::Decomposition: case Decl::OMPCapturedExpr: // In C, "extern void blah;" is valid and is an r-value. if (!getLangOpts().CPlusPlus && !type.hasQualifiers() && type->isVoidType()) { valueKind = VK_RValue; break; } LLVM_FALLTHROUGH; case Decl::ImplicitParam: case Decl::ParmVar: { // These are always l-values. valueKind = VK_LValue; type = type.getNonReferenceType(); // FIXME: Does the addition of const really only apply in // potentially-evaluated contexts? Since the variable isn't actually // captured in an unevaluated context, it seems that the answer is no. if (!isUnevaluatedContext()) { QualType CapturedType = getCapturedDeclRefType(cast(VD), Loc); if (!CapturedType.isNull()) type = CapturedType; } break; } case Decl::Binding: { // These are always lvalues. valueKind = VK_LValue; type = type.getNonReferenceType(); // FIXME: Support lambda-capture of BindingDecls, once CWG actually // decides how that's supposed to work. auto *BD = cast(VD); if (BD->getDeclContext()->isFunctionOrMethod() && BD->getDeclContext() != CurContext) diagnoseUncapturableValueReference(*this, Loc, BD, CurContext); break; } case Decl::Function: { if (unsigned BID = cast(VD)->getBuiltinID()) { if (!Context.BuiltinInfo.isPredefinedLibFunction(BID)) { type = Context.BuiltinFnTy; valueKind = VK_RValue; break; } } const FunctionType *fty = type->castAs(); // If we're referring to a function with an __unknown_anytype // result type, make the entire expression __unknown_anytype. if (fty->getReturnType() == Context.UnknownAnyTy) { type = Context.UnknownAnyTy; valueKind = VK_RValue; break; } // Functions are l-values in C++. if (getLangOpts().CPlusPlus) { valueKind = VK_LValue; break; } // C99 DR 316 says that, if a function type comes from a // function definition (without a prototype), that type is only // used for checking compatibility. Therefore, when referencing // the function, we pretend that we don't have the full function // type. if (!cast(VD)->hasPrototype() && isa(fty)) type = Context.getFunctionNoProtoType(fty->getReturnType(), fty->getExtInfo()); // Functions are r-values in C. valueKind = VK_RValue; break; } case Decl::CXXDeductionGuide: llvm_unreachable("building reference to deduction guide"); case Decl::MSProperty: valueKind = VK_LValue; break; case Decl::CXXMethod: // If we're referring to a method with an __unknown_anytype // result type, make the entire expression __unknown_anytype. // This should only be possible with a type written directly. if (const FunctionProtoType *proto = dyn_cast(VD->getType())) if (proto->getReturnType() == Context.UnknownAnyTy) { type = Context.UnknownAnyTy; valueKind = VK_RValue; break; } // C++ methods are l-values if static, r-values if non-static. 
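// For example, '&C::staticMethod' takes the address of an ordinary lvalue
// function designator, while naming a non-static 'C::method' yields an
// rvalue that is only useful to form a pointer to member or to call.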
if (cast(VD)->isStatic()) { valueKind = VK_LValue; break; } LLVM_FALLTHROUGH; case Decl::CXXConversion: case Decl::CXXDestructor: case Decl::CXXConstructor: valueKind = VK_RValue; break; } return BuildDeclRefExpr(VD, type, valueKind, NameInfo, &SS, FoundD, TemplateArgs); } } static void ConvertUTF8ToWideString(unsigned CharByteWidth, StringRef Source, SmallString<32> &Target) { Target.resize(CharByteWidth * (Source.size() + 1)); char *ResultPtr = &Target[0]; const llvm::UTF8 *ErrorPtr; bool success = llvm::ConvertUTF8toWide(CharByteWidth, Source, ResultPtr, ErrorPtr); (void)success; assert(success); Target.resize(ResultPtr - &Target[0]); } ExprResult Sema::BuildPredefinedExpr(SourceLocation Loc, PredefinedExpr::IdentType IT) { // Pick the current block, lambda, captured statement or function. Decl *currentDecl = nullptr; if (const BlockScopeInfo *BSI = getCurBlock()) currentDecl = BSI->TheDecl; else if (const LambdaScopeInfo *LSI = getCurLambda()) currentDecl = LSI->CallOperator; else if (const CapturedRegionScopeInfo *CSI = getCurCapturedRegion()) currentDecl = CSI->TheCapturedDecl; else currentDecl = getCurFunctionOrMethodDecl(); if (!currentDecl) { Diag(Loc, diag::ext_predef_outside_function); currentDecl = Context.getTranslationUnitDecl(); } QualType ResTy; StringLiteral *SL = nullptr; if (cast(currentDecl)->isDependentContext()) ResTy = Context.DependentTy; else { // Pre-defined identifiers are of type char[x], where x is the length of // the string. auto Str = PredefinedExpr::ComputeName(IT, currentDecl); unsigned Length = Str.length(); llvm::APInt LengthI(32, Length + 1); if (IT == PredefinedExpr::LFunction) { ResTy = Context.WideCharTy.withConst(); SmallString<32> RawChars; ConvertUTF8ToWideString(Context.getTypeSizeInChars(ResTy).getQuantity(), Str, RawChars); ResTy = Context.getConstantArrayType(ResTy, LengthI, ArrayType::Normal, /*IndexTypeQuals*/ 0); SL = StringLiteral::Create(Context, RawChars, StringLiteral::Wide, /*Pascal*/ false, ResTy, Loc); } else { ResTy = Context.CharTy.withConst(); ResTy = Context.getConstantArrayType(ResTy, LengthI, ArrayType::Normal, /*IndexTypeQuals*/ 0); SL = StringLiteral::Create(Context, Str, StringLiteral::Ascii, /*Pascal*/ false, ResTy, Loc); } } return new (Context) PredefinedExpr(Loc, ResTy, IT, SL); } ExprResult Sema::ActOnPredefinedExpr(SourceLocation Loc, tok::TokenKind Kind) { PredefinedExpr::IdentType IT; switch (Kind) { default: llvm_unreachable("Unknown simple primary expr!"); case tok::kw___func__: IT = PredefinedExpr::Func; break; // [C99 6.4.2.2] case tok::kw___FUNCTION__: IT = PredefinedExpr::Function; break; case tok::kw___FUNCDNAME__: IT = PredefinedExpr::FuncDName; break; // [MS] case tok::kw___FUNCSIG__: IT = PredefinedExpr::FuncSig; break; // [MS] case tok::kw_L__FUNCTION__: IT = PredefinedExpr::LFunction; break; case tok::kw___PRETTY_FUNCTION__: IT = PredefinedExpr::PrettyFunction; break; } return BuildPredefinedExpr(Loc, IT); } ExprResult Sema::ActOnCharacterConstant(const Token &Tok, Scope *UDLScope) { SmallString<16> CharBuffer; bool Invalid = false; StringRef ThisTok = PP.getSpelling(Tok, CharBuffer, &Invalid); if (Invalid) return ExprError(); CharLiteralParser Literal(ThisTok.begin(), ThisTok.end(), Tok.getLocation(), PP, Tok.getKind()); if (Literal.hadError()) return ExprError(); QualType Ty; if (Literal.isWide()) Ty = Context.WideCharTy; // L'x' -> wchar_t in C and C++. else if (Literal.isUTF16()) Ty = Context.Char16Ty; // u'x' -> char16_t in C11 and C++11. 
else if (Literal.isUTF32()) Ty = Context.Char32Ty; // U'x' -> char32_t in C11 and C++11. else if (!getLangOpts().CPlusPlus || Literal.isMultiChar()) Ty = Context.IntTy; // 'x' -> int in C, 'wxyz' -> int in C++. else Ty = Context.CharTy; // 'x' -> char in C++ CharacterLiteral::CharacterKind Kind = CharacterLiteral::Ascii; if (Literal.isWide()) Kind = CharacterLiteral::Wide; else if (Literal.isUTF16()) Kind = CharacterLiteral::UTF16; else if (Literal.isUTF32()) Kind = CharacterLiteral::UTF32; else if (Literal.isUTF8()) Kind = CharacterLiteral::UTF8; Expr *Lit = new (Context) CharacterLiteral(Literal.getValue(), Kind, Ty, Tok.getLocation()); if (Literal.getUDSuffix().empty()) return Lit; // We're building a user-defined literal. IdentifierInfo *UDSuffix = &Context.Idents.get(Literal.getUDSuffix()); SourceLocation UDSuffixLoc = getUDSuffixLoc(*this, Tok.getLocation(), Literal.getUDSuffixOffset()); // Make sure we're allowed user-defined literals here. if (!UDLScope) return ExprError(Diag(UDSuffixLoc, diag::err_invalid_character_udl)); // C++11 [lex.ext]p6: The literal L is treated as a call of the form // operator "" X (ch) return BuildCookedLiteralOperatorCall(*this, UDLScope, UDSuffix, UDSuffixLoc, Lit, Tok.getLocation()); } ExprResult Sema::ActOnIntegerConstant(SourceLocation Loc, uint64_t Val) { unsigned IntSize = Context.getTargetInfo().getIntWidth(); return IntegerLiteral::Create(Context, llvm::APInt(IntSize, Val), Context.IntTy, Loc); } static Expr *BuildFloatingLiteral(Sema &S, NumericLiteralParser &Literal, QualType Ty, SourceLocation Loc) { const llvm::fltSemantics &Format = S.Context.getFloatTypeSemantics(Ty); using llvm::APFloat; APFloat Val(Format); APFloat::opStatus result = Literal.GetFloatValue(Val); // Overflow is always an error, but underflow is only an error if // we underflowed to zero (APFloat reports denormals as underflow). if ((result & APFloat::opOverflow) || ((result & APFloat::opUnderflow) && Val.isZero())) { unsigned diagnostic; SmallString<20> buffer; if (result & APFloat::opOverflow) { diagnostic = diag::warn_float_overflow; APFloat::getLargest(Format).toString(buffer); } else { diagnostic = diag::warn_float_underflow; APFloat::getSmallest(Format).toString(buffer); } S.Diag(Loc, diagnostic) << Ty << StringRef(buffer.data(), buffer.size()); } bool isExact = (result == APFloat::opOK); return FloatingLiteral::Create(S.Context, Val, isExact, Ty, Loc); } bool Sema::CheckLoopHintExpr(Expr *E, SourceLocation Loc) { assert(E && "Invalid expression"); if (E->isValueDependent()) return false; QualType QT = E->getType(); if (!QT->isIntegerType() || QT->isBooleanType() || QT->isCharType()) { Diag(E->getExprLoc(), diag::err_pragma_loop_invalid_argument_type) << QT; return true; } llvm::APSInt ValueAPS; ExprResult R = VerifyIntegerConstantExpression(E, &ValueAPS); if (R.isInvalid()) return true; bool ValueIsPositive = ValueAPS.isStrictlyPositive(); if (!ValueIsPositive || ValueAPS.getActiveBits() > 31) { Diag(E->getExprLoc(), diag::err_pragma_loop_invalid_argument_value) << ValueAPS.toString(10) << ValueIsPositive; return true; } return false; } ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) { // Fast path for a single digit (which is quite common). A single digit // cannot have a trigraph, escaped newline, radix prefix, or suffix. 
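// For example, the token '7' takes the fast path below: its value is computed
// directly from the spelling as '7' - '0', with no NumericLiteralParser.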
if (Tok.getLength() == 1) { const char Val = PP.getSpellingOfSingleCharacterNumericConstant(Tok); return ActOnIntegerConstant(Tok.getLocation(), Val-'0'); } SmallString<128> SpellingBuffer; // NumericLiteralParser wants to overread by one character. Add padding to // the buffer in case the token is copied to the buffer. If getSpelling() // returns a StringRef to the memory buffer, it should have a null char at // the EOF, so it is also safe. SpellingBuffer.resize(Tok.getLength() + 1); // Get the spelling of the token, which eliminates trigraphs, etc. bool Invalid = false; StringRef TokSpelling = PP.getSpelling(Tok, SpellingBuffer, &Invalid); if (Invalid) return ExprError(); NumericLiteralParser Literal(TokSpelling, Tok.getLocation(), PP); if (Literal.hadError) return ExprError(); if (Literal.hasUDSuffix()) { // We're building a user-defined literal. IdentifierInfo *UDSuffix = &Context.Idents.get(Literal.getUDSuffix()); SourceLocation UDSuffixLoc = getUDSuffixLoc(*this, Tok.getLocation(), Literal.getUDSuffixOffset()); // Make sure we're allowed user-defined literals here. if (!UDLScope) return ExprError(Diag(UDSuffixLoc, diag::err_invalid_numeric_udl)); QualType CookedTy; if (Literal.isFloatingLiteral()) { // C++11 [lex.ext]p4: If S contains a literal operator with parameter type // long double, the literal is treated as a call of the form // operator "" X (f L) CookedTy = Context.LongDoubleTy; } else { // C++11 [lex.ext]p3: If S contains a literal operator with parameter type // unsigned long long, the literal is treated as a call of the form // operator "" X (n ULL) CookedTy = Context.UnsignedLongLongTy; } DeclarationName OpName = Context.DeclarationNames.getCXXLiteralOperatorName(UDSuffix); DeclarationNameInfo OpNameInfo(OpName, UDSuffixLoc); OpNameInfo.setCXXLiteralOperatorNameLoc(UDSuffixLoc); SourceLocation TokLoc = Tok.getLocation(); // Perform literal operator lookup to determine if we're building a raw // literal or a cooked one. LookupResult R(*this, OpName, UDSuffixLoc, LookupOrdinaryName); switch (LookupLiteralOperator(UDLScope, R, CookedTy, /*AllowRaw*/ true, /*AllowTemplate*/ true, /*AllowStringTemplate*/ false, /*DiagnoseMissing*/ !Literal.isImaginary)) { case LOLR_ErrorNoDiagnostic: // Lookup failure for imaginary constants isn't fatal, there's still the // GNU extension producing _Complex types. 
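      // For example, `1.0i` with no viable operator""i in scope falls
      // through here and is handled as a GNU imaginary constant, producing
      // an ImaginaryLiteral of type _Complex double further below.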
break; case LOLR_Error: return ExprError(); case LOLR_Cooked: { Expr *Lit; if (Literal.isFloatingLiteral()) { Lit = BuildFloatingLiteral(*this, Literal, CookedTy, Tok.getLocation()); } else { llvm::APInt ResultVal(Context.getTargetInfo().getLongLongWidth(), 0); if (Literal.GetIntegerValue(ResultVal)) Diag(Tok.getLocation(), diag::err_integer_literal_too_large) << /* Unsigned */ 1; Lit = IntegerLiteral::Create(Context, ResultVal, CookedTy, Tok.getLocation()); } return BuildLiteralOperatorCall(R, OpNameInfo, Lit, TokLoc); } case LOLR_Raw: { // C++11 [lit.ext]p3, p4: If S contains a raw literal operator, the // literal is treated as a call of the form // operator "" X ("n") unsigned Length = Literal.getUDSuffixOffset(); QualType StrTy = Context.getConstantArrayType( Context.CharTy.withConst(), llvm::APInt(32, Length + 1), ArrayType::Normal, 0); Expr *Lit = StringLiteral::Create( Context, StringRef(TokSpelling.data(), Length), StringLiteral::Ascii, /*Pascal*/false, StrTy, &TokLoc, 1); return BuildLiteralOperatorCall(R, OpNameInfo, Lit, TokLoc); } case LOLR_Template: { // C++11 [lit.ext]p3, p4: Otherwise (S contains a literal operator // template), L is treated as a call fo the form // operator "" X <'c1', 'c2', ... 'ck'>() // where n is the source character sequence c1 c2 ... ck. TemplateArgumentListInfo ExplicitArgs; unsigned CharBits = Context.getIntWidth(Context.CharTy); bool CharIsUnsigned = Context.CharTy->isUnsignedIntegerType(); llvm::APSInt Value(CharBits, CharIsUnsigned); for (unsigned I = 0, N = Literal.getUDSuffixOffset(); I != N; ++I) { Value = TokSpelling[I]; TemplateArgument Arg(Context, Value, Context.CharTy); TemplateArgumentLocInfo ArgInfo; ExplicitArgs.addArgument(TemplateArgumentLoc(Arg, ArgInfo)); } return BuildLiteralOperatorCall(R, OpNameInfo, None, TokLoc, &ExplicitArgs); } case LOLR_StringTemplate: llvm_unreachable("unexpected literal operator lookup result"); } } Expr *Res; if (Literal.isFloatingLiteral()) { QualType Ty; if (Literal.isHalf){ if (getOpenCLOptions().isEnabled("cl_khr_fp16")) Ty = Context.HalfTy; else { Diag(Tok.getLocation(), diag::err_half_const_requires_fp16); return ExprError(); } } else if (Literal.isFloat) Ty = Context.FloatTy; else if (Literal.isLong) Ty = Context.LongDoubleTy; else if (Literal.isFloat16) Ty = Context.Float16Ty; else if (Literal.isFloat128) Ty = Context.Float128Ty; else Ty = Context.DoubleTy; Res = BuildFloatingLiteral(*this, Literal, Ty, Tok.getLocation()); if (Ty == Context.DoubleTy) { if (getLangOpts().SinglePrecisionConstants) { const BuiltinType *BTy = Ty->getAs(); if (BTy->getKind() != BuiltinType::Float) { Res = ImpCastExprToType(Res, Context.FloatTy, CK_FloatingCast).get(); } } else if (getLangOpts().OpenCL && !getOpenCLOptions().isEnabled("cl_khr_fp64")) { // Impose single-precision float type when cl_khr_fp64 is not enabled. Diag(Tok.getLocation(), diag::warn_double_const_requires_fp64); Res = ImpCastExprToType(Res, Context.FloatTy, CK_FloatingCast).get(); } } } else if (!Literal.isIntegerLiteral()) { return ExprError(); } else { QualType Ty; // 'long long' is a C99 or C++11 feature. if (!getLangOpts().C99 && Literal.isLongLong) { if (getLangOpts().CPlusPlus) Diag(Tok.getLocation(), getLangOpts().CPlusPlus11 ? diag::warn_cxx98_compat_longlong : diag::ext_cxx11_longlong); else Diag(Tok.getLocation(), diag::ext_c99_longlong); } // Get the value in the widest-possible width. 
unsigned MaxWidth = Context.getTargetInfo().getIntMaxTWidth(); llvm::APInt ResultVal(MaxWidth, 0); if (Literal.GetIntegerValue(ResultVal)) { // If this value didn't fit into uintmax_t, error and force to ull. Diag(Tok.getLocation(), diag::err_integer_literal_too_large) << /* Unsigned */ 1; Ty = Context.UnsignedLongLongTy; assert(Context.getTypeSize(Ty) == ResultVal.getBitWidth() && "long long is not intmax_t?"); } else { // If this value fits into a ULL, try to figure out what else it fits into // according to the rules of C99 6.4.4.1p5. // Octal, Hexadecimal, and integers with a U suffix are allowed to // be an unsigned int. bool AllowUnsigned = Literal.isUnsigned || Literal.getRadix() != 10; // Check from smallest to largest, picking the smallest type we can. unsigned Width = 0; // Microsoft specific integer suffixes are explicitly sized. if (Literal.MicrosoftInteger) { if (Literal.MicrosoftInteger == 8 && !Literal.isUnsigned) { Width = 8; Ty = Context.CharTy; } else { Width = Literal.MicrosoftInteger; Ty = Context.getIntTypeForBitwidth(Width, /*Signed=*/!Literal.isUnsigned); } } if (Ty.isNull() && !Literal.isLong && !Literal.isLongLong) { // Are int/unsigned possibilities? unsigned IntSize = Context.getTargetInfo().getIntWidth(); // Does it fit in a unsigned int? if (ResultVal.isIntN(IntSize)) { // Does it fit in a signed int? if (!Literal.isUnsigned && ResultVal[IntSize-1] == 0) Ty = Context.IntTy; else if (AllowUnsigned) Ty = Context.UnsignedIntTy; Width = IntSize; } } // Are long/unsigned long possibilities? if (Ty.isNull() && !Literal.isLongLong) { unsigned LongSize = Context.getTargetInfo().getLongWidth(); // Does it fit in a unsigned long? if (ResultVal.isIntN(LongSize)) { // Does it fit in a signed long? if (!Literal.isUnsigned && ResultVal[LongSize-1] == 0) Ty = Context.LongTy; else if (AllowUnsigned) Ty = Context.UnsignedLongTy; // Check according to the rules of C90 6.1.3.2p5. C++03 [lex.icon]p2 // is compatible. else if (!getLangOpts().C99 && !getLangOpts().CPlusPlus11) { const unsigned LongLongSize = Context.getTargetInfo().getLongLongWidth(); Diag(Tok.getLocation(), getLangOpts().CPlusPlus ? Literal.isLong ? diag::warn_old_implicitly_unsigned_long_cxx : /*C++98 UB*/ diag:: ext_old_implicitly_unsigned_long_cxx : diag::warn_old_implicitly_unsigned_long) << (LongLongSize > LongSize ? /*will have type 'long long'*/ 0 : /*will be ill-formed*/ 1); Ty = Context.UnsignedLongTy; } Width = LongSize; } } // Check long long if needed. if (Ty.isNull()) { unsigned LongLongSize = Context.getTargetInfo().getLongLongWidth(); // Does it fit in a unsigned long long? if (ResultVal.isIntN(LongLongSize)) { // Does it fit in a signed long long? // To be compatible with MSVC, hex integer literals ending with the // LL or i64 suffix are always signed in Microsoft mode. if (!Literal.isUnsigned && (ResultVal[LongLongSize-1] == 0 || (getLangOpts().MSVCCompat && Literal.isLongLong))) Ty = Context.LongLongTy; else if (AllowUnsigned) Ty = Context.UnsignedLongLongTy; Width = LongLongSize; } } // If we still couldn't decide a type, we probably have something that // does not fit in a signed long long, but has no U suffix. 
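      // For example, assuming a 64-bit long long, the decimal literal
      // 18446744073709551615 has no U suffix and does not fit in any signed
      // type, so it is given type unsigned long long with a diagnostic below.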
if (Ty.isNull()) { Diag(Tok.getLocation(), diag::ext_integer_literal_too_large_for_signed); Ty = Context.UnsignedLongLongTy; Width = Context.getTargetInfo().getLongLongWidth(); } if (ResultVal.getBitWidth() != Width) ResultVal = ResultVal.trunc(Width); } Res = IntegerLiteral::Create(Context, ResultVal, Ty, Tok.getLocation()); } // If this is an imaginary literal, create the ImaginaryLiteral wrapper. if (Literal.isImaginary) { Res = new (Context) ImaginaryLiteral(Res, Context.getComplexType(Res->getType())); Diag(Tok.getLocation(), diag::ext_imaginary_constant); } return Res; } ExprResult Sema::ActOnParenExpr(SourceLocation L, SourceLocation R, Expr *E) { assert(E && "ActOnParenExpr() missing expr"); return new (Context) ParenExpr(L, R, E); } static bool CheckVecStepTraitOperandType(Sema &S, QualType T, SourceLocation Loc, SourceRange ArgRange) { // [OpenCL 1.1 6.11.12] "The vec_step built-in function takes a built-in // scalar or vector data type argument..." // Every built-in scalar type (OpenCL 1.1 6.1.1) is either an arithmetic // type (C99 6.2.5p18) or void. if (!(T->isArithmeticType() || T->isVoidType() || T->isVectorType())) { S.Diag(Loc, diag::err_vecstep_non_scalar_vector_type) << T << ArgRange; return true; } assert((T->isVoidType() || !T->isIncompleteType()) && "Scalar types should always be complete"); return false; } static bool CheckExtensionTraitOperandType(Sema &S, QualType T, SourceLocation Loc, SourceRange ArgRange, UnaryExprOrTypeTrait TraitKind) { // Invalid types must be hard errors for SFINAE in C++. if (S.LangOpts.CPlusPlus) return true; // C99 6.5.3.4p1: if (T->isFunctionType() && (TraitKind == UETT_SizeOf || TraitKind == UETT_AlignOf)) { // sizeof(function)/alignof(function) is allowed as an extension. S.Diag(Loc, diag::ext_sizeof_alignof_function_type) << TraitKind << ArgRange; return false; } // Allow sizeof(void)/alignof(void) as an extension, unless in OpenCL where // this is an error (OpenCL v1.1 s6.3.k) if (T->isVoidType()) { unsigned DiagID = S.LangOpts.OpenCL ? diag::err_opencl_sizeof_alignof_type : diag::ext_sizeof_alignof_void_type; S.Diag(Loc, DiagID) << TraitKind << ArgRange; return false; } return true; } static bool CheckObjCTraitOperandConstraints(Sema &S, QualType T, SourceLocation Loc, SourceRange ArgRange, UnaryExprOrTypeTrait TraitKind) { // Reject sizeof(interface) and sizeof(interface) if the // runtime doesn't allow it. if (!S.LangOpts.ObjCRuntime.allowsSizeofAlignof() && T->isObjCObjectType()) { S.Diag(Loc, diag::err_sizeof_nonfragile_interface) << T << (TraitKind == UETT_SizeOf) << ArgRange; return true; } return false; } /// \brief Check whether E is a pointer from a decayed array type (the decayed /// pointer type is equal to T) and emit a warning if it is. static void warnOnSizeofOnArrayDecay(Sema &S, SourceLocation Loc, QualType T, Expr *E) { // Don't warn if the operation changed the type. if (T != E->getType()) return; // Now look for array decays. ImplicitCastExpr *ICE = dyn_cast(E); if (!ICE || ICE->getCastKind() != CK_ArrayToPointerDecay) return; S.Diag(Loc, diag::warn_sizeof_array_decay) << ICE->getSourceRange() << ICE->getType() << ICE->getSubExpr()->getType(); } /// \brief Check the constraints on expression operands to unary type expression /// and type traits. /// /// Completes any types necessary and validates the constraints on the operand /// expression. The logic mostly mirrors the type-based overload, but may modify /// the expression as it completes the type for that expression through template /// instantiation, etc. 
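/// For example, `sizeof(a)` for `extern int a[];` attempts to complete the
/// array type here and is diagnosed if the bound is still unknown.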
bool Sema::CheckUnaryExprOrTypeTraitOperand(Expr *E, UnaryExprOrTypeTrait ExprKind) { QualType ExprTy = E->getType(); assert(!ExprTy->isReferenceType()); if (ExprKind == UETT_VecStep) return CheckVecStepTraitOperandType(*this, ExprTy, E->getExprLoc(), E->getSourceRange()); // Whitelist some types as extensions if (!CheckExtensionTraitOperandType(*this, ExprTy, E->getExprLoc(), E->getSourceRange(), ExprKind)) return false; // 'alignof' applied to an expression only requires the base element type of // the expression to be complete. 'sizeof' requires the expression's type to // be complete (and will attempt to complete it if it's an array of unknown // bound). if (ExprKind == UETT_AlignOf) { if (RequireCompleteType(E->getExprLoc(), Context.getBaseElementType(E->getType()), diag::err_sizeof_alignof_incomplete_type, ExprKind, E->getSourceRange())) return true; } else { if (RequireCompleteExprType(E, diag::err_sizeof_alignof_incomplete_type, ExprKind, E->getSourceRange())) return true; } // Completing the expression's type may have changed it. ExprTy = E->getType(); assert(!ExprTy->isReferenceType()); if (ExprTy->isFunctionType()) { Diag(E->getExprLoc(), diag::err_sizeof_alignof_function_type) << ExprKind << E->getSourceRange(); return true; } // The operand for sizeof and alignof is in an unevaluated expression context, // so side effects could result in unintended consequences. if ((ExprKind == UETT_SizeOf || ExprKind == UETT_AlignOf) && !inTemplateInstantiation() && E->HasSideEffects(Context, false)) Diag(E->getExprLoc(), diag::warn_side_effects_unevaluated_context); if (CheckObjCTraitOperandConstraints(*this, ExprTy, E->getExprLoc(), E->getSourceRange(), ExprKind)) return true; if (ExprKind == UETT_SizeOf) { if (DeclRefExpr *DeclRef = dyn_cast(E->IgnoreParens())) { if (ParmVarDecl *PVD = dyn_cast(DeclRef->getFoundDecl())) { QualType OType = PVD->getOriginalType(); QualType Type = PVD->getType(); if (Type->isPointerType() && OType->isArrayType()) { Diag(E->getExprLoc(), diag::warn_sizeof_array_param) << Type << OType; Diag(PVD->getLocation(), diag::note_declared_at); } } } // Warn on "sizeof(array op x)" and "sizeof(x op array)", where the array // decays into a pointer and returns an unintended result. This is most // likely a typo for "sizeof(array) op x". if (BinaryOperator *BO = dyn_cast(E->IgnoreParens())) { warnOnSizeofOnArrayDecay(*this, BO->getOperatorLoc(), BO->getType(), BO->getLHS()); warnOnSizeofOnArrayDecay(*this, BO->getOperatorLoc(), BO->getType(), BO->getRHS()); } } return false; } /// \brief Check the constraints on operands to unary expression and type /// traits. /// /// This will complete any types necessary, and validate the various constraints /// on those operands. /// /// The UsualUnaryConversions() function is *not* called by this routine. /// C99 6.3.2.1p[2-4] all state: /// Except when it is the operand of the sizeof operator ... /// /// C++ [expr.sizeof]p4 /// The lvalue-to-rvalue, array-to-pointer, and function-to-pointer /// standard conversions are not applied to the operand of sizeof. /// /// This policy is followed for all of the unary trait expressions. bool Sema::CheckUnaryExprOrTypeTraitOperand(QualType ExprType, SourceLocation OpLoc, SourceRange ExprRange, UnaryExprOrTypeTrait ExprKind) { if (ExprType->isDependentType()) return false; // C++ [expr.sizeof]p2: // When applied to a reference or a reference type, the result // is the size of the referenced type. 
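  //   For example, given `int n; int &r = n;`, sizeof(r) yields sizeof(int).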
// C++11 [expr.alignof]p3: // When alignof is applied to a reference type, the result // shall be the alignment of the referenced type. if (const ReferenceType *Ref = ExprType->getAs()) ExprType = Ref->getPointeeType(); // C11 6.5.3.4/3, C++11 [expr.alignof]p3: // When alignof or _Alignof is applied to an array type, the result // is the alignment of the element type. if (ExprKind == UETT_AlignOf || ExprKind == UETT_OpenMPRequiredSimdAlign) ExprType = Context.getBaseElementType(ExprType); if (ExprKind == UETT_VecStep) return CheckVecStepTraitOperandType(*this, ExprType, OpLoc, ExprRange); // Whitelist some types as extensions if (!CheckExtensionTraitOperandType(*this, ExprType, OpLoc, ExprRange, ExprKind)) return false; if (RequireCompleteType(OpLoc, ExprType, diag::err_sizeof_alignof_incomplete_type, ExprKind, ExprRange)) return true; if (ExprType->isFunctionType()) { Diag(OpLoc, diag::err_sizeof_alignof_function_type) << ExprKind << ExprRange; return true; } if (CheckObjCTraitOperandConstraints(*this, ExprType, OpLoc, ExprRange, ExprKind)) return true; return false; } static bool CheckAlignOfExpr(Sema &S, Expr *E) { E = E->IgnoreParens(); // Cannot know anything else if the expression is dependent. if (E->isTypeDependent()) return false; if (E->getObjectKind() == OK_BitField) { S.Diag(E->getExprLoc(), diag::err_sizeof_alignof_typeof_bitfield) << 1 << E->getSourceRange(); return true; } ValueDecl *D = nullptr; if (DeclRefExpr *DRE = dyn_cast(E)) { D = DRE->getDecl(); } else if (MemberExpr *ME = dyn_cast(E)) { D = ME->getMemberDecl(); } // If it's a field, require the containing struct to have a // complete definition so that we can compute the layout. // // This can happen in C++11 onwards, either by naming the member // in a way that is not transformed into a member access expression // (in an unevaluated operand, for instance), or by naming the member // in a trailing-return-type. // // For the record, since __alignof__ on expressions is a GCC // extension, GCC seems to permit this but always gives the // nonsensical answer 0. // // We don't really need the layout here --- we could instead just // directly check for all the appropriate alignment-lowing // attributes --- but that would require duplicating a lot of // logic that just isn't worth duplicating for such a marginal // use-case. if (FieldDecl *FD = dyn_cast_or_null(D)) { // Fast path this check, since we at least know the record has a // definition if we can find a member of it. if (!FD->getParent()->isCompleteDefinition()) { S.Diag(E->getExprLoc(), diag::err_alignof_member_of_incomplete_type) << E->getSourceRange(); return true; } // Otherwise, if it's a field, and the field doesn't have // reference type, then it must have a complete type (or be a // flexible array member, which we explicitly want to // white-list anyway), which makes the following checks trivial. if (!FD->getType()->isReferenceType()) return false; } return S.CheckUnaryExprOrTypeTraitOperand(E, UETT_AlignOf); } bool Sema::CheckVecStepExpr(Expr *E) { E = E->IgnoreParens(); // Cannot know anything else if the expression is dependent. if (E->isTypeDependent()) return false; return CheckUnaryExprOrTypeTraitOperand(E, UETT_VecStep); } static void captureVariablyModifiedType(ASTContext &Context, QualType T, CapturingScopeInfo *CSI) { assert(T->isVariablyModifiedType()); assert(CSI != nullptr); // We're going to walk down into the type and look for VLA expressions. 
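  // For example, for
  //   `void f(int n) { int a[n][4]; [&] { return sizeof(a); }(); }`
  // the walk finds the VariableArrayType for a's outer dimension so that its
  // size expression can be captured by the enclosing lambda's closure type.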
do { const Type *Ty = T.getTypePtr(); switch (Ty->getTypeClass()) { #define TYPE(Class, Base) #define ABSTRACT_TYPE(Class, Base) #define NON_CANONICAL_TYPE(Class, Base) #define DEPENDENT_TYPE(Class, Base) case Type::Class: #define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) #include "clang/AST/TypeNodes.def" T = QualType(); break; // These types are never variably-modified. case Type::Builtin: case Type::Complex: case Type::Vector: case Type::ExtVector: case Type::Record: case Type::Enum: case Type::Elaborated: case Type::TemplateSpecialization: case Type::ObjCObject: case Type::ObjCInterface: case Type::ObjCObjectPointer: case Type::ObjCTypeParam: case Type::Pipe: llvm_unreachable("type class is never variably-modified!"); case Type::Adjusted: T = cast(Ty)->getOriginalType(); break; case Type::Decayed: T = cast(Ty)->getPointeeType(); break; case Type::Pointer: T = cast(Ty)->getPointeeType(); break; case Type::BlockPointer: T = cast(Ty)->getPointeeType(); break; case Type::LValueReference: case Type::RValueReference: T = cast(Ty)->getPointeeType(); break; case Type::MemberPointer: T = cast(Ty)->getPointeeType(); break; case Type::ConstantArray: case Type::IncompleteArray: // Losing element qualification here is fine. T = cast(Ty)->getElementType(); break; case Type::VariableArray: { // Losing element qualification here is fine. const VariableArrayType *VAT = cast(Ty); // Unknown size indication requires no size computation. // Otherwise, evaluate and record it. if (auto Size = VAT->getSizeExpr()) { if (!CSI->isVLATypeCaptured(VAT)) { RecordDecl *CapRecord = nullptr; if (auto LSI = dyn_cast(CSI)) { CapRecord = LSI->Lambda; } else if (auto CRSI = dyn_cast(CSI)) { CapRecord = CRSI->TheRecordDecl; } if (CapRecord) { auto ExprLoc = Size->getExprLoc(); auto SizeType = Context.getSizeType(); // Build the non-static data member. auto Field = FieldDecl::Create(Context, CapRecord, ExprLoc, ExprLoc, /*Id*/ nullptr, SizeType, /*TInfo*/ nullptr, /*BW*/ nullptr, /*Mutable*/ false, /*InitStyle*/ ICIS_NoInit); Field->setImplicit(true); Field->setAccess(AS_private); Field->setCapturedVLAType(VAT); CapRecord->addDecl(Field); CSI->addVLATypeCapture(ExprLoc, SizeType); } } } T = VAT->getElementType(); break; } case Type::FunctionProto: case Type::FunctionNoProto: T = cast(Ty)->getReturnType(); break; case Type::Paren: case Type::TypeOf: case Type::UnaryTransform: case Type::Attributed: case Type::SubstTemplateTypeParm: case Type::PackExpansion: // Keep walking after single level desugaring. T = T.getSingleStepDesugaredType(Context); break; case Type::Typedef: T = cast(Ty)->desugar(); break; case Type::Decltype: T = cast(Ty)->desugar(); break; case Type::Auto: case Type::DeducedTemplateSpecialization: T = cast(Ty)->getDeducedType(); break; case Type::TypeOfExpr: T = cast(Ty)->getUnderlyingExpr()->getType(); break; case Type::Atomic: T = cast(Ty)->getValueType(); break; } } while (!T.isNull() && T->isVariablyModifiedType()); } /// \brief Build a sizeof or alignof expression given a type operand. 
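/// (For example, `sizeof(int[n])` reaches this overload with a type operand
/// describing the variably modified array type.)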
ExprResult Sema::CreateUnaryExprOrTypeTraitExpr(TypeSourceInfo *TInfo, SourceLocation OpLoc, UnaryExprOrTypeTrait ExprKind, SourceRange R) { if (!TInfo) return ExprError(); QualType T = TInfo->getType(); if (!T->isDependentType() && CheckUnaryExprOrTypeTraitOperand(T, OpLoc, R, ExprKind)) return ExprError(); if (T->isVariablyModifiedType() && FunctionScopes.size() > 1) { if (auto *TT = T->getAs()) { for (auto I = FunctionScopes.rbegin(), E = std::prev(FunctionScopes.rend()); I != E; ++I) { auto *CSI = dyn_cast(*I); if (CSI == nullptr) break; DeclContext *DC = nullptr; if (auto *LSI = dyn_cast(CSI)) DC = LSI->CallOperator; else if (auto *CRSI = dyn_cast(CSI)) DC = CRSI->TheCapturedDecl; else if (auto *BSI = dyn_cast(CSI)) DC = BSI->TheDecl; if (DC) { if (DC->containsDecl(TT->getDecl())) break; captureVariablyModifiedType(Context, T, CSI); } } } } // C99 6.5.3.4p4: the type (an unsigned integer type) is size_t. return new (Context) UnaryExprOrTypeTraitExpr( ExprKind, TInfo, Context.getSizeType(), OpLoc, R.getEnd()); } /// \brief Build a sizeof or alignof expression given an expression /// operand. ExprResult Sema::CreateUnaryExprOrTypeTraitExpr(Expr *E, SourceLocation OpLoc, UnaryExprOrTypeTrait ExprKind) { ExprResult PE = CheckPlaceholderExpr(E); if (PE.isInvalid()) return ExprError(); E = PE.get(); // Verify that the operand is valid. bool isInvalid = false; if (E->isTypeDependent()) { // Delay type-checking for type-dependent expressions. } else if (ExprKind == UETT_AlignOf) { isInvalid = CheckAlignOfExpr(*this, E); } else if (ExprKind == UETT_VecStep) { isInvalid = CheckVecStepExpr(E); } else if (ExprKind == UETT_OpenMPRequiredSimdAlign) { Diag(E->getExprLoc(), diag::err_openmp_default_simd_align_expr); isInvalid = true; } else if (E->refersToBitField()) { // C99 6.5.3.4p1. Diag(E->getExprLoc(), diag::err_sizeof_alignof_typeof_bitfield) << 0; isInvalid = true; } else { isInvalid = CheckUnaryExprOrTypeTraitOperand(E, UETT_SizeOf); } if (isInvalid) return ExprError(); if (ExprKind == UETT_SizeOf && E->getType()->isVariableArrayType()) { PE = TransformToPotentiallyEvaluated(E); if (PE.isInvalid()) return ExprError(); E = PE.get(); } // C99 6.5.3.4p4: the type (an unsigned integer type) is size_t. return new (Context) UnaryExprOrTypeTraitExpr( ExprKind, E, Context.getSizeType(), OpLoc, E->getSourceRange().getEnd()); } /// ActOnUnaryExprOrTypeTraitExpr - Handle @c sizeof(type) and @c sizeof @c /// expr and the same for @c alignof and @c __alignof /// Note that the ArgRange is invalid if isType is false. ExprResult Sema::ActOnUnaryExprOrTypeTraitExpr(SourceLocation OpLoc, UnaryExprOrTypeTrait ExprKind, bool IsType, void *TyOrEx, SourceRange ArgRange) { // If error parsing type, ignore. if (!TyOrEx) return ExprError(); if (IsType) { TypeSourceInfo *TInfo; (void) GetTypeFromParser(ParsedType::getFromOpaquePtr(TyOrEx), &TInfo); return CreateUnaryExprOrTypeTraitExpr(TInfo, OpLoc, ExprKind, ArgRange); } Expr *ArgEx = (Expr *)TyOrEx; ExprResult Result = CreateUnaryExprOrTypeTraitExpr(ArgEx, OpLoc, ExprKind); return Result; } static QualType CheckRealImagOperand(Sema &S, ExprResult &V, SourceLocation Loc, bool IsReal) { if (V.get()->isTypeDependent()) return S.Context.DependentTy; // _Real and _Imag are only l-values for normal l-values. if (V.get()->getObjectKind() != OK_Ordinary) { V = S.DefaultLvalueConversion(V.get()); if (V.isInvalid()) return QualType(); } // These operators return the element type of a complex type. 
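  // For example, for `_Complex double z;` both __real z and __imag z have
  // type double.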
  if (const ComplexType *CT = V.get()->getType()->getAs<ComplexType>())
    return CT->getElementType();

  // Otherwise they pass through real integer and floating point types here.
  if (V.get()->getType()->isArithmeticType())
    return V.get()->getType();

  // Test for placeholders.
  ExprResult PR = S.CheckPlaceholderExpr(V.get());
  if (PR.isInvalid()) return QualType();
  if (PR.get() != V.get()) {
    V = PR;
    return CheckRealImagOperand(S, V, Loc, IsReal);
  }

  // Reject anything else.
  S.Diag(Loc, diag::err_realimag_invalid_type) << V.get()->getType()
    << (IsReal ? "__real" : "__imag");
  return QualType();
}

ExprResult
Sema::ActOnPostfixUnaryOp(Scope *S, SourceLocation OpLoc,
                          tok::TokenKind Kind, Expr *Input) {
  UnaryOperatorKind Opc;
  switch (Kind) {
  default: llvm_unreachable("Unknown unary op!");
  case tok::plusplus:   Opc = UO_PostInc; break;
  case tok::minusminus: Opc = UO_PostDec; break;
  }

  // Since this might be a postfix expression, get rid of ParenListExprs.
  ExprResult Result = MaybeConvertParenListExprToParenExpr(S, Input);
  if (Result.isInvalid()) return ExprError();
  Input = Result.get();

  return BuildUnaryOp(S, OpLoc, Opc, Input);
}

/// \brief Diagnose if arithmetic on the given ObjC pointer is illegal.
///
/// \return true on error
static bool checkArithmeticOnObjCPointer(Sema &S,
                                         SourceLocation opLoc,
                                         Expr *op) {
  assert(op->getType()->isObjCObjectPointerType());
  if (S.LangOpts.ObjCRuntime.allowsPointerArithmetic() &&
      !S.LangOpts.ObjCSubscriptingLegacyRuntime)
    return false;

  S.Diag(opLoc, diag::err_arithmetic_nonfragile_interface)
    << op->getType()->castAs<ObjCObjectPointerType>()->getPointeeType()
    << op->getSourceRange();

  return true;
}

static bool isMSPropertySubscriptExpr(Sema &S, Expr *Base) {
  auto *BaseNoParens = Base->IgnoreParens();
  if (auto *MSProp = dyn_cast<MSPropertyRefExpr>(BaseNoParens))
    return MSProp->getPropertyDecl()->getType()->isArrayType();
  return isa<MSPropertySubscriptExpr>(BaseNoParens);
}

ExprResult
Sema::ActOnArraySubscriptExpr(Scope *S, Expr *base, SourceLocation lbLoc,
                              Expr *idx, SourceLocation rbLoc) {
  if (base && !base->getType().isNull() &&
      base->getType()->isSpecificPlaceholderType(BuiltinType::OMPArraySection))
    return ActOnOMPArraySectionExpr(base, lbLoc, idx, SourceLocation(),
                                    /*Length=*/nullptr, rbLoc);

  // Since this might be a postfix expression, get rid of ParenListExprs.
  if (isa<ParenListExpr>(base)) {
    ExprResult result = MaybeConvertParenListExprToParenExpr(S, base);
    if (result.isInvalid()) return ExprError();
    base = result.get();
  }

  // Handle any non-overload placeholder types in the base and index
  // expressions.  We can't handle overloads here because the other
  // operand might be an overloadable type, in which case the overload
  // resolution for the operator overload should get the first crack
  // at the overload.
  bool IsMSPropertySubscript = false;
  if (base->getType()->isNonOverloadPlaceholderType()) {
    IsMSPropertySubscript = isMSPropertySubscriptExpr(*this, base);
    if (!IsMSPropertySubscript) {
      ExprResult result = CheckPlaceholderExpr(base);
      if (result.isInvalid()) return ExprError();
      base = result.get();
    }
  }
  if (idx->getType()->isNonOverloadPlaceholderType()) {
    ExprResult result = CheckPlaceholderExpr(idx);
    if (result.isInvalid()) return ExprError();
    idx = result.get();
  }

  // Build an unanalyzed expression if either operand is type-dependent.
if (getLangOpts().CPlusPlus && (base->isTypeDependent() || idx->isTypeDependent())) { return new (Context) ArraySubscriptExpr(base, idx, Context.DependentTy, VK_LValue, OK_Ordinary, rbLoc); } // MSDN, property (C++) // https://msdn.microsoft.com/en-us/library/yhfk0thd(v=vs.120).aspx // This attribute can also be used in the declaration of an empty array in a // class or structure definition. For example: // __declspec(property(get=GetX, put=PutX)) int x[]; // The above statement indicates that x[] can be used with one or more array // indices. In this case, i=p->x[a][b] will be turned into i=p->GetX(a, b), // and p->x[a][b] = i will be turned into p->PutX(a, b, i); if (IsMSPropertySubscript) { // Build MS property subscript expression if base is MS property reference // or MS property subscript. return new (Context) MSPropertySubscriptExpr( base, idx, Context.PseudoObjectTy, VK_LValue, OK_Ordinary, rbLoc); } // Use C++ overloaded-operator rules if either operand has record // type. The spec says to do this if either type is *overloadable*, // but enum types can't declare subscript operators or conversion // operators, so there's nothing interesting for overload resolution // to do if there aren't any record types involved. // // ObjC pointers have their own subscripting logic that is not tied // to overload resolution and so should not take this path. if (getLangOpts().CPlusPlus && (base->getType()->isRecordType() || (!base->getType()->isObjCObjectPointerType() && idx->getType()->isRecordType()))) { return CreateOverloadedArraySubscriptExpr(lbLoc, rbLoc, base, idx); } return CreateBuiltinArraySubscriptExpr(base, lbLoc, idx, rbLoc); } ExprResult Sema::ActOnOMPArraySectionExpr(Expr *Base, SourceLocation LBLoc, Expr *LowerBound, SourceLocation ColonLoc, Expr *Length, SourceLocation RBLoc) { if (Base->getType()->isPlaceholderType() && !Base->getType()->isSpecificPlaceholderType( BuiltinType::OMPArraySection)) { ExprResult Result = CheckPlaceholderExpr(Base); if (Result.isInvalid()) return ExprError(); Base = Result.get(); } if (LowerBound && LowerBound->getType()->isNonOverloadPlaceholderType()) { ExprResult Result = CheckPlaceholderExpr(LowerBound); if (Result.isInvalid()) return ExprError(); Result = DefaultLvalueConversion(Result.get()); if (Result.isInvalid()) return ExprError(); LowerBound = Result.get(); } if (Length && Length->getType()->isNonOverloadPlaceholderType()) { ExprResult Result = CheckPlaceholderExpr(Length); if (Result.isInvalid()) return ExprError(); Result = DefaultLvalueConversion(Result.get()); if (Result.isInvalid()) return ExprError(); Length = Result.get(); } // Build an unanalyzed expression if either operand is type-dependent. if (Base->isTypeDependent() || (LowerBound && (LowerBound->isTypeDependent() || LowerBound->isValueDependent())) || (Length && (Length->isTypeDependent() || Length->isValueDependent()))) { return new (Context) OMPArraySectionExpr(Base, LowerBound, Length, Context.DependentTy, VK_LValue, OK_Ordinary, ColonLoc, RBLoc); } // Perform default conversions. 
QualType OriginalTy = OMPArraySectionExpr::getBaseOriginalType(Base); QualType ResultTy; if (OriginalTy->isAnyPointerType()) { ResultTy = OriginalTy->getPointeeType(); } else if (OriginalTy->isArrayType()) { ResultTy = OriginalTy->getAsArrayTypeUnsafe()->getElementType(); } else { return ExprError( Diag(Base->getExprLoc(), diag::err_omp_typecheck_section_value) << Base->getSourceRange()); } // C99 6.5.2.1p1 if (LowerBound) { auto Res = PerformOpenMPImplicitIntegerConversion(LowerBound->getExprLoc(), LowerBound); if (Res.isInvalid()) return ExprError(Diag(LowerBound->getExprLoc(), diag::err_omp_typecheck_section_not_integer) << 0 << LowerBound->getSourceRange()); LowerBound = Res.get(); if (LowerBound->getType()->isSpecificBuiltinType(BuiltinType::Char_S) || LowerBound->getType()->isSpecificBuiltinType(BuiltinType::Char_U)) Diag(LowerBound->getExprLoc(), diag::warn_omp_section_is_char) << 0 << LowerBound->getSourceRange(); } if (Length) { auto Res = PerformOpenMPImplicitIntegerConversion(Length->getExprLoc(), Length); if (Res.isInvalid()) return ExprError(Diag(Length->getExprLoc(), diag::err_omp_typecheck_section_not_integer) << 1 << Length->getSourceRange()); Length = Res.get(); if (Length->getType()->isSpecificBuiltinType(BuiltinType::Char_S) || Length->getType()->isSpecificBuiltinType(BuiltinType::Char_U)) Diag(Length->getExprLoc(), diag::warn_omp_section_is_char) << 1 << Length->getSourceRange(); } // C99 6.5.2.1p1: "shall have type "pointer to *object* type". Similarly, // C++ [expr.sub]p1: The type "T" shall be a completely-defined object // type. Note that functions are not objects, and that (in C99 parlance) // incomplete types are not object types. if (ResultTy->isFunctionType()) { Diag(Base->getExprLoc(), diag::err_omp_section_function_type) << ResultTy << Base->getSourceRange(); return ExprError(); } if (RequireCompleteType(Base->getExprLoc(), ResultTy, diag::err_omp_section_incomplete_type, Base)) return ExprError(); if (LowerBound && !OriginalTy->isAnyPointerType()) { llvm::APSInt LowerBoundValue; if (LowerBound->EvaluateAsInt(LowerBoundValue, Context)) { // OpenMP 4.5, [2.4 Array Sections] // The array section must be a subset of the original array. if (LowerBoundValue.isNegative()) { Diag(LowerBound->getExprLoc(), diag::err_omp_section_not_subset_of_array) << LowerBound->getSourceRange(); return ExprError(); } } } if (Length) { llvm::APSInt LengthValue; if (Length->EvaluateAsInt(LengthValue, Context)) { // OpenMP 4.5, [2.4 Array Sections] // The length must evaluate to non-negative integers. if (LengthValue.isNegative()) { Diag(Length->getExprLoc(), diag::err_omp_section_length_negative) << LengthValue.toString(/*Radix=*/10, /*Signed=*/true) << Length->getSourceRange(); return ExprError(); } } } else if (ColonLoc.isValid() && (OriginalTy.isNull() || (!OriginalTy->isConstantArrayType() && !OriginalTy->isVariableArrayType()))) { // OpenMP 4.5, [2.4 Array Sections] // When the size of the array dimension is not known, the length must be // specified explicitly. 
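    // For example, `p[0:n]` is valid for `int *p`, but `p[0:]` is not,
    // since a pointer provides no dimension from which to infer the length.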
Diag(ColonLoc, diag::err_omp_section_length_undefined) << (!OriginalTy.isNull() && OriginalTy->isArrayType()); return ExprError(); } if (!Base->getType()->isSpecificPlaceholderType( BuiltinType::OMPArraySection)) { ExprResult Result = DefaultFunctionArrayLvalueConversion(Base); if (Result.isInvalid()) return ExprError(); Base = Result.get(); } return new (Context) OMPArraySectionExpr(Base, LowerBound, Length, Context.OMPArraySectionTy, VK_LValue, OK_Ordinary, ColonLoc, RBLoc); } ExprResult Sema::CreateBuiltinArraySubscriptExpr(Expr *Base, SourceLocation LLoc, Expr *Idx, SourceLocation RLoc) { Expr *LHSExp = Base; Expr *RHSExp = Idx; ExprValueKind VK = VK_LValue; ExprObjectKind OK = OK_Ordinary; // Per C++ core issue 1213, the result is an xvalue if either operand is // a non-lvalue array, and an lvalue otherwise. if (getLangOpts().CPlusPlus11 && ((LHSExp->getType()->isArrayType() && !LHSExp->isLValue()) || (RHSExp->getType()->isArrayType() && !RHSExp->isLValue()))) VK = VK_XValue; // Perform default conversions. if (!LHSExp->getType()->getAs()) { ExprResult Result = DefaultFunctionArrayLvalueConversion(LHSExp); if (Result.isInvalid()) return ExprError(); LHSExp = Result.get(); } ExprResult Result = DefaultFunctionArrayLvalueConversion(RHSExp); if (Result.isInvalid()) return ExprError(); RHSExp = Result.get(); QualType LHSTy = LHSExp->getType(), RHSTy = RHSExp->getType(); // C99 6.5.2.1p2: the expression e1[e2] is by definition precisely equivalent // to the expression *((e1)+(e2)). This means the array "Base" may actually be // in the subscript position. As a result, we need to derive the array base // and index from the expression types. Expr *BaseExpr, *IndexExpr; QualType ResultType; if (LHSTy->isDependentType() || RHSTy->isDependentType()) { BaseExpr = LHSExp; IndexExpr = RHSExp; ResultType = Context.DependentTy; } else if (const PointerType *PTy = LHSTy->getAs()) { BaseExpr = LHSExp; IndexExpr = RHSExp; ResultType = PTy->getPointeeType(); } else if (const ObjCObjectPointerType *PTy = LHSTy->getAs()) { BaseExpr = LHSExp; IndexExpr = RHSExp; // Use custom logic if this should be the pseudo-object subscript // expression. if (!LangOpts.isSubscriptPointerArithmetic()) return BuildObjCSubscriptExpression(RLoc, BaseExpr, IndexExpr, nullptr, nullptr); ResultType = PTy->getPointeeType(); } else if (const PointerType *PTy = RHSTy->getAs()) { // Handle the uncommon case of "123[Ptr]". BaseExpr = RHSExp; IndexExpr = LHSExp; ResultType = PTy->getPointeeType(); } else if (const ObjCObjectPointerType *PTy = RHSTy->getAs()) { // Handle the uncommon case of "123[Ptr]". BaseExpr = RHSExp; IndexExpr = LHSExp; ResultType = PTy->getPointeeType(); if (!LangOpts.isSubscriptPointerArithmetic()) { Diag(LLoc, diag::err_subscript_nonfragile_interface) << ResultType << BaseExpr->getSourceRange(); return ExprError(); } } else if (const VectorType *VTy = LHSTy->getAs()) { BaseExpr = LHSExp; // vectors: V[123] IndexExpr = RHSExp; VK = LHSExp->getValueKind(); if (VK != VK_RValue) OK = OK_VectorComponent; // FIXME: need to deal with const... ResultType = VTy->getElementType(); } else if (LHSTy->isArrayType()) { // If we see an array that wasn't promoted by // DefaultFunctionArrayLvalueConversion, it must be an array that // wasn't promoted because of the C90 rule that doesn't // allow promoting non-lvalue arrays. Warn, then // force the promotion here. 
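    // For example, in C90, given `struct S { int a[4]; } f();`, the
    // expression `f().a[0]` subscripts an array that is not an lvalue and
    // so was not promoted above.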
Diag(LHSExp->getLocStart(), diag::ext_subscript_non_lvalue) << LHSExp->getSourceRange(); LHSExp = ImpCastExprToType(LHSExp, Context.getArrayDecayedType(LHSTy), CK_ArrayToPointerDecay).get(); LHSTy = LHSExp->getType(); BaseExpr = LHSExp; IndexExpr = RHSExp; ResultType = LHSTy->getAs()->getPointeeType(); } else if (RHSTy->isArrayType()) { // Same as previous, except for 123[f().a] case Diag(RHSExp->getLocStart(), diag::ext_subscript_non_lvalue) << RHSExp->getSourceRange(); RHSExp = ImpCastExprToType(RHSExp, Context.getArrayDecayedType(RHSTy), CK_ArrayToPointerDecay).get(); RHSTy = RHSExp->getType(); BaseExpr = RHSExp; IndexExpr = LHSExp; ResultType = RHSTy->getAs()->getPointeeType(); } else { return ExprError(Diag(LLoc, diag::err_typecheck_subscript_value) << LHSExp->getSourceRange() << RHSExp->getSourceRange()); } // C99 6.5.2.1p1 if (!IndexExpr->getType()->isIntegerType() && !IndexExpr->isTypeDependent()) return ExprError(Diag(LLoc, diag::err_typecheck_subscript_not_integer) << IndexExpr->getSourceRange()); if ((IndexExpr->getType()->isSpecificBuiltinType(BuiltinType::Char_S) || IndexExpr->getType()->isSpecificBuiltinType(BuiltinType::Char_U)) && !IndexExpr->isTypeDependent()) Diag(LLoc, diag::warn_subscript_is_char) << IndexExpr->getSourceRange(); // C99 6.5.2.1p1: "shall have type "pointer to *object* type". Similarly, // C++ [expr.sub]p1: The type "T" shall be a completely-defined object // type. Note that Functions are not objects, and that (in C99 parlance) // incomplete types are not object types. if (ResultType->isFunctionType()) { Diag(BaseExpr->getLocStart(), diag::err_subscript_function_type) << ResultType << BaseExpr->getSourceRange(); return ExprError(); } if (ResultType->isVoidType() && !getLangOpts().CPlusPlus) { // GNU extension: subscripting on pointer to void Diag(LLoc, diag::ext_gnu_subscript_void_type) << BaseExpr->getSourceRange(); // C forbids expressions of unqualified void type from being l-values. // See IsCForbiddenLValueType. if (!ResultType.hasQualifiers()) VK = VK_RValue; } else if (!ResultType->isDependentType() && RequireCompleteType(LLoc, ResultType, diag::err_subscript_incomplete_type, BaseExpr)) return ExprError(); assert(VK == VK_RValue || LangOpts.CPlusPlus || !ResultType.isCForbiddenLValueType()); return new (Context) ArraySubscriptExpr(LHSExp, RHSExp, ResultType, VK, OK, RLoc); } bool Sema::CheckCXXDefaultArgExpr(SourceLocation CallLoc, FunctionDecl *FD, ParmVarDecl *Param) { if (Param->hasUnparsedDefaultArg()) { Diag(CallLoc, diag::err_use_of_default_argument_to_function_declared_later) << FD << cast(FD->getDeclContext())->getDeclName(); Diag(UnparsedDefaultArgLocs[Param], diag::note_default_argument_declared_here); return true; } if (Param->hasUninstantiatedDefaultArg()) { Expr *UninstExpr = Param->getUninstantiatedDefaultArg(); EnterExpressionEvaluationContext EvalContext( *this, ExpressionEvaluationContext::PotentiallyEvaluated, Param); // Instantiate the expression. // // FIXME: Pass in a correct Pattern argument, otherwise // getTemplateInstantiationArgs uses the lexical context of FD, e.g. // // template // struct A { // static int FooImpl(); // // template // // bug: default argument A::FooImpl() is evaluated with 2-level // // template argument list [[T], [Tp]], should be [[Tp]]. 
// friend A Foo(int a); // }; // // template // A Foo(int a = A::FooImpl()); MultiLevelTemplateArgumentList MutiLevelArgList = getTemplateInstantiationArgs(FD, nullptr, /*RelativeToPrimary=*/true); InstantiatingTemplate Inst(*this, CallLoc, Param, MutiLevelArgList.getInnermost()); if (Inst.isInvalid()) return true; if (Inst.isAlreadyInstantiating()) { Diag(Param->getLocStart(), diag::err_recursive_default_argument) << FD; Param->setInvalidDecl(); return true; } ExprResult Result; { // C++ [dcl.fct.default]p5: // The names in the [default argument] expression are bound, and // the semantic constraints are checked, at the point where the // default argument expression appears. ContextRAII SavedContext(*this, FD); LocalInstantiationScope Local(*this); Result = SubstInitializer(UninstExpr, MutiLevelArgList, /*DirectInit*/false); } if (Result.isInvalid()) return true; // Check the expression as an initializer for the parameter. InitializedEntity Entity = InitializedEntity::InitializeParameter(Context, Param); InitializationKind Kind = InitializationKind::CreateCopy(Param->getLocation(), /*FIXME:EqualLoc*/UninstExpr->getLocStart()); Expr *ResultE = Result.getAs(); InitializationSequence InitSeq(*this, Entity, Kind, ResultE); Result = InitSeq.Perform(*this, Entity, Kind, ResultE); if (Result.isInvalid()) return true; Result = ActOnFinishFullExpr(Result.getAs(), Param->getOuterLocStart()); if (Result.isInvalid()) return true; // Remember the instantiated default argument. Param->setDefaultArg(Result.getAs()); if (ASTMutationListener *L = getASTMutationListener()) { L->DefaultArgumentInstantiated(Param); } } // If the default argument expression is not set yet, we are building it now. if (!Param->hasInit()) { Diag(Param->getLocStart(), diag::err_recursive_default_argument) << FD; Param->setInvalidDecl(); return true; } // If the default expression creates temporaries, we need to // push them to the current stack of expression temporaries so they'll // be properly destroyed. // FIXME: We should really be rebuilding the default argument with new // bound temporaries; see the comment in PR5810. // We don't need to do that with block decls, though, because // blocks in default argument expression can never capture anything. if (auto Init = dyn_cast(Param->getInit())) { // Set the "needs cleanups" bit regardless of whether there are // any explicit objects. Cleanup.setExprNeedsCleanups(Init->cleanupsHaveSideEffects()); // Append all the objects to the cleanup list. Right now, this // should always be a no-op, because blocks in default argument // expressions should never be able to capture anything. assert(!Init->getNumObjects() && "default argument expression has capturing blocks?"); } // We already type-checked the argument, so we know it works. // Just mark all of the declarations in this potentially-evaluated expression // as being "referenced". 
MarkDeclarationsReferencedInExpr(Param->getDefaultArg(), /*SkipLocalVariables=*/true); return false; } ExprResult Sema::BuildCXXDefaultArgExpr(SourceLocation CallLoc, FunctionDecl *FD, ParmVarDecl *Param) { if (CheckCXXDefaultArgExpr(CallLoc, FD, Param)) return ExprError(); return CXXDefaultArgExpr::Create(Context, CallLoc, Param); } Sema::VariadicCallType Sema::getVariadicCallType(FunctionDecl *FDecl, const FunctionProtoType *Proto, Expr *Fn) { if (Proto && Proto->isVariadic()) { if (dyn_cast_or_null(FDecl)) return VariadicConstructor; else if (Fn && Fn->getType()->isBlockPointerType()) return VariadicBlock; else if (FDecl) { if (CXXMethodDecl *Method = dyn_cast_or_null(FDecl)) if (Method->isInstance()) return VariadicMethod; } else if (Fn && Fn->getType() == Context.BoundMemberTy) return VariadicMethod; return VariadicFunction; } return VariadicDoesNotApply; } namespace { class FunctionCallCCC : public FunctionCallFilterCCC { public: FunctionCallCCC(Sema &SemaRef, const IdentifierInfo *FuncName, unsigned NumArgs, MemberExpr *ME) : FunctionCallFilterCCC(SemaRef, NumArgs, false, ME), FunctionName(FuncName) {} bool ValidateCandidate(const TypoCorrection &candidate) override { if (!candidate.getCorrectionSpecifier() || candidate.getCorrectionAsIdentifierInfo() != FunctionName) { return false; } return FunctionCallFilterCCC::ValidateCandidate(candidate); } private: const IdentifierInfo *const FunctionName; }; } static TypoCorrection TryTypoCorrectionForCall(Sema &S, Expr *Fn, FunctionDecl *FDecl, ArrayRef Args) { MemberExpr *ME = dyn_cast(Fn); DeclarationName FuncName = FDecl->getDeclName(); SourceLocation NameLoc = ME ? ME->getMemberLoc() : Fn->getLocStart(); if (TypoCorrection Corrected = S.CorrectTypo( DeclarationNameInfo(FuncName, NameLoc), Sema::LookupOrdinaryName, S.getScopeForContext(S.CurContext), nullptr, llvm::make_unique(S, FuncName.getAsIdentifierInfo(), Args.size(), ME), Sema::CTK_ErrorRecovery)) { if (NamedDecl *ND = Corrected.getFoundDecl()) { if (Corrected.isOverloaded()) { OverloadCandidateSet OCS(NameLoc, OverloadCandidateSet::CSK_Normal); OverloadCandidateSet::iterator Best; for (NamedDecl *CD : Corrected) { if (FunctionDecl *FD = dyn_cast(CD)) S.AddOverloadCandidate(FD, DeclAccessPair::make(FD, AS_none), Args, OCS); } switch (OCS.BestViableFunction(S, NameLoc, Best)) { case OR_Success: ND = Best->FoundDecl; Corrected.setCorrectionDecl(ND); break; default: break; } } ND = ND->getUnderlyingDecl(); if (isa(ND) || isa(ND)) return Corrected; } } return TypoCorrection(); } /// ConvertArgumentsForCall - Converts the arguments specified in /// Args/NumArgs to the parameter types of the function FDecl with /// function prototype Proto. Call is the call expression itself, and /// Fn is the function expression. For a C++ member function, this /// routine does not attempt to convert the object argument. Returns /// true if the call is ill-formed. bool Sema::ConvertArgumentsForCall(CallExpr *Call, Expr *Fn, FunctionDecl *FDecl, const FunctionProtoType *Proto, ArrayRef Args, SourceLocation RParenLoc, bool IsExecConfig) { // Bail out early if calling a builtin with custom typechecking. if (FDecl) if (unsigned ID = FDecl->getBuiltinID()) if (Context.BuiltinInfo.hasCustomTypechecking(ID)) return false; // C99 6.5.2.2p7 - the arguments are implicitly converted, as if by // assignment, to the types of the corresponding parameter, ... unsigned NumParams = Proto->getNumParams(); bool Invalid = false; unsigned MinArgs = FDecl ? 
FDecl->getMinRequiredArguments() : NumParams; unsigned FnKind = Fn->getType()->isBlockPointerType() ? 1 /* block */ : (IsExecConfig ? 3 /* kernel function (exec config) */ : 0 /* function */); // If too few arguments are available (and we don't have default // arguments for the remaining parameters), don't make the call. if (Args.size() < NumParams) { if (Args.size() < MinArgs) { TypoCorrection TC; if (FDecl && (TC = TryTypoCorrectionForCall(*this, Fn, FDecl, Args))) { unsigned diag_id = MinArgs == NumParams && !Proto->isVariadic() ? diag::err_typecheck_call_too_few_args_suggest : diag::err_typecheck_call_too_few_args_at_least_suggest; diagnoseTypo(TC, PDiag(diag_id) << FnKind << MinArgs << static_cast(Args.size()) << TC.getCorrectionRange()); } else if (MinArgs == 1 && FDecl && FDecl->getParamDecl(0)->getDeclName()) Diag(RParenLoc, MinArgs == NumParams && !Proto->isVariadic() ? diag::err_typecheck_call_too_few_args_one : diag::err_typecheck_call_too_few_args_at_least_one) << FnKind << FDecl->getParamDecl(0) << Fn->getSourceRange(); else Diag(RParenLoc, MinArgs == NumParams && !Proto->isVariadic() ? diag::err_typecheck_call_too_few_args : diag::err_typecheck_call_too_few_args_at_least) << FnKind << MinArgs << static_cast(Args.size()) << Fn->getSourceRange(); // Emit the location of the prototype. if (!TC && FDecl && !FDecl->getBuiltinID() && !IsExecConfig) Diag(FDecl->getLocStart(), diag::note_callee_decl) << FDecl; return true; } Call->setNumArgs(Context, NumParams); } // If too many are passed and not variadic, error on the extras and drop // them. if (Args.size() > NumParams) { if (!Proto->isVariadic()) { TypoCorrection TC; if (FDecl && (TC = TryTypoCorrectionForCall(*this, Fn, FDecl, Args))) { unsigned diag_id = MinArgs == NumParams && !Proto->isVariadic() ? diag::err_typecheck_call_too_many_args_suggest : diag::err_typecheck_call_too_many_args_at_most_suggest; diagnoseTypo(TC, PDiag(diag_id) << FnKind << NumParams << static_cast(Args.size()) << TC.getCorrectionRange()); } else if (NumParams == 1 && FDecl && FDecl->getParamDecl(0)->getDeclName()) Diag(Args[NumParams]->getLocStart(), MinArgs == NumParams ? diag::err_typecheck_call_too_many_args_one : diag::err_typecheck_call_too_many_args_at_most_one) << FnKind << FDecl->getParamDecl(0) << static_cast(Args.size()) << Fn->getSourceRange() << SourceRange(Args[NumParams]->getLocStart(), Args.back()->getLocEnd()); else Diag(Args[NumParams]->getLocStart(), MinArgs == NumParams ? diag::err_typecheck_call_too_many_args : diag::err_typecheck_call_too_many_args_at_most) << FnKind << NumParams << static_cast(Args.size()) << Fn->getSourceRange() << SourceRange(Args[NumParams]->getLocStart(), Args.back()->getLocEnd()); // Emit the location of the prototype. if (!TC && FDecl && !FDecl->getBuiltinID() && !IsExecConfig) Diag(FDecl->getLocStart(), diag::note_callee_decl) << FDecl; // This deletes the extra arguments. 
Call->setNumArgs(Context, NumParams); return true; } } SmallVector AllArgs; VariadicCallType CallType = getVariadicCallType(FDecl, Proto, Fn); Invalid = GatherArgumentsForCall(Call->getLocStart(), FDecl, Proto, 0, Args, AllArgs, CallType); if (Invalid) return true; unsigned TotalNumArgs = AllArgs.size(); for (unsigned i = 0; i < TotalNumArgs; ++i) Call->setArg(i, AllArgs[i]); return false; } bool Sema::GatherArgumentsForCall(SourceLocation CallLoc, FunctionDecl *FDecl, const FunctionProtoType *Proto, unsigned FirstParam, ArrayRef Args, SmallVectorImpl &AllArgs, VariadicCallType CallType, bool AllowExplicit, bool IsListInitialization) { unsigned NumParams = Proto->getNumParams(); bool Invalid = false; size_t ArgIx = 0; // Continue to check argument types (even if we have too few/many args). for (unsigned i = FirstParam; i < NumParams; i++) { QualType ProtoArgType = Proto->getParamType(i); Expr *Arg; ParmVarDecl *Param = FDecl ? FDecl->getParamDecl(i) : nullptr; if (ArgIx < Args.size()) { Arg = Args[ArgIx++]; if (RequireCompleteType(Arg->getLocStart(), ProtoArgType, diag::err_call_incomplete_argument, Arg)) return true; // Strip the unbridged-cast placeholder expression off, if applicable. bool CFAudited = false; if (Arg->getType() == Context.ARCUnbridgedCastTy && FDecl && FDecl->hasAttr() && (!Param || !Param->hasAttr())) Arg = stripARCUnbridgedCast(Arg); else if (getLangOpts().ObjCAutoRefCount && FDecl && FDecl->hasAttr() && (!Param || !Param->hasAttr())) CFAudited = true; InitializedEntity Entity = Param ? InitializedEntity::InitializeParameter(Context, Param, ProtoArgType) : InitializedEntity::InitializeParameter( Context, ProtoArgType, Proto->isParamConsumed(i)); // Remember that parameter belongs to a CF audited API. if (CFAudited) Entity.setParameterCFAudited(); ExprResult ArgE = PerformCopyInitialization( Entity, SourceLocation(), Arg, IsListInitialization, AllowExplicit); if (ArgE.isInvalid()) return true; Arg = ArgE.getAs(); } else { assert(Param && "can't use default arguments without a known callee"); ExprResult ArgExpr = BuildCXXDefaultArgExpr(CallLoc, FDecl, Param); if (ArgExpr.isInvalid()) return true; Arg = ArgExpr.getAs(); } // Check for array bounds violations for each argument to the call. This // check only triggers warnings when the argument isn't a more complex Expr // with its own checking, such as a BinaryOperator. CheckArrayAccess(Arg); // Check for violations of C99 static array rules (C99 6.7.5.3p7). CheckStaticArrayArgument(CallLoc, Param, Arg); AllArgs.push_back(Arg); } // If this is a variadic call, handle args passed through "...". if (CallType != VariadicDoesNotApply) { // Assume that extern "C" functions with variadic arguments that // return __unknown_anytype aren't *really* variadic. if (Proto->getReturnType() == Context.UnknownAnyTy && FDecl && FDecl->isExternC()) { for (Expr *A : Args.slice(ArgIx)) { QualType paramType; // ignored ExprResult arg = checkUnknownAnyArg(CallLoc, A, paramType); Invalid |= arg.isInvalid(); AllArgs.push_back(arg.get()); } // Otherwise do argument promotion, (C99 6.5.2.2p7). } else { for (Expr *A : Args.slice(ArgIx)) { ExprResult Arg = DefaultVariadicArgumentPromotion(A, CallType, FDecl); Invalid |= Arg.isInvalid(); AllArgs.push_back(Arg.get()); } } // Check for array bounds violations. 
for (Expr *A : Args.slice(ArgIx)) CheckArrayAccess(A); } return Invalid; } static void DiagnoseCalleeStaticArrayParam(Sema &S, ParmVarDecl *PVD) { TypeLoc TL = PVD->getTypeSourceInfo()->getTypeLoc(); if (DecayedTypeLoc DTL = TL.getAs()) TL = DTL.getOriginalLoc(); if (ArrayTypeLoc ATL = TL.getAs()) S.Diag(PVD->getLocation(), diag::note_callee_static_array) << ATL.getLocalSourceRange(); } /// CheckStaticArrayArgument - If the given argument corresponds to a static /// array parameter, check that it is non-null, and that if it is formed by /// array-to-pointer decay, the underlying array is sufficiently large. /// /// C99 6.7.5.3p7: If the keyword static also appears within the [ and ] of the /// array type derivation, then for each call to the function, the value of the /// corresponding actual argument shall provide access to the first element of /// an array with at least as many elements as specified by the size expression. void Sema::CheckStaticArrayArgument(SourceLocation CallLoc, ParmVarDecl *Param, const Expr *ArgExpr) { // Static array parameters are not supported in C++. if (!Param || getLangOpts().CPlusPlus) return; QualType OrigTy = Param->getOriginalType(); const ArrayType *AT = Context.getAsArrayType(OrigTy); if (!AT || AT->getSizeModifier() != ArrayType::Static) return; if (ArgExpr->isNullPointerConstant(Context, Expr::NPC_NeverValueDependent)) { Diag(CallLoc, diag::warn_null_arg) << ArgExpr->getSourceRange(); DiagnoseCalleeStaticArrayParam(*this, Param); return; } const ConstantArrayType *CAT = dyn_cast(AT); if (!CAT) return; const ConstantArrayType *ArgCAT = Context.getAsConstantArrayType(ArgExpr->IgnoreParenImpCasts()->getType()); if (!ArgCAT) return; if (ArgCAT->getSize().ult(CAT->getSize())) { Diag(CallLoc, diag::warn_static_array_too_small) << ArgExpr->getSourceRange() << (unsigned) ArgCAT->getSize().getZExtValue() << (unsigned) CAT->getSize().getZExtValue(); DiagnoseCalleeStaticArrayParam(*this, Param); } } /// Given a function expression of unknown-any type, try to rebuild it /// to have a function type. static ExprResult rebuildUnknownAnyFunction(Sema &S, Expr *fn); /// Is the given type a placeholder that we need to lower out /// immediately during argument processing? static bool isPlaceholderToRemoveAsArg(QualType type) { // Placeholders are never sugared. const BuiltinType *placeholder = dyn_cast(type); if (!placeholder) return false; switch (placeholder->getKind()) { // Ignore all the non-placeholder types. #define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \ case BuiltinType::Id: #include "clang/Basic/OpenCLImageTypes.def" #define PLACEHOLDER_TYPE(ID, SINGLETON_ID) #define BUILTIN_TYPE(ID, SINGLETON_ID) case BuiltinType::ID: #include "clang/AST/BuiltinTypes.def" return false; // We cannot lower out overload sets; they might validly be resolved // by the call machinery. case BuiltinType::Overload: return false; // Unbridged casts in ARC can be handled in some call positions and // should be left in place. case BuiltinType::ARCUnbridgedCast: return false; // Pseudo-objects should be converted as soon as possible. case BuiltinType::PseudoObject: return true; // The debugger mode could theoretically but currently does not try // to resolve unknown-typed arguments based on known parameter types. case BuiltinType::UnknownAny: return true; // These are always invalid as call arguments and should be reported. 
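  // For example, passing a bound member function reference such as
  // `obj.method` as a call argument yields a BoundMember placeholder here.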
case BuiltinType::BoundMember: case BuiltinType::BuiltinFn: case BuiltinType::OMPArraySection: return true; } llvm_unreachable("bad builtin type kind"); } /// Check an argument list for placeholders that we won't try to /// handle later. static bool checkArgsForPlaceholders(Sema &S, MultiExprArg args) { // Apply this processing to all the arguments at once instead of // dying at the first failure. bool hasInvalid = false; for (size_t i = 0, e = args.size(); i != e; i++) { if (isPlaceholderToRemoveAsArg(args[i]->getType())) { ExprResult result = S.CheckPlaceholderExpr(args[i]); if (result.isInvalid()) hasInvalid = true; else args[i] = result.get(); } else if (hasInvalid) { (void)S.CorrectDelayedTyposInExpr(args[i]); } } return hasInvalid; } /// If a builtin function has a pointer argument with no explicit address /// space, then it should be able to accept a pointer to any address /// space as input. In order to do this, we need to replace the /// standard builtin declaration with one that uses the same address space /// as the call. /// /// \returns nullptr If this builtin is not a candidate for a rewrite i.e. /// it does not contain any pointer arguments without /// an address space qualifer. Otherwise the rewritten /// FunctionDecl is returned. /// TODO: Handle pointer return types. static FunctionDecl *rewriteBuiltinFunctionDecl(Sema *Sema, ASTContext &Context, const FunctionDecl *FDecl, MultiExprArg ArgExprs) { QualType DeclType = FDecl->getType(); const FunctionProtoType *FT = dyn_cast(DeclType); if (!Context.BuiltinInfo.hasPtrArgsOrResult(FDecl->getBuiltinID()) || !FT || FT->isVariadic() || ArgExprs.size() != FT->getNumParams()) return nullptr; bool NeedsNewDecl = false; unsigned i = 0; SmallVector OverloadParams; for (QualType ParamType : FT->param_types()) { // Convert array arguments to pointer to simplify type lookup. ExprResult ArgRes = Sema->DefaultFunctionArrayLvalueConversion(ArgExprs[i++]); if (ArgRes.isInvalid()) return nullptr; Expr *Arg = ArgRes.get(); QualType ArgType = Arg->getType(); if (!ParamType->isPointerType() || ParamType.getQualifiers().hasAddressSpace() || !ArgType->isPointerType() || !ArgType->getPointeeType().getQualifiers().hasAddressSpace()) { OverloadParams.push_back(ParamType); continue; } NeedsNewDecl = true; LangAS AS = ArgType->getPointeeType().getAddressSpace(); QualType PointeeType = ParamType->getPointeeType(); PointeeType = Context.getAddrSpaceQualType(PointeeType, AS); OverloadParams.push_back(Context.getPointerType(PointeeType)); } if (!NeedsNewDecl) return nullptr; FunctionProtoType::ExtProtoInfo EPI; QualType OverloadTy = Context.getFunctionType(FT->getReturnType(), OverloadParams, EPI); DeclContext *Parent = Context.getTranslationUnitDecl(); FunctionDecl *OverloadDecl = FunctionDecl::Create(Context, Parent, FDecl->getLocation(), FDecl->getLocation(), FDecl->getIdentifier(), OverloadTy, /*TInfo=*/nullptr, SC_Extern, false, /*hasPrototype=*/true); SmallVector Params; FT = cast(OverloadTy); for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) { QualType ParamType = FT->getParamType(i); ParmVarDecl *Parm = ParmVarDecl::Create(Context, OverloadDecl, SourceLocation(), SourceLocation(), nullptr, ParamType, /*TInfo=*/nullptr, SC_None, nullptr); Parm->setScopeInfo(0, i); Params.push_back(Parm); } OverloadDecl->setParams(Params); return OverloadDecl; } static void checkDirectCallValidity(Sema &S, const Expr *Fn, FunctionDecl *Callee, MultiExprArg ArgExprs) { // `Callee` (when called with ArgExprs) may be ill-formed. 
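  // A hypothetical sketch of such a call:
  //
  //   int pick(int n) __attribute__((enable_if(n > 0, "n must be positive")));
  //   int x = pick(-1);   // sole candidate is disabled; diagnosed below with
  //                       // a note pointing at the enable_if condition
  //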
  // enable_if (and similar attributes) really don't like it when functions
  // are called with an invalid number of args.
  if (S.TooManyArguments(Callee->getNumParams(), ArgExprs.size(),
                         /*PartialOverloading=*/false) &&
      !Callee->isVariadic())
    return;
  if (Callee->getMinRequiredArguments() > ArgExprs.size())
    return;

  if (const EnableIfAttr *Attr = S.CheckEnableIf(Callee, ArgExprs, true)) {
    S.Diag(Fn->getLocStart(),
           isa<CXXMethodDecl>(Callee)
               ? diag::err_ovl_no_viable_member_function_in_call
               : diag::err_ovl_no_viable_function_in_call)
        << Callee << Callee->getSourceRange();
    S.Diag(Callee->getLocation(),
           diag::note_ovl_candidate_disabled_by_function_cond_attr)
        << Attr->getCond()->getSourceRange() << Attr->getMessage();
    return;
  }
}

static bool enclosingClassIsRelatedToClassInWhichMembersWereFound(
    const UnresolvedMemberExpr *const UME, Sema &S) {

  const auto GetFunctionLevelDCIfCXXClass =
      [](Sema &S) -> const CXXRecordDecl * {
    const DeclContext *const DC = S.getFunctionLevelDeclContext();
    if (!DC || !DC->getParent())
      return nullptr;

    // If the call to some member function was made from within a member
    // function body 'M', return 'M's parent.
    if (const auto *MD = dyn_cast<CXXMethodDecl>(DC))
      return MD->getParent()->getCanonicalDecl();

    // else the call was made from within a default member initializer of a
    // class, so return the class.
    if (const auto *RD = dyn_cast<CXXRecordDecl>(DC))
      return RD->getCanonicalDecl();

    return nullptr;
  };

  // If our DeclContext is neither a member function nor a class (in the
  // case of a lambda in a default member initializer), we can't have an
  // enclosing 'this'.
  const CXXRecordDecl *const CurParentClass = GetFunctionLevelDCIfCXXClass(S);
  if (!CurParentClass)
    return false;

  // The naming class for implicit member functions call is the class in which
  // name lookup starts.
  const CXXRecordDecl *const NamingClass =
      UME->getNamingClass()->getCanonicalDecl();
  assert(NamingClass && "Must have naming class even for implicit access");

  // If the unresolved member functions were found in a 'naming class' that is
  // related (either the same or derived from) to the class that contains the
  // member function that itself contained the implicit member access.
  return CurParentClass == NamingClass ||
         CurParentClass->isDerivedFrom(NamingClass);
}

static void
tryImplicitlyCaptureThisIfImplicitMemberFunctionAccessWithDependentArgs(
    Sema &S, const UnresolvedMemberExpr *const UME, SourceLocation CallLoc) {

  if (!UME)
    return;

  LambdaScopeInfo *const CurLSI = S.getCurLambda();
  // Only try and implicitly capture 'this' within a C++ Lambda if it hasn't
  // already been captured, or if this is an implicit member function call (if
  // it isn't, an attempt to capture 'this' should already have been made).
  if (!CurLSI || CurLSI->ImpCaptureStyle == CurLSI->ImpCap_None ||
      !UME->isImplicitAccess() || CurLSI->isCXXThisCaptured())
    return;

  // Check if the naming class in which the unresolved members were found is
  // related (same as or is a base of) to the enclosing class.
  if (!enclosingClassIsRelatedToClassInWhichMembersWereFound(UME, S))
    return;

  DeclContext *EnclosingFunctionCtx = S.CurContext->getParent()->getParent();
  // If the enclosing function is not dependent, then this lambda is
  // capture ready, so if we can capture this, do so.
  if (!EnclosingFunctionCtx->isDependentContext()) {
    // If the current lambda and all enclosing lambdas can capture 'this' -
    // then go ahead and capture 'this' (since our unresolved overload set
    // contains at least one non-static member function).
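    //
    // A hypothetical sketch of the situation this function exists for:
    //
    //   struct S {
    //     void member(int);
    //     template <class T> auto f(T t) {
    //       return [=] { member(t); };   // implicit member call with a
    //     }                              // dependent argument; 'this' must
    //   };                               // still be captured
    //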
    if (!S.CheckCXXThisCapture(CallLoc, /*Explicit*/ false, /*Diagnose*/ false))
      S.CheckCXXThisCapture(CallLoc);
  } else if (S.CurContext->isDependentContext()) {
    // ... since this is an implicit member reference, that might potentially
    // involve a 'this' capture, mark 'this' for potential capture in
    // enclosing lambdas.
    if (CurLSI->ImpCaptureStyle != CurLSI->ImpCap_None)
      CurLSI->addPotentialThisCapture(CallLoc);
  }
}

/// ActOnCallExpr - Handle a call to Fn with the specified array of arguments.
/// This provides the location of the left/right parens and a list of comma
/// locations.
ExprResult Sema::ActOnCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc,
                               MultiExprArg ArgExprs, SourceLocation RParenLoc,
                               Expr *ExecConfig, bool IsExecConfig) {
  // Since this might be a postfix expression, get rid of ParenListExprs.
  ExprResult Result = MaybeConvertParenListExprToParenExpr(Scope, Fn);
  if (Result.isInvalid()) return ExprError();
  Fn = Result.get();

  if (checkArgsForPlaceholders(*this, ArgExprs))
    return ExprError();

  if (getLangOpts().CPlusPlus) {
    // If this is a pseudo-destructor expression, build the call immediately.
    if (isa<CXXPseudoDestructorExpr>(Fn)) {
      if (!ArgExprs.empty()) {
        // Pseudo-destructor calls should not have any arguments.
        Diag(Fn->getLocStart(), diag::err_pseudo_dtor_call_with_args)
            << FixItHint::CreateRemoval(
                   SourceRange(ArgExprs.front()->getLocStart(),
                               ArgExprs.back()->getLocEnd()));
      }

      return new (Context)
          CallExpr(Context, Fn, None, Context.VoidTy, VK_RValue, RParenLoc);
    }
    if (Fn->getType() == Context.PseudoObjectTy) {
      ExprResult result = CheckPlaceholderExpr(Fn);
      if (result.isInvalid()) return ExprError();
      Fn = result.get();
    }

    // Determine whether this is a dependent call inside a C++ template,
    // in which case we won't do any semantic analysis now.
    bool Dependent = false;
    if (Fn->isTypeDependent())
      Dependent = true;
    else if (Expr::hasAnyTypeDependentArguments(ArgExprs))
      Dependent = true;

    if (Dependent) {
      if (ExecConfig) {
        return new (Context) CUDAKernelCallExpr(
            Context, Fn, cast<CallExpr>(ExecConfig), ArgExprs,
            Context.DependentTy, VK_RValue, RParenLoc);
      } else {
        tryImplicitlyCaptureThisIfImplicitMemberFunctionAccessWithDependentArgs(
            *this, dyn_cast<UnresolvedMemberExpr>(Fn->IgnoreParens()),
            Fn->getLocStart());
        return new (Context) CallExpr(
            Context, Fn, ArgExprs, Context.DependentTy, VK_RValue, RParenLoc);
      }
    }

    // Determine whether this is a call to an object (C++ [over.call.object]).
    if (Fn->getType()->isRecordType())
      return BuildCallToObjectOfClassType(Scope, Fn, LParenLoc, ArgExprs,
                                          RParenLoc);

    if (Fn->getType() == Context.UnknownAnyTy) {
      ExprResult result = rebuildUnknownAnyFunction(*this, Fn);
      if (result.isInvalid()) return ExprError();
      Fn = result.get();
    }

    if (Fn->getType() == Context.BoundMemberTy) {
      return BuildCallToMemberFunction(Scope, Fn, LParenLoc, ArgExprs,
                                       RParenLoc);
    }
  }

  // Check for overloaded calls.  This can happen even in C due to extensions.
  if (Fn->getType() == Context.OverloadTy) {
    OverloadExpr::FindResult find = OverloadExpr::find(Fn);

    // We aren't supposed to apply this logic if there's an '&' involved.
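    //
    // I.e. (hypothetical) when the overload set is written through '&', as
    // in '&S::f', it keeps its member-pointer form and is not resolved as a
    // plain overloaded call here.
    //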
if (!find.HasFormOfMemberPointer) { if (Expr::hasAnyTypeDependentArguments(ArgExprs)) return new (Context) CallExpr( Context, Fn, ArgExprs, Context.DependentTy, VK_RValue, RParenLoc); OverloadExpr *ovl = find.Expression; if (UnresolvedLookupExpr *ULE = dyn_cast(ovl)) return BuildOverloadedCallExpr( Scope, Fn, ULE, LParenLoc, ArgExprs, RParenLoc, ExecConfig, /*AllowTypoCorrection=*/true, find.IsAddressOfOperand); return BuildCallToMemberFunction(Scope, Fn, LParenLoc, ArgExprs, RParenLoc); } } // If we're directly calling a function, get the appropriate declaration. if (Fn->getType() == Context.UnknownAnyTy) { ExprResult result = rebuildUnknownAnyFunction(*this, Fn); if (result.isInvalid()) return ExprError(); Fn = result.get(); } Expr *NakedFn = Fn->IgnoreParens(); bool CallingNDeclIndirectly = false; NamedDecl *NDecl = nullptr; if (UnaryOperator *UnOp = dyn_cast(NakedFn)) { if (UnOp->getOpcode() == UO_AddrOf) { CallingNDeclIndirectly = true; NakedFn = UnOp->getSubExpr()->IgnoreParens(); } } if (isa(NakedFn)) { NDecl = cast(NakedFn)->getDecl(); FunctionDecl *FDecl = dyn_cast(NDecl); if (FDecl && FDecl->getBuiltinID()) { // Rewrite the function decl for this builtin by replacing parameters // with no explicit address space with the address space of the arguments // in ArgExprs. if ((FDecl = rewriteBuiltinFunctionDecl(this, Context, FDecl, ArgExprs))) { NDecl = FDecl; Fn = DeclRefExpr::Create( Context, FDecl->getQualifierLoc(), SourceLocation(), FDecl, false, SourceLocation(), FDecl->getType(), Fn->getValueKind(), FDecl); } } } else if (isa(NakedFn)) NDecl = cast(NakedFn)->getMemberDecl(); if (FunctionDecl *FD = dyn_cast_or_null(NDecl)) { if (CallingNDeclIndirectly && !checkAddressOfFunctionIsAvailable(FD, /*Complain=*/true, Fn->getLocStart())) return ExprError(); if (getLangOpts().OpenCL && checkOpenCLDisabledDecl(*FD, *Fn)) return ExprError(); checkDirectCallValidity(*this, Fn, FD, ArgExprs); } return BuildResolvedCallExpr(Fn, NDecl, LParenLoc, ArgExprs, RParenLoc, ExecConfig, IsExecConfig); } /// ActOnAsTypeExpr - create a new asType (bitcast) from the arguments. /// /// __builtin_astype( value, dst type ) /// ExprResult Sema::ActOnAsTypeExpr(Expr *E, ParsedType ParsedDestTy, SourceLocation BuiltinLoc, SourceLocation RParenLoc) { ExprValueKind VK = VK_RValue; ExprObjectKind OK = OK_Ordinary; QualType DstTy = GetTypeFromParser(ParsedDestTy); QualType SrcTy = E->getType(); if (Context.getTypeSize(DstTy) != Context.getTypeSize(SrcTy)) return ExprError(Diag(BuiltinLoc, diag::err_invalid_astype_of_different_size) << DstTy << SrcTy << E->getSourceRange()); return new (Context) AsTypeExpr(E, DstTy, VK, OK, BuiltinLoc, RParenLoc); } /// ActOnConvertVectorExpr - create a new convert-vector expression from the /// provided arguments. /// /// __builtin_convertvector( value, dst type ) /// ExprResult Sema::ActOnConvertVectorExpr(Expr *E, ParsedType ParsedDestTy, SourceLocation BuiltinLoc, SourceLocation RParenLoc) { TypeSourceInfo *TInfo; GetTypeFromParser(ParsedDestTy, &TInfo); return SemaConvertVectorExpr(E, TInfo, BuiltinLoc, RParenLoc); } /// BuildResolvedCallExpr - Build a call to a resolved expression, /// i.e. an expression not of \p OverloadTy. The expression should /// unary-convert to an expression of function-pointer or /// block-pointer type. 
/// /// \param NDecl the declaration being called, if available ExprResult Sema::BuildResolvedCallExpr(Expr *Fn, NamedDecl *NDecl, SourceLocation LParenLoc, ArrayRef Args, SourceLocation RParenLoc, Expr *Config, bool IsExecConfig) { FunctionDecl *FDecl = dyn_cast_or_null(NDecl); unsigned BuiltinID = (FDecl ? FDecl->getBuiltinID() : 0); // Functions with 'interrupt' attribute cannot be called directly. if (FDecl && FDecl->hasAttr()) { Diag(Fn->getExprLoc(), diag::err_anyx86_interrupt_called); return ExprError(); } // Interrupt handlers don't save off the VFP regs automatically on ARM, // so there's some risk when calling out to non-interrupt handler functions // that the callee might not preserve them. This is easy to diagnose here, // but can be very challenging to debug. if (auto *Caller = getCurFunctionDecl()) if (Caller->hasAttr()) { bool VFP = Context.getTargetInfo().hasFeature("vfp"); if (VFP && (!FDecl || !FDecl->hasAttr())) Diag(Fn->getExprLoc(), diag::warn_arm_interrupt_calling_convention); } // Promote the function operand. // We special-case function promotion here because we only allow promoting // builtin functions to function pointers in the callee of a call. ExprResult Result; if (BuiltinID && Fn->getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn)) { Result = ImpCastExprToType(Fn, Context.getPointerType(FDecl->getType()), CK_BuiltinFnToFnPtr).get(); } else { Result = CallExprUnaryConversions(Fn); } if (Result.isInvalid()) return ExprError(); Fn = Result.get(); // Make the call expr early, before semantic checks. This guarantees cleanup // of arguments and function on error. CallExpr *TheCall; if (Config) TheCall = new (Context) CUDAKernelCallExpr(Context, Fn, cast(Config), Args, Context.BoolTy, VK_RValue, RParenLoc); else TheCall = new (Context) CallExpr(Context, Fn, Args, Context.BoolTy, VK_RValue, RParenLoc); if (!getLangOpts().CPlusPlus) { // C cannot always handle TypoExpr nodes in builtin calls and direct // function calls as their argument checking don't necessarily handle // dependent types properly, so make sure any TypoExprs have been // dealt with. ExprResult Result = CorrectDelayedTyposInExpr(TheCall); if (!Result.isUsable()) return ExprError(); TheCall = dyn_cast(Result.get()); if (!TheCall) return Result; Args = llvm::makeArrayRef(TheCall->getArgs(), TheCall->getNumArgs()); } // Bail out early if calling a builtin with custom typechecking. if (BuiltinID && Context.BuiltinInfo.hasCustomTypechecking(BuiltinID)) return CheckBuiltinFunctionCall(FDecl, BuiltinID, TheCall); retry: const FunctionType *FuncT; if (const PointerType *PT = Fn->getType()->getAs()) { // C99 6.5.2.2p1 - "The expression that denotes the called function shall // have type pointer to function". FuncT = PT->getPointeeType()->getAs(); if (!FuncT) return ExprError(Diag(LParenLoc, diag::err_typecheck_call_not_function) << Fn->getType() << Fn->getSourceRange()); } else if (const BlockPointerType *BPT = Fn->getType()->getAs()) { FuncT = BPT->getPointeeType()->castAs(); } else { // Handle calls to expressions of unknown-any type. 
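    //
    // __unknown_anytype arises from the debugger's expression evaluation,
    // e.g. (hypothetical):
    //
    //   extern __unknown_anytype opaque();
    //   int x = (int)opaque();   // the callee is rebuilt with a concrete
    //                            // function type before the call completes
    //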
    if (Fn->getType() == Context.UnknownAnyTy) {
      ExprResult rewrite = rebuildUnknownAnyFunction(*this, Fn);
      if (rewrite.isInvalid()) return ExprError();
      Fn = rewrite.get();
      TheCall->setCallee(Fn);
      goto retry;
    }

    return ExprError(Diag(LParenLoc, diag::err_typecheck_call_not_function)
                         << Fn->getType() << Fn->getSourceRange());
  }

  if (getLangOpts().CUDA) {
    if (Config) {
      // CUDA: Kernel calls must be to global functions
      if (FDecl && !FDecl->hasAttr<CUDAGlobalAttr>())
        return ExprError(Diag(LParenLoc, diag::err_kern_call_not_global_function)
                             << FDecl->getName() << Fn->getSourceRange());

      // CUDA: Kernel function must have 'void' return type
      if (!FuncT->getReturnType()->isVoidType())
        return ExprError(Diag(LParenLoc, diag::err_kern_type_not_void_return)
                             << Fn->getType() << Fn->getSourceRange());
    } else {
      // CUDA: Calls to global functions must be configured
      if (FDecl && FDecl->hasAttr<CUDAGlobalAttr>())
        return ExprError(Diag(LParenLoc, diag::err_global_call_not_config)
                             << FDecl->getName() << Fn->getSourceRange());
    }
  }

  // Check for a valid return type
  if (CheckCallReturnType(FuncT->getReturnType(), Fn->getLocStart(), TheCall,
                          FDecl))
    return ExprError();

  // We know the result type of the call, set it.
  TheCall->setType(FuncT->getCallResultType(Context));
  TheCall->setValueKind(Expr::getValueKindForType(FuncT->getReturnType()));

  const FunctionProtoType *Proto = dyn_cast<FunctionProtoType>(FuncT);
  if (Proto) {
    if (ConvertArgumentsForCall(TheCall, Fn, FDecl, Proto, Args, RParenLoc,
                                IsExecConfig))
      return ExprError();
  } else {
    assert(isa<FunctionNoProtoType>(FuncT) && "Unknown FunctionType!");

    if (FDecl) {
      // Check if we have too few/too many template arguments, based
      // on our knowledge of the function definition.
      const FunctionDecl *Def = nullptr;
      if (FDecl->hasBody(Def) && Args.size() != Def->param_size()) {
        Proto = Def->getType()->getAs<FunctionProtoType>();
        if (!Proto || !(Proto->isVariadic() && Args.size() >= Def->param_size()))
          Diag(RParenLoc, diag::warn_call_wrong_number_of_arguments)
              << (Args.size() > Def->param_size()) << FDecl
              << Fn->getSourceRange();
      }

      // If the function we're calling isn't a function prototype, but we have
      // a function prototype from a prior declaration, use that prototype.
      if (!FDecl->hasPrototype())
        Proto = FDecl->getType()->getAs<FunctionProtoType>();
    }

    // Promote the arguments (C99 6.5.2.2p6).
    for (unsigned i = 0, e = Args.size(); i != e; i++) {
      Expr *Arg = Args[i];

      if (Proto && i < Proto->getNumParams()) {
        InitializedEntity Entity = InitializedEntity::InitializeParameter(
            Context, Proto->getParamType(i), Proto->isParamConsumed(i));
        ExprResult ArgE =
            PerformCopyInitialization(Entity, SourceLocation(), Arg);
        if (ArgE.isInvalid())
          return true;

        Arg = ArgE.getAs<Expr>();
      } else {
        ExprResult ArgE = DefaultArgumentPromotion(Arg);

        if (ArgE.isInvalid())
          return true;

        Arg = ArgE.getAs<Expr>();
      }

      if (RequireCompleteType(Arg->getLocStart(), Arg->getType(),
                              diag::err_call_incomplete_argument, Arg))
        return ExprError();

      TheCall->setArg(i, Arg);
    }
  }

  if (CXXMethodDecl *Method = dyn_cast_or_null<CXXMethodDecl>(FDecl))
    if (!Method->isStatic())
      return ExprError(Diag(LParenLoc, diag::err_member_call_without_object)
                           << Fn->getSourceRange());

  // Check for sentinels
  if (NDecl)
    DiagnoseSentinelCalls(NDecl, LParenLoc, Args);

  // Do special checking on direct calls to functions.
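  //
  // CheckFunctionCall is where, for example, format-string and nonnull
  // attribute checks run for a direct callee (hypothetical):
  //
  //   void log(const char *fmt, ...) __attribute__((format(printf, 1, 2)));
  //   void g(void) { log("%s", 42); }   // -Wformat: '%s' expects 'char *'
  //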
if (FDecl) { if (CheckFunctionCall(FDecl, TheCall, Proto)) return ExprError(); if (BuiltinID) return CheckBuiltinFunctionCall(FDecl, BuiltinID, TheCall); } else if (NDecl) { if (CheckPointerCall(NDecl, TheCall, Proto)) return ExprError(); } else { if (CheckOtherCall(TheCall, Proto)) return ExprError(); } return MaybeBindToTemporary(TheCall); } ExprResult Sema::ActOnCompoundLiteral(SourceLocation LParenLoc, ParsedType Ty, SourceLocation RParenLoc, Expr *InitExpr) { assert(Ty && "ActOnCompoundLiteral(): missing type"); assert(InitExpr && "ActOnCompoundLiteral(): missing expression"); TypeSourceInfo *TInfo; QualType literalType = GetTypeFromParser(Ty, &TInfo); if (!TInfo) TInfo = Context.getTrivialTypeSourceInfo(literalType); return BuildCompoundLiteralExpr(LParenLoc, TInfo, RParenLoc, InitExpr); } ExprResult Sema::BuildCompoundLiteralExpr(SourceLocation LParenLoc, TypeSourceInfo *TInfo, SourceLocation RParenLoc, Expr *LiteralExpr) { QualType literalType = TInfo->getType(); if (literalType->isArrayType()) { if (RequireCompleteType(LParenLoc, Context.getBaseElementType(literalType), diag::err_illegal_decl_array_incomplete_type, SourceRange(LParenLoc, LiteralExpr->getSourceRange().getEnd()))) return ExprError(); if (literalType->isVariableArrayType()) return ExprError(Diag(LParenLoc, diag::err_variable_object_no_init) << SourceRange(LParenLoc, LiteralExpr->getSourceRange().getEnd())); } else if (!literalType->isDependentType() && RequireCompleteType(LParenLoc, literalType, diag::err_typecheck_decl_incomplete_type, SourceRange(LParenLoc, LiteralExpr->getSourceRange().getEnd()))) return ExprError(); InitializedEntity Entity = InitializedEntity::InitializeCompoundLiteralInit(TInfo); InitializationKind Kind = InitializationKind::CreateCStyleCast(LParenLoc, SourceRange(LParenLoc, RParenLoc), /*InitList=*/true); InitializationSequence InitSeq(*this, Entity, Kind, LiteralExpr); ExprResult Result = InitSeq.Perform(*this, Entity, Kind, LiteralExpr, &literalType); if (Result.isInvalid()) return ExprError(); LiteralExpr = Result.get(); bool isFileScope = !CurContext->isFunctionOrMethod(); if (isFileScope && !LiteralExpr->isTypeDependent() && !LiteralExpr->isValueDependent() && !literalType->isDependentType()) { // 6.5.2.5p3 if (CheckForConstantInitializer(LiteralExpr, literalType)) return ExprError(); } // In C, compound literals are l-values for some reason. // For GCC compatibility, in C++, file-scope array compound literals with // constant initializers are also l-values, and compound literals are // otherwise prvalues. // // (GCC also treats C++ list-initialized file-scope array prvalues with // constant initializers as l-values, but that's non-conforming, so we don't // follow it there.) // // FIXME: It would be better to handle the lvalue cases as materializing and // lifetime-extending a temporary object, but our materialized temporaries // representation only supports lifetime extension from a variable, not "out // of thin air". // FIXME: For C++, we might want to instead lifetime-extend only if a pointer // is bound to the result of applying array-to-pointer decay to the compound // literal. // FIXME: GCC supports compound literals of reference type, which should // obviously have a value kind derived from the kind of reference involved. ExprValueKind VK = (getLangOpts().CPlusPlus && !(isFileScope && literalType->isArrayType())) ? 
      VK_RValue : VK_LValue;

  return MaybeBindToTemporary(new (Context) CompoundLiteralExpr(
      LParenLoc, TInfo, literalType, VK, LiteralExpr, isFileScope));
}

ExprResult
Sema::ActOnInitList(SourceLocation LBraceLoc, MultiExprArg InitArgList,
                    SourceLocation RBraceLoc) {
  // Immediately handle non-overload placeholders.  Overloads can be
  // resolved contextually, but everything else here can't.
  for (unsigned I = 0, E = InitArgList.size(); I != E; ++I) {
    if (InitArgList[I]->getType()->isNonOverloadPlaceholderType()) {
      ExprResult result = CheckPlaceholderExpr(InitArgList[I]);

      // Ignore failures; dropping the entire initializer list because
      // of one failure would be terrible for indexing/etc.
      if (result.isInvalid()) continue;

      InitArgList[I] = result.get();
    }
  }

  // Semantic analysis for initializers is done by ActOnDeclarator() and
  // CheckInitializer() - it requires knowledge of the object being
  // initialized.
  InitListExpr *E = new (Context) InitListExpr(Context, LBraceLoc, InitArgList,
                                               RBraceLoc);
  E->setType(Context.VoidTy); // FIXME: just a place holder for now.
  return E;
}

/// Do an explicit extend of the given block pointer if we're in ARC.
void Sema::maybeExtendBlockObject(ExprResult &E) {
  assert(E.get()->getType()->isBlockPointerType());
  assert(E.get()->isRValue());

  // Only do this in an r-value context.
  if (!getLangOpts().ObjCAutoRefCount) return;

  E = ImplicitCastExpr::Create(Context, E.get()->getType(),
                               CK_ARCExtendBlockObject, E.get(),
                               /*base path*/ nullptr, VK_RValue);
  Cleanup.setExprNeedsCleanups(true);
}

/// Prepare a conversion of the given expression to an ObjC object
/// pointer type.
CastKind Sema::PrepareCastToObjCObjectPointer(ExprResult &E) {
  QualType type = E.get()->getType();
  if (type->isObjCObjectPointerType()) {
    return CK_BitCast;
  } else if (type->isBlockPointerType()) {
    maybeExtendBlockObject(E);
    return CK_BlockPointerToObjCPointerCast;
  } else {
    assert(type->isPointerType());
    return CK_CPointerToObjCPointerCast;
  }
}

/// Prepares for a scalar cast, performing all the necessary stages
/// except the final cast and returning the kind required.
CastKind Sema::PrepareScalarCast(ExprResult &Src, QualType DestTy) {
  // Both Src and Dest are scalar types, i.e. arithmetic or pointer.
  // Also, callers should have filtered out the invalid cases with
  // pointers.  Everything else should be possible.

  QualType SrcTy = Src.get()->getType();
  if (Context.hasSameUnqualifiedType(SrcTy, DestTy))
    return CK_NoOp;

  switch (Type::ScalarTypeKind SrcKind = SrcTy->getScalarTypeKind()) {
  case Type::STK_MemberPointer:
    llvm_unreachable("member pointer type in C");

  case Type::STK_CPointer:
  case Type::STK_BlockPointer:
  case Type::STK_ObjCObjectPointer:
    switch (DestTy->getScalarTypeKind()) {
    case Type::STK_CPointer: {
      LangAS SrcAS = SrcTy->getPointeeType().getAddressSpace();
      LangAS DestAS = DestTy->getPointeeType().getAddressSpace();
      if (SrcAS != DestAS)
        return CK_AddressSpaceConversion;
      return CK_BitCast;
    }
    case Type::STK_BlockPointer:
      return (SrcKind == Type::STK_BlockPointer ?
CK_BitCast : CK_AnyPointerToBlockPointerCast); case Type::STK_ObjCObjectPointer: if (SrcKind == Type::STK_ObjCObjectPointer) return CK_BitCast; if (SrcKind == Type::STK_CPointer) return CK_CPointerToObjCPointerCast; maybeExtendBlockObject(Src); return CK_BlockPointerToObjCPointerCast; case Type::STK_Bool: return CK_PointerToBoolean; case Type::STK_Integral: return CK_PointerToIntegral; case Type::STK_Floating: case Type::STK_FloatingComplex: case Type::STK_IntegralComplex: case Type::STK_MemberPointer: llvm_unreachable("illegal cast from pointer"); } llvm_unreachable("Should have returned before this"); case Type::STK_Bool: // casting from bool is like casting from an integer case Type::STK_Integral: switch (DestTy->getScalarTypeKind()) { case Type::STK_CPointer: case Type::STK_ObjCObjectPointer: case Type::STK_BlockPointer: if (Src.get()->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull)) return CK_NullToPointer; return CK_IntegralToPointer; case Type::STK_Bool: return CK_IntegralToBoolean; case Type::STK_Integral: return CK_IntegralCast; case Type::STK_Floating: return CK_IntegralToFloating; case Type::STK_IntegralComplex: Src = ImpCastExprToType(Src.get(), DestTy->castAs()->getElementType(), CK_IntegralCast); return CK_IntegralRealToComplex; case Type::STK_FloatingComplex: Src = ImpCastExprToType(Src.get(), DestTy->castAs()->getElementType(), CK_IntegralToFloating); return CK_FloatingRealToComplex; case Type::STK_MemberPointer: llvm_unreachable("member pointer type in C"); } llvm_unreachable("Should have returned before this"); case Type::STK_Floating: switch (DestTy->getScalarTypeKind()) { case Type::STK_Floating: return CK_FloatingCast; case Type::STK_Bool: return CK_FloatingToBoolean; case Type::STK_Integral: return CK_FloatingToIntegral; case Type::STK_FloatingComplex: Src = ImpCastExprToType(Src.get(), DestTy->castAs()->getElementType(), CK_FloatingCast); return CK_FloatingRealToComplex; case Type::STK_IntegralComplex: Src = ImpCastExprToType(Src.get(), DestTy->castAs()->getElementType(), CK_FloatingToIntegral); return CK_IntegralRealToComplex; case Type::STK_CPointer: case Type::STK_ObjCObjectPointer: case Type::STK_BlockPointer: llvm_unreachable("valid float->pointer cast?"); case Type::STK_MemberPointer: llvm_unreachable("member pointer type in C"); } llvm_unreachable("Should have returned before this"); case Type::STK_FloatingComplex: switch (DestTy->getScalarTypeKind()) { case Type::STK_FloatingComplex: return CK_FloatingComplexCast; case Type::STK_IntegralComplex: return CK_FloatingComplexToIntegralComplex; case Type::STK_Floating: { QualType ET = SrcTy->castAs()->getElementType(); if (Context.hasSameType(ET, DestTy)) return CK_FloatingComplexToReal; Src = ImpCastExprToType(Src.get(), ET, CK_FloatingComplexToReal); return CK_FloatingCast; } case Type::STK_Bool: return CK_FloatingComplexToBoolean; case Type::STK_Integral: Src = ImpCastExprToType(Src.get(), SrcTy->castAs()->getElementType(), CK_FloatingComplexToReal); return CK_FloatingToIntegral; case Type::STK_CPointer: case Type::STK_ObjCObjectPointer: case Type::STK_BlockPointer: llvm_unreachable("valid complex float->pointer cast?"); case Type::STK_MemberPointer: llvm_unreachable("member pointer type in C"); } llvm_unreachable("Should have returned before this"); case Type::STK_IntegralComplex: switch (DestTy->getScalarTypeKind()) { case Type::STK_FloatingComplex: return CK_IntegralComplexToFloatingComplex; case Type::STK_IntegralComplex: return CK_IntegralComplexCast; case Type::STK_Integral: { QualType ET = 
SrcTy->castAs()->getElementType(); if (Context.hasSameType(ET, DestTy)) return CK_IntegralComplexToReal; Src = ImpCastExprToType(Src.get(), ET, CK_IntegralComplexToReal); return CK_IntegralCast; } case Type::STK_Bool: return CK_IntegralComplexToBoolean; case Type::STK_Floating: Src = ImpCastExprToType(Src.get(), SrcTy->castAs()->getElementType(), CK_IntegralComplexToReal); return CK_IntegralToFloating; case Type::STK_CPointer: case Type::STK_ObjCObjectPointer: case Type::STK_BlockPointer: llvm_unreachable("valid complex int->pointer cast?"); case Type::STK_MemberPointer: llvm_unreachable("member pointer type in C"); } llvm_unreachable("Should have returned before this"); } llvm_unreachable("Unhandled scalar cast"); } static bool breakDownVectorType(QualType type, uint64_t &len, QualType &eltType) { // Vectors are simple. if (const VectorType *vecType = type->getAs()) { len = vecType->getNumElements(); eltType = vecType->getElementType(); assert(eltType->isScalarType()); return true; } // We allow lax conversion to and from non-vector types, but only if // they're real types (i.e. non-complex, non-pointer scalar types). if (!type->isRealType()) return false; len = 1; eltType = type; return true; } /// Are the two types lax-compatible vector types? That is, given /// that one of them is a vector, do they have equal storage sizes, /// where the storage size is the number of elements times the element /// size? /// /// This will also return false if either of the types is neither a /// vector nor a real type. bool Sema::areLaxCompatibleVectorTypes(QualType srcTy, QualType destTy) { assert(destTy->isVectorType() || srcTy->isVectorType()); // Disallow lax conversions between scalars and ExtVectors (these // conversions are allowed for other vector types because common headers // depend on them). Most scalar OP ExtVector cases are handled by the // splat path anyway, which does what we want (convert, not bitcast). // What this rules out for ExtVectors is crazy things like char4*float. if (srcTy->isScalarType() && destTy->isExtVectorType()) return false; if (destTy->isScalarType() && srcTy->isExtVectorType()) return false; uint64_t srcLen, destLen; QualType srcEltTy, destEltTy; if (!breakDownVectorType(srcTy, srcLen, srcEltTy)) return false; if (!breakDownVectorType(destTy, destLen, destEltTy)) return false; // ASTContext::getTypeSize will return the size rounded up to a // power of 2, so instead of using that, we need to use the raw // element size multiplied by the element count. uint64_t srcEltSize = Context.getTypeSize(srcEltTy); uint64_t destEltSize = Context.getTypeSize(destEltTy); return (srcLen * srcEltSize == destLen * destEltSize); } /// Is this a legal conversion between two types, one of which is /// known to be a vector type? bool Sema::isLaxVectorConversion(QualType srcTy, QualType destTy) { assert(destTy->isVectorType() || srcTy->isVectorType()); if (!Context.getLangOpts().LaxVectorConversions) return false; return areLaxCompatibleVectorTypes(srcTy, destTy); } bool Sema::CheckVectorCast(SourceRange R, QualType VectorTy, QualType Ty, CastKind &Kind) { assert(VectorTy->isVectorType() && "Not a vector type!"); if (Ty->isVectorType() || Ty->isIntegralType(Context)) { if (!areLaxCompatibleVectorTypes(Ty, VectorTy)) return Diag(R.getBegin(), Ty->isVectorType() ? 
diag::err_invalid_conversion_between_vectors : diag::err_invalid_conversion_between_vector_and_integer) << VectorTy << Ty << R; } else return Diag(R.getBegin(), diag::err_invalid_conversion_between_vector_and_scalar) << VectorTy << Ty << R; Kind = CK_BitCast; return false; } ExprResult Sema::prepareVectorSplat(QualType VectorTy, Expr *SplattedExpr) { QualType DestElemTy = VectorTy->castAs()->getElementType(); if (DestElemTy == SplattedExpr->getType()) return SplattedExpr; assert(DestElemTy->isFloatingType() || DestElemTy->isIntegralOrEnumerationType()); CastKind CK; if (VectorTy->isExtVectorType() && SplattedExpr->getType()->isBooleanType()) { // OpenCL requires that we convert `true` boolean expressions to -1, but // only when splatting vectors. if (DestElemTy->isFloatingType()) { // To avoid having to have a CK_BooleanToSignedFloating cast kind, we cast // in two steps: boolean to signed integral, then to floating. ExprResult CastExprRes = ImpCastExprToType(SplattedExpr, Context.IntTy, CK_BooleanToSignedIntegral); SplattedExpr = CastExprRes.get(); CK = CK_IntegralToFloating; } else { CK = CK_BooleanToSignedIntegral; } } else { ExprResult CastExprRes = SplattedExpr; CK = PrepareScalarCast(CastExprRes, DestElemTy); if (CastExprRes.isInvalid()) return ExprError(); SplattedExpr = CastExprRes.get(); } return ImpCastExprToType(SplattedExpr, DestElemTy, CK); } ExprResult Sema::CheckExtVectorCast(SourceRange R, QualType DestTy, Expr *CastExpr, CastKind &Kind) { assert(DestTy->isExtVectorType() && "Not an extended vector type!"); QualType SrcTy = CastExpr->getType(); // If SrcTy is a VectorType, the total size must match to explicitly cast to // an ExtVectorType. // In OpenCL, casts between vectors of different types are not allowed. // (See OpenCL 6.2). if (SrcTy->isVectorType()) { if (!areLaxCompatibleVectorTypes(SrcTy, DestTy) || (getLangOpts().OpenCL && !Context.hasSameUnqualifiedType(DestTy, SrcTy))) { Diag(R.getBegin(),diag::err_invalid_conversion_between_ext_vectors) << DestTy << SrcTy << R; return ExprError(); } Kind = CK_BitCast; return CastExpr; } // All non-pointer scalars can be cast to ExtVector type. The appropriate // conversion will take place first from scalar to elt type, and then // splat from elt type to vector. if (SrcTy->isPointerType()) return Diag(R.getBegin(), diag::err_invalid_conversion_between_vector_and_scalar) << DestTy << SrcTy << R; Kind = CK_VectorSplat; return prepareVectorSplat(DestTy, CastExpr); } ExprResult Sema::ActOnCastExpr(Scope *S, SourceLocation LParenLoc, Declarator &D, ParsedType &Ty, SourceLocation RParenLoc, Expr *CastExpr) { assert(!D.isInvalidType() && (CastExpr != nullptr) && "ActOnCastExpr(): missing type or expr"); TypeSourceInfo *castTInfo = GetTypeForDeclaratorCast(D, CastExpr->getType()); if (D.isInvalidType()) return ExprError(); if (getLangOpts().CPlusPlus) { // Check that there are no default arguments (C++ only). CheckExtraCXXDefaultArguments(D); } else { // Make sure any TypoExprs have been dealt with. ExprResult Res = CorrectDelayedTyposInExpr(CastExpr); if (!Res.isUsable()) return ExprError(); CastExpr = Res.get(); } checkUnusedDeclAttributes(D); QualType castType = castTInfo->getType(); Ty = CreateParsedType(castType, castTInfo); bool isVectorLiteral = false; // Check for an altivec or OpenCL literal, // i.e. all the elements are integer constants. 
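  //
  // E.g. (hypothetical OpenCL-style literals):
  //
  //   typedef __attribute__((ext_vector_type(4))) float float4;
  //   float4 v = (float4)(1.0f);            // single init: splatted
  //   float4 w = (float4)(1.0f, 2, 3, 4);   // one initializer per element
  //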
ParenExpr *PE = dyn_cast(CastExpr); ParenListExpr *PLE = dyn_cast(CastExpr); if ((getLangOpts().AltiVec || getLangOpts().ZVector || getLangOpts().OpenCL) && castType->isVectorType() && (PE || PLE)) { if (PLE && PLE->getNumExprs() == 0) { Diag(PLE->getExprLoc(), diag::err_altivec_empty_initializer); return ExprError(); } if (PE || PLE->getNumExprs() == 1) { Expr *E = (PE ? PE->getSubExpr() : PLE->getExpr(0)); if (!E->getType()->isVectorType()) isVectorLiteral = true; } else isVectorLiteral = true; } // If this is a vector initializer, '(' type ')' '(' init, ..., init ')' // then handle it as such. if (isVectorLiteral) return BuildVectorLiteral(LParenLoc, RParenLoc, CastExpr, castTInfo); // If the Expr being casted is a ParenListExpr, handle it specially. // This is not an AltiVec-style cast, so turn the ParenListExpr into a // sequence of BinOp comma operators. if (isa(CastExpr)) { ExprResult Result = MaybeConvertParenListExprToParenExpr(S, CastExpr); if (Result.isInvalid()) return ExprError(); CastExpr = Result.get(); } if (getLangOpts().CPlusPlus && !castType->isVoidType() && !getSourceManager().isInSystemMacro(LParenLoc)) Diag(LParenLoc, diag::warn_old_style_cast) << CastExpr->getSourceRange(); CheckTollFreeBridgeCast(castType, CastExpr); CheckObjCBridgeRelatedCast(castType, CastExpr); DiscardMisalignedMemberAddress(castType.getTypePtr(), CastExpr); return BuildCStyleCastExpr(LParenLoc, castTInfo, RParenLoc, CastExpr); } ExprResult Sema::BuildVectorLiteral(SourceLocation LParenLoc, SourceLocation RParenLoc, Expr *E, TypeSourceInfo *TInfo) { assert((isa(E) || isa(E)) && "Expected paren or paren list expression"); Expr **exprs; unsigned numExprs; Expr *subExpr; SourceLocation LiteralLParenLoc, LiteralRParenLoc; if (ParenListExpr *PE = dyn_cast(E)) { LiteralLParenLoc = PE->getLParenLoc(); LiteralRParenLoc = PE->getRParenLoc(); exprs = PE->getExprs(); numExprs = PE->getNumExprs(); } else { // isa by assertion at function entrance LiteralLParenLoc = cast(E)->getLParen(); LiteralRParenLoc = cast(E)->getRParen(); subExpr = cast(E)->getSubExpr(); exprs = &subExpr; numExprs = 1; } QualType Ty = TInfo->getType(); assert(Ty->isVectorType() && "Expected vector type"); SmallVector initExprs; const VectorType *VTy = Ty->getAs(); unsigned numElems = Ty->getAs()->getNumElements(); // '(...)' form of vector initialization in AltiVec: the number of // initializers must be one or must match the size of the vector. // If a single value is specified in the initializer then it will be // replicated to all the components of the vector if (VTy->getVectorKind() == VectorType::AltiVecVector) { // The number of initializers must be one or must match the size of the // vector. If a single value is specified in the initializer then it will // be replicated to all the components of the vector if (numExprs == 1) { QualType ElemTy = Ty->getAs()->getElementType(); ExprResult Literal = DefaultLvalueConversion(exprs[0]); if (Literal.isInvalid()) return ExprError(); Literal = ImpCastExprToType(Literal.get(), ElemTy, PrepareScalarCast(Literal, ElemTy)); return BuildCStyleCastExpr(LParenLoc, TInfo, RParenLoc, Literal.get()); } else if (numExprs < numElems) { Diag(E->getExprLoc(), diag::err_incorrect_number_of_vector_initializers); return ExprError(); } else initExprs.append(exprs, exprs + numExprs); } else { // For OpenCL, when the number of initializers is a single value, // it will be replicated to all components of the vector. 
    if (getLangOpts().OpenCL &&
        VTy->getVectorKind() == VectorType::GenericVector && numExprs == 1) {
      QualType ElemTy = Ty->getAs<VectorType>()->getElementType();
      ExprResult Literal = DefaultLvalueConversion(exprs[0]);
      if (Literal.isInvalid())
        return ExprError();
      Literal = ImpCastExprToType(Literal.get(), ElemTy,
                                  PrepareScalarCast(Literal, ElemTy));
      return BuildCStyleCastExpr(LParenLoc, TInfo, RParenLoc, Literal.get());
    }

    initExprs.append(exprs, exprs + numExprs);
  }
  // FIXME: This means that pretty-printing the final AST will produce curly
  // braces instead of the original commas.
  InitListExpr *initE = new (Context) InitListExpr(Context, LiteralLParenLoc,
                                                   initExprs, LiteralRParenLoc);
  initE->setType(Ty);
  return BuildCompoundLiteralExpr(LParenLoc, TInfo, RParenLoc, initE);
}

/// This is not an AltiVec-style cast or C++ direct-initialization, so turn
/// the ParenListExpr into a sequence of comma binary operators.
ExprResult
Sema::MaybeConvertParenListExprToParenExpr(Scope *S, Expr *OrigExpr) {
  ParenListExpr *E = dyn_cast<ParenListExpr>(OrigExpr);
  if (!E)
    return OrigExpr;

  ExprResult Result(E->getExpr(0));

  for (unsigned i = 1, e = E->getNumExprs(); i != e && !Result.isInvalid(); ++i)
    Result = ActOnBinOp(S, E->getExprLoc(), tok::comma, Result.get(),
                        E->getExpr(i));

  if (Result.isInvalid()) return ExprError();

  return ActOnParenExpr(E->getLParenLoc(), E->getRParenLoc(), Result.get());
}

ExprResult Sema::ActOnParenListExpr(SourceLocation L, SourceLocation R,
                                    MultiExprArg Val) {
  Expr *expr = new (Context) ParenListExpr(Context, L, Val, R);
  return expr;
}

/// \brief Emit a specialized diagnostic when one expression is a null pointer
/// constant and the other is not a pointer.  Returns true if a diagnostic is
/// emitted.
bool Sema::DiagnoseConditionalForNull(Expr *LHSExpr, Expr *RHSExpr,
                                      SourceLocation QuestionLoc) {
  Expr *NullExpr = LHSExpr;
  Expr *NonPointerExpr = RHSExpr;
  Expr::NullPointerConstantKind NullKind =
      NullExpr->isNullPointerConstant(Context,
                                      Expr::NPC_ValueDependentIsNotNull);

  if (NullKind == Expr::NPCK_NotNull) {
    NullExpr = RHSExpr;
    NonPointerExpr = LHSExpr;
    NullKind =
        NullExpr->isNullPointerConstant(Context,
                                        Expr::NPC_ValueDependentIsNotNull);
  }

  if (NullKind == Expr::NPCK_NotNull)
    return false;
  if (NullKind == Expr::NPCK_ZeroExpression)
    return false;

  if (NullKind == Expr::NPCK_ZeroLiteral) {
    // In this case, check to make sure that we got here from a "NULL"
    // string in the source code.
    NullExpr = NullExpr->IgnoreParenImpCasts();
    SourceLocation loc = NullExpr->getExprLoc();
    if (!findMacroSpelling(loc, "NULL"))
      return false;
  }

  int DiagType = (NullKind == Expr::NPCK_CXX11_nullptr);
  Diag(QuestionLoc, diag::err_typecheck_cond_incompatible_operands_null)
      << NonPointerExpr->getType() << DiagType
      << NonPointerExpr->getSourceRange();
  return true;
}

/// \brief Return false if the condition expression is valid, true otherwise.
static bool checkCondition(Sema &S, Expr *Cond, SourceLocation QuestionLoc) {
  QualType CondTy = Cond->getType();

  // OpenCL v1.1 s6.3.i says the condition cannot be a floating point type.
  if (S.getLangOpts().OpenCL && CondTy->isFloatingType()) {
    S.Diag(QuestionLoc, diag::err_typecheck_cond_expect_nonfloat)
        << CondTy << Cond->getSourceRange();
    return true;
  }

  // C99 6.5.15p2
  if (CondTy->isScalarType()) return false;

  S.Diag(QuestionLoc, diag::err_typecheck_cond_expect_scalar)
      << CondTy << Cond->getSourceRange();
  return true;
}

/// \brief Handle when one or both operands are void type.
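///
/// For example (hypothetical), only one operand being void is accepted,
/// with a warning, as a GNU extension:
///
/// \code
///   cond ? (void)0 : puts("hi");   // ext_typecheck_cond_one_void
/// \endcode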
static QualType checkConditionalVoidType(Sema &S, ExprResult &LHS, ExprResult &RHS) { Expr *LHSExpr = LHS.get(); Expr *RHSExpr = RHS.get(); if (!LHSExpr->getType()->isVoidType()) S.Diag(RHSExpr->getLocStart(), diag::ext_typecheck_cond_one_void) << RHSExpr->getSourceRange(); if (!RHSExpr->getType()->isVoidType()) S.Diag(LHSExpr->getLocStart(), diag::ext_typecheck_cond_one_void) << LHSExpr->getSourceRange(); LHS = S.ImpCastExprToType(LHS.get(), S.Context.VoidTy, CK_ToVoid); RHS = S.ImpCastExprToType(RHS.get(), S.Context.VoidTy, CK_ToVoid); return S.Context.VoidTy; } /// \brief Return false if the NullExpr can be promoted to PointerTy, /// true otherwise. static bool checkConditionalNullPointer(Sema &S, ExprResult &NullExpr, QualType PointerTy) { if ((!PointerTy->isAnyPointerType() && !PointerTy->isBlockPointerType()) || !NullExpr.get()->isNullPointerConstant(S.Context, Expr::NPC_ValueDependentIsNull)) return true; NullExpr = S.ImpCastExprToType(NullExpr.get(), PointerTy, CK_NullToPointer); return false; } /// \brief Checks compatibility between two pointers and return the resulting /// type. static QualType checkConditionalPointerCompatibility(Sema &S, ExprResult &LHS, ExprResult &RHS, SourceLocation Loc) { QualType LHSTy = LHS.get()->getType(); QualType RHSTy = RHS.get()->getType(); if (S.Context.hasSameType(LHSTy, RHSTy)) { // Two identical pointers types are always compatible. return LHSTy; } QualType lhptee, rhptee; // Get the pointee types. bool IsBlockPointer = false; if (const BlockPointerType *LHSBTy = LHSTy->getAs()) { lhptee = LHSBTy->getPointeeType(); rhptee = RHSTy->castAs()->getPointeeType(); IsBlockPointer = true; } else { lhptee = LHSTy->castAs()->getPointeeType(); rhptee = RHSTy->castAs()->getPointeeType(); } // C99 6.5.15p6: If both operands are pointers to compatible types or to // differently qualified versions of compatible types, the result type is // a pointer to an appropriately qualified version of the composite // type. // Only CVR-qualifiers exist in the standard, and the differently-qualified // clause doesn't make sense for our extensions. E.g. address space 2 should // be incompatible with address space 3: they may live on different devices or // anything. Qualifiers lhQual = lhptee.getQualifiers(); Qualifiers rhQual = rhptee.getQualifiers(); LangAS ResultAddrSpace = LangAS::Default; LangAS LAddrSpace = lhQual.getAddressSpace(); LangAS RAddrSpace = rhQual.getAddressSpace(); if (S.getLangOpts().OpenCL) { // OpenCL v1.1 s6.5 - Conversion between pointers to distinct address // spaces is disallowed. if (lhQual.isAddressSpaceSupersetOf(rhQual)) ResultAddrSpace = LAddrSpace; else if (rhQual.isAddressSpaceSupersetOf(lhQual)) ResultAddrSpace = RAddrSpace; else { S.Diag(Loc, diag::err_typecheck_op_on_nonoverlapping_address_space_pointers) << LHSTy << RHSTy << 2 << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } } unsigned MergedCVRQual = lhQual.getCVRQualifiers() | rhQual.getCVRQualifiers(); auto LHSCastKind = CK_BitCast, RHSCastKind = CK_BitCast; lhQual.removeCVRQualifiers(); rhQual.removeCVRQualifiers(); // OpenCL v2.0 specification doesn't extend compatibility of type qualifiers // (C99 6.7.3) for address spaces. 
We assume that the check should behave in // the same manner as it's defined for CVR qualifiers, so for OpenCL two // qual types are compatible iff // * corresponded types are compatible // * CVR qualifiers are equal // * address spaces are equal // Thus for conditional operator we merge CVR and address space unqualified // pointees and if there is a composite type we return a pointer to it with // merged qualifiers. if (S.getLangOpts().OpenCL) { LHSCastKind = LAddrSpace == ResultAddrSpace ? CK_BitCast : CK_AddressSpaceConversion; RHSCastKind = RAddrSpace == ResultAddrSpace ? CK_BitCast : CK_AddressSpaceConversion; lhQual.removeAddressSpace(); rhQual.removeAddressSpace(); } lhptee = S.Context.getQualifiedType(lhptee.getUnqualifiedType(), lhQual); rhptee = S.Context.getQualifiedType(rhptee.getUnqualifiedType(), rhQual); QualType CompositeTy = S.Context.mergeTypes(lhptee, rhptee); if (CompositeTy.isNull()) { // In this situation, we assume void* type. No especially good // reason, but this is what gcc does, and we do have to pick // to get a consistent AST. QualType incompatTy; incompatTy = S.Context.getPointerType( S.Context.getAddrSpaceQualType(S.Context.VoidTy, ResultAddrSpace)); LHS = S.ImpCastExprToType(LHS.get(), incompatTy, LHSCastKind); RHS = S.ImpCastExprToType(RHS.get(), incompatTy, RHSCastKind); // FIXME: For OpenCL the warning emission and cast to void* leaves a room // for casts between types with incompatible address space qualifiers. // For the following code the compiler produces casts between global and // local address spaces of the corresponded innermost pointees: // local int *global *a; // global int *global *b; // a = (0 ? a : b); // see C99 6.5.16.1.p1. S.Diag(Loc, diag::ext_typecheck_cond_incompatible_pointers) << LHSTy << RHSTy << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return incompatTy; } // The pointer types are compatible. // In case of OpenCL ResultTy should have the address space qualifier // which is a superset of address spaces of both the 2nd and the 3rd // operands of the conditional operator. QualType ResultTy = [&, ResultAddrSpace]() { if (S.getLangOpts().OpenCL) { Qualifiers CompositeQuals = CompositeTy.getQualifiers(); CompositeQuals.setAddressSpace(ResultAddrSpace); return S.Context .getQualifiedType(CompositeTy.getUnqualifiedType(), CompositeQuals) .withCVRQualifiers(MergedCVRQual); } return CompositeTy.withCVRQualifiers(MergedCVRQual); }(); if (IsBlockPointer) ResultTy = S.Context.getBlockPointerType(ResultTy); else ResultTy = S.Context.getPointerType(ResultTy); LHS = S.ImpCastExprToType(LHS.get(), ResultTy, LHSCastKind); RHS = S.ImpCastExprToType(RHS.get(), ResultTy, RHSCastKind); return ResultTy; } /// \brief Return the resulting type when the operands are both block pointers. static QualType checkConditionalBlockPointerCompatibility(Sema &S, ExprResult &LHS, ExprResult &RHS, SourceLocation Loc) { QualType LHSTy = LHS.get()->getType(); QualType RHSTy = RHS.get()->getType(); if (!LHSTy->isBlockPointerType() || !RHSTy->isBlockPointerType()) { if (LHSTy->isVoidPointerType() || RHSTy->isVoidPointerType()) { QualType destType = S.Context.getPointerType(S.Context.VoidTy); LHS = S.ImpCastExprToType(LHS.get(), destType, CK_BitCast); RHS = S.ImpCastExprToType(RHS.get(), destType, CK_BitCast); return destType; } S.Diag(Loc, diag::err_typecheck_cond_incompatible_operands) << LHSTy << RHSTy << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } // We have 2 block pointer types. 
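  //
  // E.g. (hypothetical): with 'void (^a)(void), (^b)(void);', the type of
  // 'cond ? a : b' is computed by the shared pointer-compatibility logic
  // below.
  //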
return checkConditionalPointerCompatibility(S, LHS, RHS, Loc); } /// \brief Return the resulting type when the operands are both pointers. static QualType checkConditionalObjectPointersCompatibility(Sema &S, ExprResult &LHS, ExprResult &RHS, SourceLocation Loc) { // get the pointer types QualType LHSTy = LHS.get()->getType(); QualType RHSTy = RHS.get()->getType(); // get the "pointed to" types QualType lhptee = LHSTy->getAs()->getPointeeType(); QualType rhptee = RHSTy->getAs()->getPointeeType(); // ignore qualifiers on void (C99 6.5.15p3, clause 6) if (lhptee->isVoidType() && rhptee->isIncompleteOrObjectType()) { // Figure out necessary qualifiers (C99 6.5.15p6) QualType destPointee = S.Context.getQualifiedType(lhptee, rhptee.getQualifiers()); QualType destType = S.Context.getPointerType(destPointee); // Add qualifiers if necessary. LHS = S.ImpCastExprToType(LHS.get(), destType, CK_NoOp); // Promote to void*. RHS = S.ImpCastExprToType(RHS.get(), destType, CK_BitCast); return destType; } if (rhptee->isVoidType() && lhptee->isIncompleteOrObjectType()) { QualType destPointee = S.Context.getQualifiedType(rhptee, lhptee.getQualifiers()); QualType destType = S.Context.getPointerType(destPointee); // Add qualifiers if necessary. RHS = S.ImpCastExprToType(RHS.get(), destType, CK_NoOp); // Promote to void*. LHS = S.ImpCastExprToType(LHS.get(), destType, CK_BitCast); return destType; } return checkConditionalPointerCompatibility(S, LHS, RHS, Loc); } /// \brief Return false if the first expression is not an integer and the second /// expression is not a pointer, true otherwise. static bool checkPointerIntegerMismatch(Sema &S, ExprResult &Int, Expr* PointerExpr, SourceLocation Loc, bool IsIntFirstExpr) { if (!PointerExpr->getType()->isPointerType() || !Int.get()->getType()->isIntegerType()) return false; Expr *Expr1 = IsIntFirstExpr ? Int.get() : PointerExpr; Expr *Expr2 = IsIntFirstExpr ? PointerExpr : Int.get(); S.Diag(Loc, diag::ext_typecheck_cond_pointer_integer_mismatch) << Expr1->getType() << Expr2->getType() << Expr1->getSourceRange() << Expr2->getSourceRange(); Int = S.ImpCastExprToType(Int.get(), PointerExpr->getType(), CK_IntegralToPointer); return true; } /// \brief Simple conversion between integer and floating point types. /// /// Used when handling the OpenCL conditional operator where the /// condition is a vector while the other operands are scalar. /// /// OpenCL v1.1 s6.3.i and s6.11.6 together require that the scalar /// types are either integer or floating type. Between the two /// operands, the type with the higher rank is defined as the "result /// type". The other operand needs to be promoted to the same type. No /// other type promotion is allowed. We cannot use /// UsualArithmeticConversions() for this purpose, since it always /// promotes promotable types. static QualType OpenCLArithmeticConversions(Sema &S, ExprResult &LHS, ExprResult &RHS, SourceLocation QuestionLoc) { LHS = S.DefaultFunctionArrayLvalueConversion(LHS.get()); if (LHS.isInvalid()) return QualType(); RHS = S.DefaultFunctionArrayLvalueConversion(RHS.get()); if (RHS.isInvalid()) return QualType(); // For conversion purposes, we ignore any qualifiers. // For example, "const float" and "float" are equivalent. 
QualType LHSType = S.Context.getCanonicalType(LHS.get()->getType()).getUnqualifiedType(); QualType RHSType = S.Context.getCanonicalType(RHS.get()->getType()).getUnqualifiedType(); if (!LHSType->isIntegerType() && !LHSType->isRealFloatingType()) { S.Diag(QuestionLoc, diag::err_typecheck_cond_expect_int_float) << LHSType << LHS.get()->getSourceRange(); return QualType(); } if (!RHSType->isIntegerType() && !RHSType->isRealFloatingType()) { S.Diag(QuestionLoc, diag::err_typecheck_cond_expect_int_float) << RHSType << RHS.get()->getSourceRange(); return QualType(); } // If both types are identical, no conversion is needed. if (LHSType == RHSType) return LHSType; // Now handle "real" floating types (i.e. float, double, long double). if (LHSType->isRealFloatingType() || RHSType->isRealFloatingType()) return handleFloatConversion(S, LHS, RHS, LHSType, RHSType, /*IsCompAssign = */ false); // Finally, we have two differing integer types. return handleIntegerConversion (S, LHS, RHS, LHSType, RHSType, /*IsCompAssign = */ false); } /// \brief Convert scalar operands to a vector that matches the /// condition in length. /// /// Used when handling the OpenCL conditional operator where the /// condition is a vector while the other operands are scalar. /// /// We first compute the "result type" for the scalar operands /// according to OpenCL v1.1 s6.3.i. Both operands are then converted /// into a vector of that type where the length matches the condition /// vector type. s6.11.6 requires that the element types of the result /// and the condition must have the same number of bits. static QualType OpenCLConvertScalarsToVectors(Sema &S, ExprResult &LHS, ExprResult &RHS, QualType CondTy, SourceLocation QuestionLoc) { QualType ResTy = OpenCLArithmeticConversions(S, LHS, RHS, QuestionLoc); if (ResTy.isNull()) return QualType(); const VectorType *CV = CondTy->getAs(); assert(CV); // Determine the vector result type unsigned NumElements = CV->getNumElements(); QualType VectorTy = S.Context.getExtVectorType(ResTy, NumElements); // Ensure that all types have the same number of bits if (S.Context.getTypeSize(CV->getElementType()) != S.Context.getTypeSize(ResTy)) { // Since VectorTy is created internally, it does not pretty print // with an OpenCL name. Instead, we just print a description. std::string EleTyName = ResTy.getUnqualifiedType().getAsString(); SmallString<64> Str; llvm::raw_svector_ostream OS(Str); OS << "(vector of " << NumElements << " '" << EleTyName << "' values)"; S.Diag(QuestionLoc, diag::err_conditional_vector_element_size) << CondTy << OS.str(); return QualType(); } // Convert operands to the vector result type LHS = S.ImpCastExprToType(LHS.get(), VectorTy, CK_VectorSplat); RHS = S.ImpCastExprToType(RHS.get(), VectorTy, CK_VectorSplat); return VectorTy; } /// \brief Return false if this is a valid OpenCL condition vector static bool checkOpenCLConditionVector(Sema &S, Expr *Cond, SourceLocation QuestionLoc) { // OpenCL v1.1 s6.11.6 says the elements of the vector must be of // integral type. const VectorType *CondTy = Cond->getType()->getAs(); assert(CondTy); QualType EleTy = CondTy->getElementType(); if (EleTy->isIntegerType()) return false; S.Diag(QuestionLoc, diag::err_typecheck_cond_expect_nonfloat) << Cond->getType() << Cond->getSourceRange(); return true; } /// \brief Return false if the vector condition type and the vector /// result type are compatible. 
/// /// OpenCL v1.1 s6.11.6 requires that both vector types have the same /// number of elements, and their element types have the same number /// of bits. static bool checkVectorResult(Sema &S, QualType CondTy, QualType VecResTy, SourceLocation QuestionLoc) { const VectorType *CV = CondTy->getAs(); const VectorType *RV = VecResTy->getAs(); assert(CV && RV); if (CV->getNumElements() != RV->getNumElements()) { S.Diag(QuestionLoc, diag::err_conditional_vector_size) << CondTy << VecResTy; return true; } QualType CVE = CV->getElementType(); QualType RVE = RV->getElementType(); if (S.Context.getTypeSize(CVE) != S.Context.getTypeSize(RVE)) { S.Diag(QuestionLoc, diag::err_conditional_vector_element_size) << CondTy << VecResTy; return true; } return false; } /// \brief Return the resulting type for the conditional operator in /// OpenCL (aka "ternary selection operator", OpenCL v1.1 /// s6.3.i) when the condition is a vector type. static QualType OpenCLCheckVectorConditional(Sema &S, ExprResult &Cond, ExprResult &LHS, ExprResult &RHS, SourceLocation QuestionLoc) { Cond = S.DefaultFunctionArrayLvalueConversion(Cond.get()); if (Cond.isInvalid()) return QualType(); QualType CondTy = Cond.get()->getType(); if (checkOpenCLConditionVector(S, Cond.get(), QuestionLoc)) return QualType(); // If either operand is a vector then find the vector type of the // result as specified in OpenCL v1.1 s6.3.i. if (LHS.get()->getType()->isVectorType() || RHS.get()->getType()->isVectorType()) { QualType VecResTy = S.CheckVectorOperands(LHS, RHS, QuestionLoc, /*isCompAssign*/false, /*AllowBothBool*/true, /*AllowBoolConversions*/false); if (VecResTy.isNull()) return QualType(); // The result type must match the condition type as specified in // OpenCL v1.1 s6.11.6. if (checkVectorResult(S, CondTy, VecResTy, QuestionLoc)) return QualType(); return VecResTy; } // Both operands are scalar. return OpenCLConvertScalarsToVectors(S, LHS, RHS, CondTy, QuestionLoc); } /// \brief Return true if the Expr is block type static bool checkBlockType(Sema &S, const Expr *E) { if (const CallExpr *CE = dyn_cast(E)) { QualType Ty = CE->getCallee()->getType(); if (Ty->isBlockPointerType()) { S.Diag(E->getExprLoc(), diag::err_opencl_ternary_with_block); return true; } } return false; } /// Note that LHS is not null here, even if this is the gnu "x ?: y" extension. /// In that case, LHS = cond. /// C99 6.5.15 QualType Sema::CheckConditionalOperands(ExprResult &Cond, ExprResult &LHS, ExprResult &RHS, ExprValueKind &VK, ExprObjectKind &OK, SourceLocation QuestionLoc) { ExprResult LHSResult = CheckPlaceholderExpr(LHS.get()); if (!LHSResult.isUsable()) return QualType(); LHS = LHSResult; ExprResult RHSResult = CheckPlaceholderExpr(RHS.get()); if (!RHSResult.isUsable()) return QualType(); RHS = RHSResult; // C++ is sufficiently different to merit its own checker. if (getLangOpts().CPlusPlus) return CXXCheckConditionalOperands(Cond, LHS, RHS, VK, OK, QuestionLoc); VK = VK_RValue; OK = OK_Ordinary; // The OpenCL operator with a vector condition is sufficiently // different to merit its own checker. if (getLangOpts().OpenCL && Cond.get()->getType()->isVectorType()) return OpenCLCheckVectorConditional(*this, Cond, LHS, RHS, QuestionLoc); // First, check the condition. Cond = UsualUnaryConversions(Cond.get()); if (Cond.isInvalid()) return QualType(); if (checkCondition(*this, Cond.get(), QuestionLoc)) return QualType(); // Now check the two expressions. 
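  //
  // E.g. (hypothetical, using the Clang vector extension): when either arm
  // is a vector, the vector-operand checking below picks the result type:
  //
  //   typedef int v4 __attribute__((vector_size(16)));
  //   v4 a, b; int c;
  //   v4 r = c ? a : b;
  //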
if (LHS.get()->getType()->isVectorType() || RHS.get()->getType()->isVectorType()) return CheckVectorOperands(LHS, RHS, QuestionLoc, /*isCompAssign*/false, /*AllowBothBool*/true, /*AllowBoolConversions*/false); QualType ResTy = UsualArithmeticConversions(LHS, RHS); if (LHS.isInvalid() || RHS.isInvalid()) return QualType(); QualType LHSTy = LHS.get()->getType(); QualType RHSTy = RHS.get()->getType(); // Diagnose attempts to convert between __float128 and long double where // such conversions currently can't be handled. if (unsupportedTypeConversion(*this, LHSTy, RHSTy)) { Diag(QuestionLoc, diag::err_typecheck_cond_incompatible_operands) << LHSTy << RHSTy << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } // OpenCL v2.0 s6.12.5 - Blocks cannot be used as expressions of the ternary // selection operator (?:). if (getLangOpts().OpenCL && (checkBlockType(*this, LHS.get()) | checkBlockType(*this, RHS.get()))) { return QualType(); } // If both operands have arithmetic type, do the usual arithmetic conversions // to find a common type: C99 6.5.15p3,5. if (LHSTy->isArithmeticType() && RHSTy->isArithmeticType()) { LHS = ImpCastExprToType(LHS.get(), ResTy, PrepareScalarCast(LHS, ResTy)); RHS = ImpCastExprToType(RHS.get(), ResTy, PrepareScalarCast(RHS, ResTy)); return ResTy; } // If both operands are the same structure or union type, the result is that // type. if (const RecordType *LHSRT = LHSTy->getAs()) { // C99 6.5.15p3 if (const RecordType *RHSRT = RHSTy->getAs()) if (LHSRT->getDecl() == RHSRT->getDecl()) // "If both the operands have structure or union type, the result has // that type." This implies that CV qualifiers are dropped. return LHSTy.getUnqualifiedType(); // FIXME: Type of conditional expression must be complete in C mode. } // C99 6.5.15p5: "If both operands have void type, the result has void type." // The following || allows only one side to be void (a GCC-ism). if (LHSTy->isVoidType() || RHSTy->isVoidType()) { return checkConditionalVoidType(*this, LHS, RHS); } // C99 6.5.15p6 - "if one operand is a null pointer constant, the result has // the type of the other operand." if (!checkConditionalNullPointer(*this, RHS, LHSTy)) return LHSTy; if (!checkConditionalNullPointer(*this, LHS, RHSTy)) return RHSTy; // All objective-c pointer type analysis is done here. QualType compositeType = FindCompositeObjCPointerType(LHS, RHS, QuestionLoc); if (LHS.isInvalid() || RHS.isInvalid()) return QualType(); if (!compositeType.isNull()) return compositeType; // Handle block pointer types. if (LHSTy->isBlockPointerType() || RHSTy->isBlockPointerType()) return checkConditionalBlockPointerCompatibility(*this, LHS, RHS, QuestionLoc); // Check constraints for C object pointers types (C99 6.5.15p3,6). if (LHSTy->isPointerType() && RHSTy->isPointerType()) return checkConditionalObjectPointersCompatibility(*this, LHS, RHS, QuestionLoc); // GCC compatibility: soften pointer/integer mismatch. Note that // null pointers have been filtered out by this point. if (checkPointerIntegerMismatch(*this, LHS, RHS.get(), QuestionLoc, /*isIntFirstExpr=*/true)) return RHSTy; if (checkPointerIntegerMismatch(*this, RHS, LHS.get(), QuestionLoc, /*isIntFirstExpr=*/false)) return LHSTy; // Emit a better diagnostic if one of the expressions is a null pointer // constant and the other is not a pointer type. In this case, the user most // likely forgot to take the address of the other expression. 
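  // For instance (illustrative, not from the original source):
  //
  //   int x;
  //   c ? NULL : x;  // one arm is a null pointer constant and 'x' is not a
  //                  // pointer; the user probably meant 'c ? NULL : &x'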
if (DiagnoseConditionalForNull(LHS.get(), RHS.get(), QuestionLoc)) return QualType(); // Otherwise, the operands are not compatible. Diag(QuestionLoc, diag::err_typecheck_cond_incompatible_operands) << LHSTy << RHSTy << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } /// FindCompositeObjCPointerType - Helper method to find composite type of /// two objective-c pointer types of the two input expressions. QualType Sema::FindCompositeObjCPointerType(ExprResult &LHS, ExprResult &RHS, SourceLocation QuestionLoc) { QualType LHSTy = LHS.get()->getType(); QualType RHSTy = RHS.get()->getType(); // Handle things like Class and struct objc_class*. Here we case the result // to the pseudo-builtin, because that will be implicitly cast back to the // redefinition type if an attempt is made to access its fields. if (LHSTy->isObjCClassType() && (Context.hasSameType(RHSTy, Context.getObjCClassRedefinitionType()))) { RHS = ImpCastExprToType(RHS.get(), LHSTy, CK_CPointerToObjCPointerCast); return LHSTy; } if (RHSTy->isObjCClassType() && (Context.hasSameType(LHSTy, Context.getObjCClassRedefinitionType()))) { LHS = ImpCastExprToType(LHS.get(), RHSTy, CK_CPointerToObjCPointerCast); return RHSTy; } // And the same for struct objc_object* / id if (LHSTy->isObjCIdType() && (Context.hasSameType(RHSTy, Context.getObjCIdRedefinitionType()))) { RHS = ImpCastExprToType(RHS.get(), LHSTy, CK_CPointerToObjCPointerCast); return LHSTy; } if (RHSTy->isObjCIdType() && (Context.hasSameType(LHSTy, Context.getObjCIdRedefinitionType()))) { LHS = ImpCastExprToType(LHS.get(), RHSTy, CK_CPointerToObjCPointerCast); return RHSTy; } // And the same for struct objc_selector* / SEL if (Context.isObjCSelType(LHSTy) && (Context.hasSameType(RHSTy, Context.getObjCSelRedefinitionType()))) { RHS = ImpCastExprToType(RHS.get(), LHSTy, CK_BitCast); return LHSTy; } if (Context.isObjCSelType(RHSTy) && (Context.hasSameType(LHSTy, Context.getObjCSelRedefinitionType()))) { LHS = ImpCastExprToType(LHS.get(), RHSTy, CK_BitCast); return RHSTy; } // Check constraints for Objective-C object pointers types. if (LHSTy->isObjCObjectPointerType() && RHSTy->isObjCObjectPointerType()) { if (Context.getCanonicalType(LHSTy) == Context.getCanonicalType(RHSTy)) { // Two identical object pointer types are always compatible. return LHSTy; } const ObjCObjectPointerType *LHSOPT = LHSTy->castAs(); const ObjCObjectPointerType *RHSOPT = RHSTy->castAs(); QualType compositeType = LHSTy; // If both operands are interfaces and either operand can be // assigned to the other, use that type as the composite // type. This allows // xxx ? (A*) a : (B*) b // where B is a subclass of A. // // Additionally, as for assignment, if either type is 'id' // allow silent coercion. Finally, if the types are // incompatible then make sure to use 'id' as the composite // type so the result is acceptable for sending messages to. // FIXME: Consider unifying with 'areComparableObjCPointerTypes'. // It could return the composite type. if (!(compositeType = Context.areCommonBaseCompatible(LHSOPT, RHSOPT)).isNull()) { // Nothing more to do. } else if (Context.canAssignObjCInterfaces(LHSOPT, RHSOPT)) { compositeType = RHSOPT->isObjCBuiltinType() ? RHSTy : LHSTy; } else if (Context.canAssignObjCInterfaces(RHSOPT, LHSOPT)) { compositeType = LHSOPT->isObjCBuiltinType() ? LHSTy : RHSTy; } else if ((LHSTy->isObjCQualifiedIdType() || RHSTy->isObjCQualifiedIdType()) && Context.ObjCQualifiedIdTypesAreCompatible(LHSTy, RHSTy, true)) { // Need to handle "id" explicitly. 
// GCC allows qualified id and any Objective-C type to devolve to // id. Currently localizing to here until clear this should be // part of ObjCQualifiedIdTypesAreCompatible. compositeType = Context.getObjCIdType(); } else if (LHSTy->isObjCIdType() || RHSTy->isObjCIdType()) { compositeType = Context.getObjCIdType(); } else { Diag(QuestionLoc, diag::ext_typecheck_cond_incompatible_operands) << LHSTy << RHSTy << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); QualType incompatTy = Context.getObjCIdType(); LHS = ImpCastExprToType(LHS.get(), incompatTy, CK_BitCast); RHS = ImpCastExprToType(RHS.get(), incompatTy, CK_BitCast); return incompatTy; } // The object pointer types are compatible. LHS = ImpCastExprToType(LHS.get(), compositeType, CK_BitCast); RHS = ImpCastExprToType(RHS.get(), compositeType, CK_BitCast); return compositeType; } // Check Objective-C object pointer types and 'void *' if (LHSTy->isVoidPointerType() && RHSTy->isObjCObjectPointerType()) { if (getLangOpts().ObjCAutoRefCount) { // ARC forbids the implicit conversion of object pointers to 'void *', // so these types are not compatible. Diag(QuestionLoc, diag::err_cond_voidptr_arc) << LHSTy << RHSTy << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); LHS = RHS = true; return QualType(); } QualType lhptee = LHSTy->getAs()->getPointeeType(); QualType rhptee = RHSTy->getAs()->getPointeeType(); QualType destPointee = Context.getQualifiedType(lhptee, rhptee.getQualifiers()); QualType destType = Context.getPointerType(destPointee); // Add qualifiers if necessary. LHS = ImpCastExprToType(LHS.get(), destType, CK_NoOp); // Promote to void*. RHS = ImpCastExprToType(RHS.get(), destType, CK_BitCast); return destType; } if (LHSTy->isObjCObjectPointerType() && RHSTy->isVoidPointerType()) { if (getLangOpts().ObjCAutoRefCount) { // ARC forbids the implicit conversion of object pointers to 'void *', // so these types are not compatible. Diag(QuestionLoc, diag::err_cond_voidptr_arc) << LHSTy << RHSTy << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); LHS = RHS = true; return QualType(); } QualType lhptee = LHSTy->getAs()->getPointeeType(); QualType rhptee = RHSTy->getAs()->getPointeeType(); QualType destPointee = Context.getQualifiedType(rhptee, lhptee.getQualifiers()); QualType destType = Context.getPointerType(destPointee); // Add qualifiers if necessary. RHS = ImpCastExprToType(RHS.get(), destType, CK_NoOp); // Promote to void*. LHS = ImpCastExprToType(LHS.get(), destType, CK_BitCast); return destType; } return QualType(); } /// SuggestParentheses - Emit a note with a fixit hint that wraps /// ParenRange in parentheses. static void SuggestParentheses(Sema &Self, SourceLocation Loc, const PartialDiagnostic &Note, SourceRange ParenRange) { SourceLocation EndLoc = Self.getLocForEndOfToken(ParenRange.getEnd()); if (ParenRange.getBegin().isFileID() && ParenRange.getEnd().isFileID() && EndLoc.isValid()) { Self.Diag(Loc, Note) << FixItHint::CreateInsertion(ParenRange.getBegin(), "(") << FixItHint::CreateInsertion(EndLoc, ")"); } else { // We can't display the parentheses, so just show the bare note. 
Self.Diag(Loc, Note) << ParenRange; } } static bool IsArithmeticOp(BinaryOperatorKind Opc) { return BinaryOperator::isAdditiveOp(Opc) || BinaryOperator::isMultiplicativeOp(Opc) || BinaryOperator::isShiftOp(Opc); } /// IsArithmeticBinaryExpr - Returns true if E is an arithmetic binary /// expression, either using a built-in or overloaded operator, /// and sets *OpCode to the opcode and *RHSExprs to the right-hand side /// expression. static bool IsArithmeticBinaryExpr(Expr *E, BinaryOperatorKind *Opcode, Expr **RHSExprs) { // Don't strip parenthesis: we should not warn if E is in parenthesis. E = E->IgnoreImpCasts(); E = E->IgnoreConversionOperator(); E = E->IgnoreImpCasts(); // Built-in binary operator. if (BinaryOperator *OP = dyn_cast(E)) { if (IsArithmeticOp(OP->getOpcode())) { *Opcode = OP->getOpcode(); *RHSExprs = OP->getRHS(); return true; } } // Overloaded operator. if (CXXOperatorCallExpr *Call = dyn_cast(E)) { if (Call->getNumArgs() != 2) return false; // Make sure this is really a binary operator that is safe to pass into // BinaryOperator::getOverloadedOpcode(), e.g. it's not a subscript op. OverloadedOperatorKind OO = Call->getOperator(); if (OO < OO_Plus || OO > OO_Arrow || OO == OO_PlusPlus || OO == OO_MinusMinus) return false; BinaryOperatorKind OpKind = BinaryOperator::getOverloadedOpcode(OO); if (IsArithmeticOp(OpKind)) { *Opcode = OpKind; *RHSExprs = Call->getArg(1); return true; } } return false; } /// ExprLooksBoolean - Returns true if E looks boolean, i.e. it has boolean type /// or is a logical expression such as (x==y) which has int type, but is /// commonly interpreted as boolean. static bool ExprLooksBoolean(Expr *E) { E = E->IgnoreParenImpCasts(); if (E->getType()->isBooleanType()) return true; if (BinaryOperator *OP = dyn_cast(E)) return OP->isComparisonOp() || OP->isLogicalOp(); if (UnaryOperator *OP = dyn_cast(E)) return OP->getOpcode() == UO_LNot; if (E->getType()->isPointerType()) return true; return false; } /// DiagnoseConditionalPrecedence - Emit a warning when a conditional operator /// and binary operator are mixed in a way that suggests the programmer assumed /// the conditional operator has higher precedence, for example: /// "int x = a + someBinaryCondition ? 1 : 2". static void DiagnoseConditionalPrecedence(Sema &Self, SourceLocation OpLoc, Expr *Condition, Expr *LHSExpr, Expr *RHSExpr) { BinaryOperatorKind CondOpcode; Expr *CondRHS; if (!IsArithmeticBinaryExpr(Condition, &CondOpcode, &CondRHS)) return; if (!ExprLooksBoolean(CondRHS)) return; // The condition is an arithmetic binary expression, with a right- // hand side that looks boolean, so warn. Self.Diag(OpLoc, diag::warn_precedence_conditional) << Condition->getSourceRange() << BinaryOperator::getOpcodeStr(CondOpcode); SuggestParentheses(Self, OpLoc, Self.PDiag(diag::note_precedence_silence) << BinaryOperator::getOpcodeStr(CondOpcode), SourceRange(Condition->getLocStart(), Condition->getLocEnd())); SuggestParentheses(Self, OpLoc, Self.PDiag(diag::note_precedence_conditional_first), SourceRange(CondRHS->getLocStart(), RHSExpr->getLocEnd())); } /// Compute the nullability of a conditional expression. 
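///
/// Illustrative merges under the rules below (a sketch using the Clang
/// nullability qualifiers; not part of the original source):
///
///   int * _Nonnull a;  int * _Nullable b;  int *u;
///
///   c ? a : b;  // _Nullable: any nullable operand makes the result nullable
///   c ? a : u;  // unspecified: a nonnull operand defers to the other side
///   a ?: b;     // binary form: a _Nonnull LHS makes the result _Nonnull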
static QualType computeConditionalNullability(QualType ResTy, bool IsBin,
                                              QualType LHSTy, QualType RHSTy,
                                              ASTContext &Ctx) {
  if (!ResTy->isAnyPointerType())
    return ResTy;

  auto GetNullability = [&Ctx](QualType Ty) {
    Optional<NullabilityKind> Kind = Ty->getNullability(Ctx);
    if (Kind)
      return *Kind;
    return NullabilityKind::Unspecified;
  };
  auto LHSKind = GetNullability(LHSTy), RHSKind = GetNullability(RHSTy);
  NullabilityKind MergedKind;

  // Compute nullability of a binary conditional expression.
  if (IsBin) {
    if (LHSKind == NullabilityKind::NonNull)
      MergedKind = NullabilityKind::NonNull;
    else
      MergedKind = RHSKind;
    // Compute nullability of a normal conditional expression.
  } else {
    if (LHSKind == NullabilityKind::Nullable ||
        RHSKind == NullabilityKind::Nullable)
      MergedKind = NullabilityKind::Nullable;
    else if (LHSKind == NullabilityKind::NonNull)
      MergedKind = RHSKind;
    else if (RHSKind == NullabilityKind::NonNull)
      MergedKind = LHSKind;
    else
      MergedKind = NullabilityKind::Unspecified;
  }

  // Return if ResTy already has the correct nullability.
  if (GetNullability(ResTy) == MergedKind)
    return ResTy;

  // Strip all nullability from ResTy.
  while (ResTy->getNullability(Ctx))
    ResTy = ResTy.getSingleStepDesugaredType(Ctx);

  // Create a new AttributedType with the new nullability kind.
  auto NewAttr = AttributedType::getNullabilityAttrKind(MergedKind);
  return Ctx.getAttributedType(NewAttr, ResTy, ResTy);
}

/// ActOnConditionalOp - Parse a ?: operation. Note that 'LHS' may be null
/// in the case of the GNU conditional expr extension.
ExprResult Sema::ActOnConditionalOp(SourceLocation QuestionLoc,
                                    SourceLocation ColonLoc,
                                    Expr *CondExpr, Expr *LHSExpr,
                                    Expr *RHSExpr) {
  if (!getLangOpts().CPlusPlus) {
    // C cannot handle TypoExpr nodes in the condition because it
    // doesn't handle dependent types properly, so make sure any TypoExprs have
    // been dealt with before checking the operands.
    ExprResult CondResult = CorrectDelayedTyposInExpr(CondExpr);
    ExprResult LHSResult = CorrectDelayedTyposInExpr(LHSExpr);
    ExprResult RHSResult = CorrectDelayedTyposInExpr(RHSExpr);

    if (!CondResult.isUsable())
      return ExprError();

    if (LHSExpr) {
      if (!LHSResult.isUsable())
        return ExprError();
    }

    if (!RHSResult.isUsable())
      return ExprError();

    CondExpr = CondResult.get();
    LHSExpr = LHSResult.get();
    RHSExpr = RHSResult.get();
  }

  // If this is the gnu "x ?: y" extension, analyze the types as though the LHS
  // was the condition.
  OpaqueValueExpr *opaqueValue = nullptr;
  Expr *commonExpr = nullptr;
  if (!LHSExpr) {
    commonExpr = CondExpr;
    // Lower out placeholder types first.  This is important so that we don't
    // try to capture a placeholder. This happens in a few cases in C++, such
    // as Objective-C++'s dictionary subscripting syntax.
    if (commonExpr->hasPlaceholderType()) {
      ExprResult result = CheckPlaceholderExpr(commonExpr);
      if (!result.isUsable()) return ExprError();
      commonExpr = result.get();
    }
    // We usually want to apply unary conversions *before* saving, except
    // in the special case of a C++ l-value conditional.
    if (!(getLangOpts().CPlusPlus
          && !commonExpr->isTypeDependent()
          && commonExpr->getValueKind() == RHSExpr->getValueKind()
          && commonExpr->isGLValue()
          && commonExpr->isOrdinaryOrBitFieldObject()
          && RHSExpr->isOrdinaryOrBitFieldObject()
          && Context.hasSameType(commonExpr->getType(), RHSExpr->getType()))) {
      ExprResult commonRes = UsualUnaryConversions(commonExpr);
      if (commonRes.isInvalid())
        return ExprError();
      commonExpr = commonRes.get();
    }

    // If the common expression is a class or array prvalue, materialize it
    // so that we can safely refer to it multiple times.
    if (commonExpr->isRValue() &&
        (commonExpr->getType()->isRecordType() ||
         commonExpr->getType()->isArrayType())) {
      ExprResult MatExpr = TemporaryMaterializationConversion(commonExpr);
      if (MatExpr.isInvalid())
        return ExprError();
      commonExpr = MatExpr.get();
    }

    opaqueValue = new (Context) OpaqueValueExpr(commonExpr->getExprLoc(),
                                                commonExpr->getType(),
                                                commonExpr->getValueKind(),
                                                commonExpr->getObjectKind(),
                                                commonExpr);
    LHSExpr = CondExpr = opaqueValue;
  }

  QualType LHSTy = LHSExpr->getType(), RHSTy = RHSExpr->getType();
  ExprValueKind VK = VK_RValue;
  ExprObjectKind OK = OK_Ordinary;
  ExprResult Cond = CondExpr, LHS = LHSExpr, RHS = RHSExpr;
  QualType result = CheckConditionalOperands(Cond, LHS, RHS, VK, OK,
                                             QuestionLoc);
  if (result.isNull() || Cond.isInvalid() || LHS.isInvalid() ||
      RHS.isInvalid())
    return ExprError();

  DiagnoseConditionalPrecedence(*this, QuestionLoc, Cond.get(), LHS.get(),
                                RHS.get());

  CheckBoolLikeConversion(Cond.get(), QuestionLoc);

  result = computeConditionalNullability(result, commonExpr, LHSTy, RHSTy,
                                         Context);

  if (!commonExpr)
    return new (Context)
        ConditionalOperator(Cond.get(), QuestionLoc, LHS.get(), ColonLoc,
                            RHS.get(), result, VK, OK);

  return new (Context) BinaryConditionalOperator(
      commonExpr, opaqueValue, Cond.get(), LHS.get(), RHS.get(), QuestionLoc,
      ColonLoc, result, VK, OK);
}

// checkPointerTypesForAssignment - This is a very tricky routine (despite
// being closely modeled after the C99 spec:-). The odd characteristic of this
// routine is it effectively ignores the qualifiers on the top level pointee.
// This circumvents the usual type rules specified in 6.2.7p1 & 6.7.5.[1-3].
// FIXME: add a couple examples in this comment.
static Sema::AssignConvertType
checkPointerTypesForAssignment(Sema &S, QualType LHSType, QualType RHSType) {
  assert(LHSType.isCanonical() && "LHS not canonicalized!");
  assert(RHSType.isCanonical() && "RHS not canonicalized!");

  // get the "pointed to" type (ignoring qualifiers at the top level)
  const Type *lhptee, *rhptee;
  Qualifiers lhq, rhq;
  std::tie(lhptee, lhq) =
      cast<PointerType>(LHSType)->getPointeeType().split().asPair();
  std::tie(rhptee, rhq) =
      cast<PointerType>(RHSType)->getPointeeType().split().asPair();

  Sema::AssignConvertType ConvTy = Sema::Compatible;

  // C99 6.5.16.1p1: The following citation is common to constraints
  // 3 & 4 (below). ...and the type *pointed to* by the left has all the
  // qualifiers of the type *pointed to* by the right;

  // As a special case, 'non-__weak A *' -> 'non-__weak const *' is okay.
  if (lhq.getObjCLifetime() != rhq.getObjCLifetime() &&
      lhq.compatiblyIncludesObjCLifetime(rhq)) {
    // Ignore lifetime for further calculation.
    lhq.removeObjCLifetime();
    rhq.removeObjCLifetime();
  }

  if (!lhq.compatiblyIncludes(rhq)) {
    // Treat address-space mismatches as fatal.  TODO: address subspaces
    if (!lhq.isAddressSpaceSupersetOf(rhq))
      ConvTy = Sema::IncompatiblePointerDiscardsQualifiers;

    // It's okay to add or remove GC or lifetime qualifiers when converting to
    // and from void*.
    else if (lhq.withoutObjCGCAttr().withoutObjCLifetime()
                 .compatiblyIncludes(
                     rhq.withoutObjCGCAttr().withoutObjCLifetime()) &&
             (lhptee->isVoidType() || rhptee->isVoidType()))
      ; // keep old

    // Treat lifetime mismatches as fatal.
    else if (lhq.getObjCLifetime() != rhq.getObjCLifetime())
      ConvTy = Sema::IncompatiblePointerDiscardsQualifiers;

    // For GCC/MS compatibility, other qualifier mismatches are treated
    // as still compatible in C.
    else ConvTy = Sema::CompatiblePointerDiscardsQualifiers;
  }

  // C99 6.5.16.1p1 (constraint 4): If one operand is a pointer to an object or
  // incomplete type and the other is a pointer to a qualified or unqualified
  // version of void...
  if (lhptee->isVoidType()) {
    if (rhptee->isIncompleteOrObjectType())
      return ConvTy;

    // As an extension, we allow cast to/from void* to function pointer.
    assert(rhptee->isFunctionType());
    return Sema::FunctionVoidPointer;
  }

  if (rhptee->isVoidType()) {
    if (lhptee->isIncompleteOrObjectType())
      return ConvTy;

    // As an extension, we allow cast to/from void* to function pointer.
    assert(lhptee->isFunctionType());
    return Sema::FunctionVoidPointer;
  }

  // C99 6.5.16.1p1 (constraint 3): both operands are pointers to qualified or
  // unqualified versions of compatible types, ...
  QualType ltrans = QualType(lhptee, 0), rtrans = QualType(rhptee, 0);
  if (!S.Context.typesAreCompatible(ltrans, rtrans)) {
    // Check if the pointee types are compatible ignoring the sign.
    // We explicitly check for char so that we catch "char" vs
    // "unsigned char" on systems where "char" is unsigned.
    if (lhptee->isCharType())
      ltrans = S.Context.UnsignedCharTy;
    else if (lhptee->hasSignedIntegerRepresentation())
      ltrans = S.Context.getCorrespondingUnsignedType(ltrans);

    if (rhptee->isCharType())
      rtrans = S.Context.UnsignedCharTy;
    else if (rhptee->hasSignedIntegerRepresentation())
      rtrans = S.Context.getCorrespondingUnsignedType(rtrans);

    if (ltrans == rtrans) {
      // Types are compatible ignoring the sign. Qualifier incompatibility
      // takes priority over sign incompatibility because the sign
      // warning can be disabled.
      if (ConvTy != Sema::Compatible)
        return ConvTy;

      return Sema::IncompatiblePointerSign;
    }

    // If we are a multi-level pointer, it's possible that our issue is simply
    // one of qualification - e.g. char ** -> const char ** is not allowed. If
    // the eventual target type is the same and the pointers have the same
    // level of indirection, this must be the issue.
    if (isa<PointerType>(lhptee) && isa<PointerType>(rhptee)) {
      do {
        lhptee = cast<PointerType>(lhptee)->getPointeeType().getTypePtr();
        rhptee = cast<PointerType>(rhptee)->getPointeeType().getTypePtr();
      } while (isa<PointerType>(lhptee) && isa<PointerType>(rhptee));

      if (lhptee == rhptee)
        return Sema::IncompatibleNestedPointerQualifiers;
    }

    // General pointer incompatibility takes priority over qualifiers.
    return Sema::IncompatiblePointer;
  }
  if (!S.getLangOpts().CPlusPlus &&
      S.IsFunctionConversion(ltrans, rtrans, ltrans))
    return Sema::IncompatiblePointer;
  return ConvTy;
}

/// checkBlockPointerTypesForAssignment - This routine determines whether two
/// block pointer types are compatible or whether a block and normal pointer
/// are compatible. It is more restrictive than comparing two function pointer
/// types.
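///
/// For illustration (a sketch, not from the original source):
///
///   void (^b1)(void);  int (^b2)(int);
///
///   b1 = b2;  // Sema::IncompatibleBlockPointer: signatures differ
///   b1 = b1;  // matching block pointer types classify as Sema::Compatible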
static Sema::AssignConvertType checkBlockPointerTypesForAssignment(Sema &S, QualType LHSType, QualType RHSType) { assert(LHSType.isCanonical() && "LHS not canonicalized!"); assert(RHSType.isCanonical() && "RHS not canonicalized!"); QualType lhptee, rhptee; // get the "pointed to" type (ignoring qualifiers at the top level) lhptee = cast(LHSType)->getPointeeType(); rhptee = cast(RHSType)->getPointeeType(); // In C++, the types have to match exactly. if (S.getLangOpts().CPlusPlus) return Sema::IncompatibleBlockPointer; Sema::AssignConvertType ConvTy = Sema::Compatible; // For blocks we enforce that qualifiers are identical. Qualifiers LQuals = lhptee.getLocalQualifiers(); Qualifiers RQuals = rhptee.getLocalQualifiers(); if (S.getLangOpts().OpenCL) { LQuals.removeAddressSpace(); RQuals.removeAddressSpace(); } if (LQuals != RQuals) ConvTy = Sema::CompatiblePointerDiscardsQualifiers; // FIXME: OpenCL doesn't define the exact compile time semantics for a block // assignment. // The current behavior is similar to C++ lambdas. A block might be // assigned to a variable iff its return type and parameters are compatible // (C99 6.2.7) with the corresponding return type and parameters of the LHS of // an assignment. Presumably it should behave in way that a function pointer // assignment does in C, so for each parameter and return type: // * CVR and address space of LHS should be a superset of CVR and address // space of RHS. // * unqualified types should be compatible. if (S.getLangOpts().OpenCL) { if (!S.Context.typesAreBlockPointerCompatible( S.Context.getQualifiedType(LHSType.getUnqualifiedType(), LQuals), S.Context.getQualifiedType(RHSType.getUnqualifiedType(), RQuals))) return Sema::IncompatibleBlockPointer; } else if (!S.Context.typesAreBlockPointerCompatible(LHSType, RHSType)) return Sema::IncompatibleBlockPointer; return ConvTy; } /// checkObjCPointerTypesForAssignment - Compares two objective-c pointer types /// for assignment compatibility. static Sema::AssignConvertType checkObjCPointerTypesForAssignment(Sema &S, QualType LHSType, QualType RHSType) { assert(LHSType.isCanonical() && "LHS was not canonicalized!"); assert(RHSType.isCanonical() && "RHS was not canonicalized!"); if (LHSType->isObjCBuiltinType()) { // Class is not compatible with ObjC object pointers. if (LHSType->isObjCClassType() && !RHSType->isObjCBuiltinType() && !RHSType->isObjCQualifiedClassType()) return Sema::IncompatiblePointer; return Sema::Compatible; } if (RHSType->isObjCBuiltinType()) { if (RHSType->isObjCClassType() && !LHSType->isObjCBuiltinType() && !LHSType->isObjCQualifiedClassType()) return Sema::IncompatiblePointer; return Sema::Compatible; } QualType lhptee = LHSType->getAs()->getPointeeType(); QualType rhptee = RHSType->getAs()->getPointeeType(); if (!lhptee.isAtLeastAsQualifiedAs(rhptee) && // make an exception for id
!LHSType->isObjCQualifiedIdType()) return Sema::CompatiblePointerDiscardsQualifiers; if (S.Context.typesAreCompatible(LHSType, RHSType)) return Sema::Compatible; if (LHSType->isObjCQualifiedIdType() || RHSType->isObjCQualifiedIdType()) return Sema::IncompatibleObjCQualifiedId; return Sema::IncompatiblePointer; } Sema::AssignConvertType Sema::CheckAssignmentConstraints(SourceLocation Loc, QualType LHSType, QualType RHSType) { // Fake up an opaque expression. We don't actually care about what // cast operations are required, so if CheckAssignmentConstraints // adds casts to this they'll be wasted, but fortunately that doesn't // usually happen on valid code. OpaqueValueExpr RHSExpr(Loc, RHSType, VK_RValue); ExprResult RHSPtr = &RHSExpr; CastKind K; return CheckAssignmentConstraints(LHSType, RHSPtr, K, /*ConvertRHS=*/false); } /// This helper function returns true if QT is a vector type that has element /// type ElementType. static bool isVector(QualType QT, QualType ElementType) { if (const VectorType *VT = QT->getAs()) return VT->getElementType() == ElementType; return false; } /// CheckAssignmentConstraints (C99 6.5.16) - This routine currently /// has code to accommodate several GCC extensions when type checking /// pointers. Here are some objectionable examples that GCC considers warnings: /// /// int a, *pint; /// short *pshort; /// struct foo *pfoo; /// /// pint = pshort; // warning: assignment from incompatible pointer type /// a = pint; // warning: assignment makes integer from pointer without a cast /// pint = a; // warning: assignment makes pointer from integer without a cast /// pint = pfoo; // warning: assignment from incompatible pointer type /// /// As a result, the code for dealing with pointers is more complex than the /// C99 spec dictates. /// /// Sets 'Kind' for any result kind except Incompatible. Sema::AssignConvertType Sema::CheckAssignmentConstraints(QualType LHSType, ExprResult &RHS, CastKind &Kind, bool ConvertRHS) { QualType RHSType = RHS.get()->getType(); QualType OrigLHSType = LHSType; // Get canonical types. We're not formatting these types, just comparing // them. LHSType = Context.getCanonicalType(LHSType).getUnqualifiedType(); RHSType = Context.getCanonicalType(RHSType).getUnqualifiedType(); // Common case: no conversion required. if (LHSType == RHSType) { Kind = CK_NoOp; return Compatible; } // If we have an atomic type, try a non-atomic assignment, then just add an // atomic qualification step. if (const AtomicType *AtomicTy = dyn_cast(LHSType)) { Sema::AssignConvertType result = CheckAssignmentConstraints(AtomicTy->getValueType(), RHS, Kind); if (result != Compatible) return result; if (Kind != CK_NoOp && ConvertRHS) RHS = ImpCastExprToType(RHS.get(), AtomicTy->getValueType(), Kind); Kind = CK_NonAtomicToAtomic; return Compatible; } // If the left-hand side is a reference type, then we are in a // (rare!) case where we've allowed the use of references in C, // e.g., as a parameter type in a built-in function. In this case, // just make sure that the type referenced is compatible with the // right-hand side type. The caller is responsible for adjusting // LHSType so that the resulting expression does not have reference // type. if (const ReferenceType *LHSTypeRef = LHSType->getAs()) { if (Context.typesAreCompatible(LHSTypeRef->getPointeeType(), RHSType)) { Kind = CK_LValueBitCast; return Compatible; } return Incompatible; } // Allow scalar to ExtVector assignments, and assignments of an ExtVector type // to the same ExtVector type. 
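  // For illustration (OpenCL C, where 'float4' is an ext_vector_type; a
  // sketch, not from the original source):
  //
  //   float4 v;
  //   v = 1.0f;  // accepted: the scalar is splatted via CK_VectorSplat,
  //              // yielding (float4)(1.0f, 1.0f, 1.0f, 1.0f)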
if (LHSType->isExtVectorType()) { if (RHSType->isExtVectorType()) return Incompatible; if (RHSType->isArithmeticType()) { // CK_VectorSplat does T -> vector T, so first cast to the element type. if (ConvertRHS) RHS = prepareVectorSplat(LHSType, RHS.get()); Kind = CK_VectorSplat; return Compatible; } } // Conversions to or from vector type. if (LHSType->isVectorType() || RHSType->isVectorType()) { if (LHSType->isVectorType() && RHSType->isVectorType()) { // Allow assignments of an AltiVec vector type to an equivalent GCC // vector type and vice versa if (Context.areCompatibleVectorTypes(LHSType, RHSType)) { Kind = CK_BitCast; return Compatible; } // If we are allowing lax vector conversions, and LHS and RHS are both // vectors, the total size only needs to be the same. This is a bitcast; // no bits are changed but the result type is different. if (isLaxVectorConversion(RHSType, LHSType)) { Kind = CK_BitCast; return IncompatibleVectors; } } // When the RHS comes from another lax conversion (e.g. binops between // scalars and vectors) the result is canonicalized as a vector. When the // LHS is also a vector, the lax is allowed by the condition above. Handle // the case where LHS is a scalar. if (LHSType->isScalarType()) { const VectorType *VecType = RHSType->getAs(); if (VecType && VecType->getNumElements() == 1 && isLaxVectorConversion(RHSType, LHSType)) { ExprResult *VecExpr = &RHS; *VecExpr = ImpCastExprToType(VecExpr->get(), LHSType, CK_BitCast); Kind = CK_BitCast; return Compatible; } } return Incompatible; } // Diagnose attempts to convert between __float128 and long double where // such conversions currently can't be handled. if (unsupportedTypeConversion(*this, LHSType, RHSType)) return Incompatible; // Disallow assigning a _Complex to a real type in C++ mode since it simply // discards the imaginary part. if (getLangOpts().CPlusPlus && RHSType->getAs() && !LHSType->getAs()) return Incompatible; // Arithmetic conversions. if (LHSType->isArithmeticType() && RHSType->isArithmeticType() && !(getLangOpts().CPlusPlus && LHSType->isEnumeralType())) { if (ConvertRHS) Kind = PrepareScalarCast(RHS, LHSType); return Compatible; } // Conversions to normal pointers. if (const PointerType *LHSPointer = dyn_cast(LHSType)) { // U* -> T* if (isa(RHSType)) { LangAS AddrSpaceL = LHSPointer->getPointeeType().getAddressSpace(); LangAS AddrSpaceR = RHSType->getPointeeType().getAddressSpace(); Kind = AddrSpaceL != AddrSpaceR ? CK_AddressSpaceConversion : CK_BitCast; return checkPointerTypesForAssignment(*this, LHSType, RHSType); } // int -> T* if (RHSType->isIntegerType()) { Kind = CK_IntegralToPointer; // FIXME: null? return IntToPointer; } // C pointers are not compatible with ObjC object pointers, // with two exceptions: if (isa(RHSType)) { // - conversions to void* if (LHSPointer->getPointeeType()->isVoidType()) { Kind = CK_BitCast; return Compatible; } // - conversions from 'Class' to the redefinition type if (RHSType->isObjCClassType() && Context.hasSameType(LHSType, Context.getObjCClassRedefinitionType())) { Kind = CK_BitCast; return Compatible; } Kind = CK_BitCast; return IncompatiblePointer; } // U^ -> void* if (RHSType->getAs()) { if (LHSPointer->getPointeeType()->isVoidType()) { LangAS AddrSpaceL = LHSPointer->getPointeeType().getAddressSpace(); LangAS AddrSpaceR = RHSType->getAs() ->getPointeeType() .getAddressSpace(); Kind = AddrSpaceL != AddrSpaceR ? CK_AddressSpaceConversion : CK_BitCast; return Compatible; } } return Incompatible; } // Conversions to block pointers. 
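  // Illustrative cases for this section (a sketch, not from the original
  // source; 'id' requires Objective-C):
  //
  //   void (^blk)(void);  void *vp;  id obj;
  //
  //   blk = vp;   // void* -> T^ : CK_AnyPointerToBlockPointerCast, Compatible
  //   blk = obj;  // id -> T^   : CK_AnyPointerToBlockPointerCast, Compatible
  //   blk = 42;   // int -> T^  : IntToBlockPointer (null pointer constants
  //               // were already handled by the caller)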
if (isa(LHSType)) { // U^ -> T^ if (RHSType->isBlockPointerType()) { LangAS AddrSpaceL = LHSType->getAs() ->getPointeeType() .getAddressSpace(); LangAS AddrSpaceR = RHSType->getAs() ->getPointeeType() .getAddressSpace(); Kind = AddrSpaceL != AddrSpaceR ? CK_AddressSpaceConversion : CK_BitCast; return checkBlockPointerTypesForAssignment(*this, LHSType, RHSType); } // int or null -> T^ if (RHSType->isIntegerType()) { Kind = CK_IntegralToPointer; // FIXME: null return IntToBlockPointer; } // id -> T^ if (getLangOpts().ObjC1 && RHSType->isObjCIdType()) { Kind = CK_AnyPointerToBlockPointerCast; return Compatible; } // void* -> T^ if (const PointerType *RHSPT = RHSType->getAs()) if (RHSPT->getPointeeType()->isVoidType()) { Kind = CK_AnyPointerToBlockPointerCast; return Compatible; } return Incompatible; } // Conversions to Objective-C pointers. if (isa(LHSType)) { // A* -> B* if (RHSType->isObjCObjectPointerType()) { Kind = CK_BitCast; Sema::AssignConvertType result = checkObjCPointerTypesForAssignment(*this, LHSType, RHSType); if (getLangOpts().allowsNonTrivialObjCLifetimeQualifiers() && result == Compatible && !CheckObjCARCUnavailableWeakConversion(OrigLHSType, RHSType)) result = IncompatibleObjCWeakRef; return result; } // int or null -> A* if (RHSType->isIntegerType()) { Kind = CK_IntegralToPointer; // FIXME: null return IntToPointer; } // In general, C pointers are not compatible with ObjC object pointers, // with two exceptions: if (isa(RHSType)) { Kind = CK_CPointerToObjCPointerCast; // - conversions from 'void*' if (RHSType->isVoidPointerType()) { return Compatible; } // - conversions to 'Class' from its redefinition type if (LHSType->isObjCClassType() && Context.hasSameType(RHSType, Context.getObjCClassRedefinitionType())) { return Compatible; } return IncompatiblePointer; } // Only under strict condition T^ is compatible with an Objective-C pointer. if (RHSType->isBlockPointerType() && LHSType->isBlockCompatibleObjCPointerType(Context)) { if (ConvertRHS) maybeExtendBlockObject(RHS); Kind = CK_BlockPointerToObjCPointerCast; return Compatible; } return Incompatible; } // Conversions from pointers that are not covered by the above. if (isa(RHSType)) { // T* -> _Bool if (LHSType == Context.BoolTy) { Kind = CK_PointerToBoolean; return Compatible; } // T* -> int if (LHSType->isIntegerType()) { Kind = CK_PointerToIntegral; return PointerToInt; } return Incompatible; } // Conversions from Objective-C pointers that are not covered by the above. if (isa(RHSType)) { // T* -> _Bool if (LHSType == Context.BoolTy) { Kind = CK_PointerToBoolean; return Compatible; } // T* -> int if (LHSType->isIntegerType()) { Kind = CK_PointerToIntegral; return PointerToInt; } return Incompatible; } // struct A -> struct B if (isa(LHSType) && isa(RHSType)) { if (Context.typesAreCompatible(LHSType, RHSType)) { Kind = CK_NoOp; return Compatible; } } if (LHSType->isSamplerT() && RHSType->isIntegerType()) { Kind = CK_IntToOCLSampler; return Compatible; } return Incompatible; } /// \brief Constructs a transparent union from an expression that is /// used to initialize the transparent union. static void ConstructTransparentUnion(Sema &S, ASTContext &C, ExprResult &EResult, QualType UnionType, FieldDecl *Field) { // Build an initializer list that designates the appropriate member // of the transparent union. 
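  // For reference, the GCC extension this models (illustrative, not from the
  // original source):
  //
  //   typedef union {
  //     int *ip;
  //     float *fp;
  //   } arg_t __attribute__((transparent_union));
  //
  //   void f(arg_t);
  //   int *p;
  //   f(p);  // initializes the 'ip' member; the union itself is transparent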
  Expr *E = EResult.get();
  InitListExpr *Initializer = new (C) InitListExpr(C, SourceLocation(),
                                                   E, SourceLocation());
  Initializer->setType(UnionType);
  Initializer->setInitializedFieldInUnion(Field);

  // Build a compound literal constructing a value of the transparent
  // union type from this initializer list.
  TypeSourceInfo *unionTInfo = C.getTrivialTypeSourceInfo(UnionType);
  EResult = new (C) CompoundLiteralExpr(SourceLocation(), unionTInfo,
                                        UnionType, VK_RValue, Initializer,
                                        false);
}

Sema::AssignConvertType
Sema::CheckTransparentUnionArgumentConstraints(QualType ArgType,
                                               ExprResult &RHS) {
  QualType RHSType = RHS.get()->getType();

  // If the ArgType is a Union type, we want to handle a potential
  // transparent_union GCC extension.
  const RecordType *UT = ArgType->getAsUnionType();
  if (!UT || !UT->getDecl()->hasAttr<TransparentUnionAttr>())
    return Incompatible;

  // The field to initialize within the transparent union.
  RecordDecl *UD = UT->getDecl();
  FieldDecl *InitField = nullptr;
  // It's compatible if the expression matches any of the fields.
  for (auto *it : UD->fields()) {
    if (it->getType()->isPointerType()) {
      // If the transparent union contains a pointer type, we allow:
      // 1) void pointer
      // 2) null pointer constant
      if (RHSType->isPointerType())
        if (RHSType->castAs<PointerType>()->getPointeeType()->isVoidType()) {
          RHS = ImpCastExprToType(RHS.get(), it->getType(), CK_BitCast);
          InitField = it;
          break;
        }

      if (RHS.get()->isNullPointerConstant(Context,
                                           Expr::NPC_ValueDependentIsNull)) {
        RHS = ImpCastExprToType(RHS.get(), it->getType(),
                                CK_NullToPointer);
        InitField = it;
        break;
      }
    }

    CastKind Kind;
    if (CheckAssignmentConstraints(it->getType(), RHS, Kind) == Compatible) {
      RHS = ImpCastExprToType(RHS.get(), it->getType(), Kind);
      InitField = it;
      break;
    }
  }

  if (!InitField)
    return Incompatible;

  ConstructTransparentUnion(*this, Context, RHS, ArgType, InitField);
  return Compatible;
}

Sema::AssignConvertType
Sema::CheckSingleAssignmentConstraints(QualType LHSType, ExprResult &CallerRHS,
                                       bool Diagnose,
                                       bool DiagnoseCFAudited,
                                       bool ConvertRHS) {
  // We need to be able to tell the caller whether we diagnosed a problem, if
  // they ask us to issue diagnostics.
  assert((ConvertRHS || !Diagnose) && "can't indicate whether we diagnosed");

  // If ConvertRHS is false, we want to leave the caller's RHS untouched. Sadly,
  // we can't avoid *all* modifications at the moment, so we need somewhere
  // to put the updated value.
  ExprResult LocalRHS = CallerRHS;
  ExprResult &RHS = ConvertRHS ? CallerRHS : LocalRHS;

  if (getLangOpts().CPlusPlus) {
    if (!LHSType->isRecordType() && !LHSType->isAtomicType()) {
      // C++ 5.17p3: If the left operand is not of class type, the
      // expression is implicitly converted (C++ 4) to the
      // cv-unqualified type of the left operand.
QualType RHSType = RHS.get()->getType(); if (Diagnose) { RHS = PerformImplicitConversion(RHS.get(), LHSType.getUnqualifiedType(), AA_Assigning); } else { ImplicitConversionSequence ICS = TryImplicitConversion(RHS.get(), LHSType.getUnqualifiedType(), /*SuppressUserConversions=*/false, /*AllowExplicit=*/false, /*InOverloadResolution=*/false, /*CStyle=*/false, /*AllowObjCWritebackConversion=*/false); if (ICS.isFailure()) return Incompatible; RHS = PerformImplicitConversion(RHS.get(), LHSType.getUnqualifiedType(), ICS, AA_Assigning); } if (RHS.isInvalid()) return Incompatible; Sema::AssignConvertType result = Compatible; if (getLangOpts().allowsNonTrivialObjCLifetimeQualifiers() && !CheckObjCARCUnavailableWeakConversion(LHSType, RHSType)) result = IncompatibleObjCWeakRef; return result; } // FIXME: Currently, we fall through and treat C++ classes like C // structures. // FIXME: We also fall through for atomics; not sure what should // happen there, though. } else if (RHS.get()->getType() == Context.OverloadTy) { // As a set of extensions to C, we support overloading on functions. These // functions need to be resolved here. DeclAccessPair DAP; if (FunctionDecl *FD = ResolveAddressOfOverloadedFunction( RHS.get(), LHSType, /*Complain=*/false, DAP)) RHS = FixOverloadedFunctionReference(RHS.get(), DAP, FD); else return Incompatible; } // C99 6.5.16.1p1: the left operand is a pointer and the right is // a null pointer constant. if ((LHSType->isPointerType() || LHSType->isObjCObjectPointerType() || LHSType->isBlockPointerType()) && RHS.get()->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull)) { if (Diagnose || ConvertRHS) { CastKind Kind; CXXCastPath Path; CheckPointerConversion(RHS.get(), LHSType, Kind, Path, /*IgnoreBaseAccess=*/false, Diagnose); if (ConvertRHS) RHS = ImpCastExprToType(RHS.get(), LHSType, Kind, VK_RValue, &Path); } return Compatible; } // This check seems unnatural, however it is necessary to ensure the proper // conversion of functions/arrays. If the conversion were done for all // DeclExpr's (created by ActOnIdExpression), it would mess up the unary // expressions that suppress this implicit conversion (&, sizeof). // // Suppress this for references: C++ 8.5.3p5. if (!LHSType->isReferenceType()) { // FIXME: We potentially allocate here even if ConvertRHS is false. RHS = DefaultFunctionArrayLvalueConversion(RHS.get(), Diagnose); if (RHS.isInvalid()) return Incompatible; } Expr *PRE = RHS.get()->IgnoreParenCasts(); if (Diagnose && isa(PRE)) { ObjCProtocolDecl *PDecl = cast(PRE)->getProtocol(); if (PDecl && !PDecl->hasDefinition()) { Diag(PRE->getExprLoc(), diag::warn_atprotocol_protocol) << PDecl->getName(); Diag(PDecl->getLocation(), diag::note_entity_declared_at) << PDecl; } } CastKind Kind; Sema::AssignConvertType result = CheckAssignmentConstraints(LHSType, RHS, Kind, ConvertRHS); // C99 6.5.16.1p2: The value of the right operand is converted to the // type of the assignment expression. // CheckAssignmentConstraints allows the left-hand side to be a reference, // so that we can use references in built-in functions even in C. // The getNonReferenceType() call makes sure that the resulting expression // does not have reference type. if (result != Incompatible && RHS.get()->getType() != LHSType) { QualType Ty = LHSType.getNonLValueExprType(Context); Expr *E = RHS.get(); // Check for various Objective-C errors. If we are not reporting // diagnostics and just checking for errors, e.g., during overload // resolution, return Incompatible to indicate the failure. 
if (getLangOpts().allowsNonTrivialObjCLifetimeQualifiers() && CheckObjCConversion(SourceRange(), Ty, E, CCK_ImplicitConversion, Diagnose, DiagnoseCFAudited) != ACR_okay) { if (!Diagnose) return Incompatible; } if (getLangOpts().ObjC1 && (CheckObjCBridgeRelatedConversions(E->getLocStart(), LHSType, E->getType(), E, Diagnose) || ConversionToObjCStringLiteralCheck(LHSType, E, Diagnose))) { if (!Diagnose) return Incompatible; // Replace the expression with a corrected version and continue so we // can find further errors. RHS = E; return Compatible; } if (ConvertRHS) RHS = ImpCastExprToType(E, Ty, Kind); } return result; } QualType Sema::InvalidOperands(SourceLocation Loc, ExprResult &LHS, ExprResult &RHS) { Diag(Loc, diag::err_typecheck_invalid_operands) << LHS.get()->getType() << RHS.get()->getType() << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } // Diagnose cases where a scalar was implicitly converted to a vector and // diagnose the underlying types. Otherwise, diagnose the error // as invalid vector logical operands for non-C++ cases. QualType Sema::InvalidLogicalVectorOperands(SourceLocation Loc, ExprResult &LHS, ExprResult &RHS) { QualType LHSType = LHS.get()->IgnoreImpCasts()->getType(); QualType RHSType = RHS.get()->IgnoreImpCasts()->getType(); bool LHSNatVec = LHSType->isVectorType(); bool RHSNatVec = RHSType->isVectorType(); if (!(LHSNatVec && RHSNatVec)) { Expr *Vector = LHSNatVec ? LHS.get() : RHS.get(); Expr *NonVector = !LHSNatVec ? LHS.get() : RHS.get(); Diag(Loc, diag::err_typecheck_logical_vector_expr_gnu_cpp_restrict) << 0 << Vector->getType() << NonVector->IgnoreImpCasts()->getType() << Vector->getSourceRange(); return QualType(); } Diag(Loc, diag::err_typecheck_logical_vector_expr_gnu_cpp_restrict) << 1 << LHSType << RHSType << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } /// Try to convert a value of non-vector type to a vector type by converting /// the type to the element type of the vector and then performing a splat. /// If the language is OpenCL, we only use conversions that promote scalar /// rank; for C, Obj-C, and C++ we allow any real scalar conversion except /// for float->int. /// /// OpenCL V2.0 6.2.6.p2: /// An error shall occur if any scalar operand type has greater rank /// than the type of the vector element. /// /// \param scalar - if non-null, actually perform the conversions /// \return true if the operation fails (but without diagnosing the failure) static bool tryVectorConvertAndSplat(Sema &S, ExprResult *scalar, QualType scalarTy, QualType vectorEltTy, QualType vectorTy, unsigned &DiagID) { // The conversion to apply to the scalar before splatting it, // if necessary. 
  CastKind scalarCast = CK_NoOp;

  if (vectorEltTy->isIntegralType(S.Context)) {
    if (S.getLangOpts().OpenCL && (scalarTy->isRealFloatingType() ||
        (scalarTy->isIntegerType() &&
         S.Context.getIntegerTypeOrder(vectorEltTy, scalarTy) < 0))) {
      DiagID = diag::err_opencl_scalar_type_rank_greater_than_vector_type;
      return true;
    }
    if (!scalarTy->isIntegralType(S.Context))
      return true;
    scalarCast = CK_IntegralCast;
  } else if (vectorEltTy->isRealFloatingType()) {
    if (scalarTy->isRealFloatingType()) {
      if (S.getLangOpts().OpenCL &&
          S.Context.getFloatingTypeOrder(vectorEltTy, scalarTy) < 0) {
        DiagID = diag::err_opencl_scalar_type_rank_greater_than_vector_type;
        return true;
      }
      scalarCast = CK_FloatingCast;
    } else if (scalarTy->isIntegralType(S.Context))
      scalarCast = CK_IntegralToFloating;
    else
      return true;
  } else {
    return true;
  }

  // Adjust scalar if desired.
  if (scalar) {
    if (scalarCast != CK_NoOp)
      *scalar = S.ImpCastExprToType(scalar->get(), vectorEltTy, scalarCast);
    *scalar = S.ImpCastExprToType(scalar->get(), vectorTy, CK_VectorSplat);
  }
  return false;
}

/// Convert vector E to a vector with the same number of elements but different
/// element type.
static ExprResult convertVector(Expr *E, QualType ElementType, Sema &S) {
  const auto *VecTy = E->getType()->getAs<VectorType>();
  assert(VecTy && "Expression E must be a vector");
  QualType NewVecTy = S.Context.getVectorType(ElementType,
                                              VecTy->getNumElements(),
                                              VecTy->getVectorKind());

  // Look through the implicit cast. Return the subexpression if its type is
  // NewVecTy.
  if (auto *ICE = dyn_cast<ImplicitCastExpr>(E))
    if (ICE->getSubExpr()->getType() == NewVecTy)
      return ICE->getSubExpr();

  auto Cast = ElementType->isIntegerType() ? CK_IntegralCast
                                           : CK_FloatingCast;
  return S.ImpCastExprToType(E, NewVecTy, Cast);
}

/// Test if a (constant) integer Int can be cast to another integer type
/// IntTy without losing precision.
static bool canConvertIntToOtherIntTy(Sema &S, ExprResult *Int,
                                      QualType OtherIntTy) {
  QualType IntTy = Int->get()->getType().getUnqualifiedType();

  // Reject cases where the value of the Int is unknown as that would
  // possibly cause truncation, but accept cases where the scalar can be
  // demoted without loss of precision.
  llvm::APSInt Result;
  bool CstInt = Int->get()->EvaluateAsInt(Result, S.Context);
  int Order = S.Context.getIntegerTypeOrder(OtherIntTy, IntTy);
  bool IntSigned = IntTy->hasSignedIntegerRepresentation();
  bool OtherIntSigned = OtherIntTy->hasSignedIntegerRepresentation();

  if (CstInt) {
    // If the scalar is constant and is of a higher order and has more active
    // bits than the vector element type, reject it.
    unsigned NumBits = IntSigned
                           ? (Result.isNegative() ? Result.getMinSignedBits()
                                                  : Result.getActiveBits())
                           : Result.getActiveBits();
    if (Order < 0 && S.Context.getIntWidth(OtherIntTy) < NumBits)
      return true;

    // If the signedness of the scalar type and the vector element type
    // differs and the number of bits is greater than that of the vector
    // element reject it.
    return (IntSigned != OtherIntSigned &&
            NumBits > S.Context.getIntWidth(OtherIntTy));
  }

  // Reject cases where the value of the scalar is not constant and its
  // order is greater than that of the vector element type.
  return (Order < 0);
}

/// Test if a (constant) integer Int can be cast to floating point type
/// FloatTy without losing precision.
static bool canConvertIntTyToFloatTy(Sema &S, ExprResult *Int,
                                     QualType FloatTy) {
  QualType IntTy = Int->get()->getType().getUnqualifiedType();

  // Determine if the integer constant can be expressed as a floating point
  // number of the appropriate type.
llvm::APSInt Result; bool CstInt = Int->get()->EvaluateAsInt(Result, S.Context); uint64_t Bits = 0; if (CstInt) { // Reject constants that would be truncated if they were converted to // the floating point type. Test by simple to/from conversion. // FIXME: Ideally the conversion to an APFloat and from an APFloat // could be avoided if there was a convertFromAPInt method // which could signal back if implicit truncation occurred. llvm::APFloat Float(S.Context.getFloatTypeSemantics(FloatTy)); Float.convertFromAPInt(Result, IntTy->hasSignedIntegerRepresentation(), llvm::APFloat::rmTowardZero); llvm::APSInt ConvertBack(S.Context.getIntWidth(IntTy), !IntTy->hasSignedIntegerRepresentation()); bool Ignored = false; Float.convertToInteger(ConvertBack, llvm::APFloat::rmNearestTiesToEven, &Ignored); if (Result != ConvertBack) return true; } else { // Reject types that cannot be fully encoded into the mantissa of // the float. Bits = S.Context.getTypeSize(IntTy); unsigned FloatPrec = llvm::APFloat::semanticsPrecision( S.Context.getFloatTypeSemantics(FloatTy)); if (Bits > FloatPrec) return true; } return false; } /// Attempt to convert and splat Scalar into a vector whose types matches /// Vector following GCC conversion rules. The rule is that implicit /// conversion can occur when Scalar can be casted to match Vector's element /// type without causing truncation of Scalar. static bool tryGCCVectorConvertAndSplat(Sema &S, ExprResult *Scalar, ExprResult *Vector) { QualType ScalarTy = Scalar->get()->getType().getUnqualifiedType(); QualType VectorTy = Vector->get()->getType().getUnqualifiedType(); const VectorType *VT = VectorTy->getAs(); assert(!isa(VT) && "ExtVectorTypes should not be handled here!"); QualType VectorEltTy = VT->getElementType(); // Reject cases where the vector element type or the scalar element type are // not integral or floating point types. if (!VectorEltTy->isArithmeticType() || !ScalarTy->isArithmeticType()) return true; // The conversion to apply to the scalar before splatting it, // if necessary. CastKind ScalarCast = CK_NoOp; // Accept cases where the vector elements are integers and the scalar is // an integer. // FIXME: Notionally if the scalar was a floating point value with a precise // integral representation, we could cast it to an appropriate integer // type and then perform the rest of the checks here. GCC will perform // this conversion in some cases as determined by the input language. // We should accept it on a language independent basis. if (VectorEltTy->isIntegralType(S.Context) && ScalarTy->isIntegralType(S.Context) && S.Context.getIntegerTypeOrder(VectorEltTy, ScalarTy)) { if (canConvertIntToOtherIntTy(S, Scalar, VectorEltTy)) return true; ScalarCast = CK_IntegralCast; } else if (VectorEltTy->isRealFloatingType()) { if (ScalarTy->isRealFloatingType()) { // Reject cases where the scalar type is not a constant and has a higher // Order than the vector element type. llvm::APFloat Result(0.0); bool CstScalar = Scalar->get()->EvaluateAsFloat(Result, S.Context); int Order = S.Context.getFloatingTypeOrder(VectorEltTy, ScalarTy); if (!CstScalar && Order < 0) return true; // If the scalar cannot be safely casted to the vector element type, // reject it. 
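      // Illustrative outcomes of these truncation checks (a sketch, not from
      // the original source):
      //
      //   typedef int v4si __attribute__((vector_size(16)));
      //   v4si v;  long long ll;
      //
      //   v + 1;            // accepted: splatted to the 'int' element type
      //   v + 0x100000000;  // rejected: needs more than 32 active bits
      //   v + ll;           // rejected: non-constant scalar of higher rank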
if (CstScalar) { bool Truncated = false; Result.convert(S.Context.getFloatTypeSemantics(VectorEltTy), llvm::APFloat::rmNearestTiesToEven, &Truncated); if (Truncated) return true; } ScalarCast = CK_FloatingCast; } else if (ScalarTy->isIntegralType(S.Context)) { if (canConvertIntTyToFloatTy(S, Scalar, VectorEltTy)) return true; ScalarCast = CK_IntegralToFloating; } else return true; } // Adjust scalar if desired. if (Scalar) { if (ScalarCast != CK_NoOp) *Scalar = S.ImpCastExprToType(Scalar->get(), VectorEltTy, ScalarCast); *Scalar = S.ImpCastExprToType(Scalar->get(), VectorTy, CK_VectorSplat); } return false; } QualType Sema::CheckVectorOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, bool IsCompAssign, bool AllowBothBool, bool AllowBoolConversions) { if (!IsCompAssign) { LHS = DefaultFunctionArrayLvalueConversion(LHS.get()); if (LHS.isInvalid()) return QualType(); } RHS = DefaultFunctionArrayLvalueConversion(RHS.get()); if (RHS.isInvalid()) return QualType(); // For conversion purposes, we ignore any qualifiers. // For example, "const float" and "float" are equivalent. QualType LHSType = LHS.get()->getType().getUnqualifiedType(); QualType RHSType = RHS.get()->getType().getUnqualifiedType(); const VectorType *LHSVecType = LHSType->getAs(); const VectorType *RHSVecType = RHSType->getAs(); assert(LHSVecType || RHSVecType); // AltiVec-style "vector bool op vector bool" combinations are allowed // for some operators but not others. if (!AllowBothBool && LHSVecType && LHSVecType->getVectorKind() == VectorType::AltiVecBool && RHSVecType && RHSVecType->getVectorKind() == VectorType::AltiVecBool) return InvalidOperands(Loc, LHS, RHS); // If the vector types are identical, return. if (Context.hasSameType(LHSType, RHSType)) return LHSType; // If we have compatible AltiVec and GCC vector types, use the AltiVec type. if (LHSVecType && RHSVecType && Context.areCompatibleVectorTypes(LHSType, RHSType)) { if (isa(LHSVecType)) { RHS = ImpCastExprToType(RHS.get(), LHSType, CK_BitCast); return LHSType; } if (!IsCompAssign) LHS = ImpCastExprToType(LHS.get(), RHSType, CK_BitCast); return RHSType; } // AllowBoolConversions says that bool and non-bool AltiVec vectors // can be mixed, with the result being the non-bool type. The non-bool // operand must have integer element type. if (AllowBoolConversions && LHSVecType && RHSVecType && LHSVecType->getNumElements() == RHSVecType->getNumElements() && (Context.getTypeSize(LHSVecType->getElementType()) == Context.getTypeSize(RHSVecType->getElementType()))) { if (LHSVecType->getVectorKind() == VectorType::AltiVecVector && LHSVecType->getElementType()->isIntegerType() && RHSVecType->getVectorKind() == VectorType::AltiVecBool) { RHS = ImpCastExprToType(RHS.get(), LHSType, CK_BitCast); return LHSType; } if (!IsCompAssign && LHSVecType->getVectorKind() == VectorType::AltiVecBool && RHSVecType->getVectorKind() == VectorType::AltiVecVector && RHSVecType->getElementType()->isIntegerType()) { LHS = ImpCastExprToType(LHS.get(), RHSType, CK_BitCast); return RHSType; } } // If there's a vector type and a scalar, try to convert the scalar to // the vector element type and splat. unsigned DiagID = diag::err_typecheck_vector_not_convertable; if (!RHSVecType) { if (isa(LHSVecType)) { if (!tryVectorConvertAndSplat(*this, &RHS, RHSType, LHSVecType->getElementType(), LHSType, DiagID)) return LHSType; } else { if (!tryGCCVectorConvertAndSplat(*this, &RHS, &LHS)) return LHSType; } } if (!LHSVecType) { if (isa(RHSVecType)) { if (!tryVectorConvertAndSplat(*this, (IsCompAssign ? 
nullptr : &LHS), LHSType, RHSVecType->getElementType(), RHSType, DiagID)) return RHSType; } else { if (LHS.get()->getValueKind() == VK_LValue || !tryGCCVectorConvertAndSplat(*this, &LHS, &RHS)) return RHSType; } } // FIXME: The code below also handles conversion between vectors and // non-scalars, we should break this down into fine grained specific checks // and emit proper diagnostics. QualType VecType = LHSVecType ? LHSType : RHSType; const VectorType *VT = LHSVecType ? LHSVecType : RHSVecType; QualType OtherType = LHSVecType ? RHSType : LHSType; ExprResult *OtherExpr = LHSVecType ? &RHS : &LHS; if (isLaxVectorConversion(OtherType, VecType)) { // If we're allowing lax vector conversions, only the total (data) size // needs to be the same. For non compound assignment, if one of the types is // scalar, the result is always the vector type. if (!IsCompAssign) { *OtherExpr = ImpCastExprToType(OtherExpr->get(), VecType, CK_BitCast); return VecType; // In a compound assignment, lhs += rhs, 'lhs' is a lvalue src, forbidding // any implicit cast. Here, the 'rhs' should be implicit casted to 'lhs' // type. Note that this is already done by non-compound assignments in // CheckAssignmentConstraints. If it's a scalar type, only bitcast for // <1 x T> -> T. The result is also a vector type. } else if (OtherType->isExtVectorType() || OtherType->isVectorType() || (OtherType->isScalarType() && VT->getNumElements() == 1)) { ExprResult *RHSExpr = &RHS; *RHSExpr = ImpCastExprToType(RHSExpr->get(), LHSType, CK_BitCast); return VecType; } } // Okay, the expression is invalid. // If there's a non-vector, non-real operand, diagnose that. if ((!RHSVecType && !RHSType->isRealType()) || (!LHSVecType && !LHSType->isRealType())) { Diag(Loc, diag::err_typecheck_vector_not_convertable_non_scalar) << LHSType << RHSType << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } // OpenCL V1.1 6.2.6.p1: // If the operands are of more than one vector type, then an error shall // occur. Implicit conversions between vector types are not permitted, per // section 6.2.1. if (getLangOpts().OpenCL && RHSVecType && isa(RHSVecType) && LHSVecType && isa(LHSVecType)) { Diag(Loc, diag::err_opencl_implicit_vector_conversion) << LHSType << RHSType; return QualType(); } // If there is a vector type that is not a ExtVector and a scalar, we reach // this point if scalar could not be converted to the vector's element type // without truncation. if ((RHSVecType && !isa(RHSVecType)) || (LHSVecType && !isa(LHSVecType))) { QualType Scalar = LHSVecType ? RHSType : LHSType; QualType Vector = LHSVecType ? LHSType : RHSType; unsigned ScalarOrVector = LHSVecType && RHSVecType ? 1 : 0; Diag(Loc, diag::err_typecheck_vector_not_convertable_implict_truncation) << ScalarOrVector << Scalar << Vector; return QualType(); } // Otherwise, use the generic diagnostic. Diag(Loc, DiagID) << LHSType << RHSType << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } // checkArithmeticNull - Detect when a NULL constant is used improperly in an // expression. These are mainly cases where the null pointer is used as an // integer instead of a pointer. static void checkArithmeticNull(Sema &S, ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, bool IsCompare) { // The canonical way to check for a GNU null is with isNullPointerConstant, // but we use a bit of a hack here for speed; this is a relatively // hot path, and isNullPointerConstant is slow. 
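  // Illustrative warnings from this check (a sketch, not from the original
  // source; NULL here is the GNU '__null' the fast path recognizes):
  //
  //   int n = 1 + NULL;      // warn_null_in_arithmetic_operation
  //   int *p;
  //   int ok = (p == NULL);  // fine: the other operand is a pointer
  //   int bad = (n > NULL);  // warn_null_in_comparison_operation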
  bool LHSNull = isa<GNUNullExpr>(LHS.get()->IgnoreParenImpCasts());
  bool RHSNull = isa<GNUNullExpr>(RHS.get()->IgnoreParenImpCasts());

  QualType NonNullType = LHSNull ? RHS.get()->getType() : LHS.get()->getType();

  // Avoid analyzing cases where the result will either be invalid (and
  // diagnosed as such) or entirely valid and not something to warn about.
  if ((!LHSNull && !RHSNull) || NonNullType->isBlockPointerType() ||
      NonNullType->isMemberPointerType() || NonNullType->isFunctionType())
    return;

  // Comparison operations would not make sense with a null pointer no matter
  // what the other expression is.
  if (!IsCompare) {
    S.Diag(Loc, diag::warn_null_in_arithmetic_operation)
        << (LHSNull ? LHS.get()->getSourceRange() : SourceRange())
        << (RHSNull ? RHS.get()->getSourceRange() : SourceRange());
    return;
  }

  // The rest of the operations only make sense with a null pointer
  // if the other expression is a pointer.
  if (LHSNull == RHSNull || NonNullType->isAnyPointerType() ||
      NonNullType->canDecayToPointerType())
    return;

  S.Diag(Loc, diag::warn_null_in_comparison_operation)
      << LHSNull /* LHS is NULL */ << NonNullType
      << LHS.get()->getSourceRange() << RHS.get()->getSourceRange();
}

static void DiagnoseBadDivideOrRemainderValues(Sema& S, ExprResult &LHS,
                                               ExprResult &RHS,
                                               SourceLocation Loc, bool IsDiv) {
  // Check for division/remainder by zero.
  llvm::APSInt RHSValue;
  if (!RHS.get()->isValueDependent() &&
      RHS.get()->EvaluateAsInt(RHSValue, S.Context) && RHSValue == 0)
    S.DiagRuntimeBehavior(Loc, RHS.get(),
                          S.PDiag(diag::warn_remainder_division_by_zero)
                              << IsDiv << RHS.get()->getSourceRange());
}

QualType Sema::CheckMultiplyDivideOperands(ExprResult &LHS, ExprResult &RHS,
                                           SourceLocation Loc,
                                           bool IsCompAssign, bool IsDiv) {
  checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/false);

  if (LHS.get()->getType()->isVectorType() ||
      RHS.get()->getType()->isVectorType())
    return CheckVectorOperands(LHS, RHS, Loc, IsCompAssign,
                               /*AllowBothBool*/getLangOpts().AltiVec,
                               /*AllowBoolConversions*/false);

  QualType compType = UsualArithmeticConversions(LHS, RHS, IsCompAssign);
  if (LHS.isInvalid() || RHS.isInvalid())
    return QualType();

  if (compType.isNull() || !compType->isArithmeticType())
    return InvalidOperands(Loc, LHS, RHS);
  if (IsDiv)
    DiagnoseBadDivideOrRemainderValues(*this, LHS, RHS, Loc, IsDiv);
  return compType;
}

QualType Sema::CheckRemainderOperands(
    ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, bool IsCompAssign) {
  checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/false);

  if (LHS.get()->getType()->isVectorType() ||
      RHS.get()->getType()->isVectorType()) {
    if (LHS.get()->getType()->hasIntegerRepresentation() &&
        RHS.get()->getType()->hasIntegerRepresentation())
      return CheckVectorOperands(LHS, RHS, Loc, IsCompAssign,
                                 /*AllowBothBool*/getLangOpts().AltiVec,
                                 /*AllowBoolConversions*/false);
    return InvalidOperands(Loc, LHS, RHS);
  }

  QualType compType = UsualArithmeticConversions(LHS, RHS, IsCompAssign);
  if (LHS.isInvalid() || RHS.isInvalid())
    return QualType();

  if (compType.isNull() || !compType->isIntegerType())
    return InvalidOperands(Loc, LHS, RHS);
  DiagnoseBadDivideOrRemainderValues(*this, LHS, RHS, Loc, false /* IsDiv */);
  return compType;
}

/// \brief Diagnose invalid arithmetic on two void pointers.
static void diagnoseArithmeticOnTwoVoidPointers(Sema &S, SourceLocation Loc,
                                                Expr *LHSExpr, Expr *RHSExpr) {
  S.Diag(Loc, S.getLangOpts().CPlusPlus ?
diag::err_typecheck_pointer_arith_void_type : diag::ext_gnu_void_ptr) << 1 /* two pointers */ << LHSExpr->getSourceRange() << RHSExpr->getSourceRange(); } /// \brief Diagnose invalid arithmetic on a void pointer. static void diagnoseArithmeticOnVoidPointer(Sema &S, SourceLocation Loc, Expr *Pointer) { S.Diag(Loc, S.getLangOpts().CPlusPlus ? diag::err_typecheck_pointer_arith_void_type : diag::ext_gnu_void_ptr) << 0 /* one pointer */ << Pointer->getSourceRange(); } /// \brief Diagnose invalid arithmetic on a null pointer. /// /// If \p IsGNUIdiom is true, the operation is using the 'p = (i8*)nullptr + n' /// idiom, which we recognize as a GNU extension. /// static void diagnoseArithmeticOnNullPointer(Sema &S, SourceLocation Loc, Expr *Pointer, bool IsGNUIdiom) { if (IsGNUIdiom) S.Diag(Loc, diag::warn_gnu_null_ptr_arith) << Pointer->getSourceRange(); else S.Diag(Loc, diag::warn_pointer_arith_null_ptr) << S.getLangOpts().CPlusPlus << Pointer->getSourceRange(); } /// \brief Diagnose invalid arithmetic on two function pointers. static void diagnoseArithmeticOnTwoFunctionPointers(Sema &S, SourceLocation Loc, Expr *LHS, Expr *RHS) { assert(LHS->getType()->isAnyPointerType()); assert(RHS->getType()->isAnyPointerType()); S.Diag(Loc, S.getLangOpts().CPlusPlus ? diag::err_typecheck_pointer_arith_function_type : diag::ext_gnu_ptr_func_arith) << 1 /* two pointers */ << LHS->getType()->getPointeeType() // We only show the second type if it differs from the first. << (unsigned)!S.Context.hasSameUnqualifiedType(LHS->getType(), RHS->getType()) << RHS->getType()->getPointeeType() << LHS->getSourceRange() << RHS->getSourceRange(); } /// \brief Diagnose invalid arithmetic on a function pointer. static void diagnoseArithmeticOnFunctionPointer(Sema &S, SourceLocation Loc, Expr *Pointer) { assert(Pointer->getType()->isAnyPointerType()); S.Diag(Loc, S.getLangOpts().CPlusPlus ? diag::err_typecheck_pointer_arith_function_type : diag::ext_gnu_ptr_func_arith) << 0 /* one pointer */ << Pointer->getType()->getPointeeType() << 0 /* one pointer, so only one type */ << Pointer->getSourceRange(); } /// \brief Emit error if Operand is incomplete pointer type /// /// \returns True if pointer has incomplete type static bool checkArithmeticIncompletePointerType(Sema &S, SourceLocation Loc, Expr *Operand) { QualType ResType = Operand->getType(); if (const AtomicType *ResAtomicType = ResType->getAs()) ResType = ResAtomicType->getValueType(); assert(ResType->isAnyPointerType() && !ResType->isDependentType()); QualType PointeeTy = ResType->getPointeeType(); return S.RequireCompleteType(Loc, PointeeTy, diag::err_typecheck_arithmetic_incomplete_type, PointeeTy, Operand->getSourceRange()); } /// \brief Check the validity of an arithmetic pointer operand. /// /// If the operand has pointer type, this code will check for pointer types /// which are invalid in arithmetic operations. These will be diagnosed /// appropriately, including whether or not the use is supported as an /// extension. /// /// \returns True when the operand is valid to use (even if as an extension). 
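///
/// For illustration (editor's addition, not upstream documentation), the
/// user-level cases involved are roughly:
/// \code
///   void *v;  v + 1;           // GNU extension in C, error in C++
///   void (*f)(void);  f + 1;   // likewise: arithmetic on a function pointer
///   struct S *s;  s + 1;       // error if 'struct S' is incomplete here
/// \endcode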
static bool checkArithmeticOpPointerOperand(Sema &S, SourceLocation Loc, Expr *Operand) { QualType ResType = Operand->getType(); if (const AtomicType *ResAtomicType = ResType->getAs()) ResType = ResAtomicType->getValueType(); if (!ResType->isAnyPointerType()) return true; QualType PointeeTy = ResType->getPointeeType(); if (PointeeTy->isVoidType()) { diagnoseArithmeticOnVoidPointer(S, Loc, Operand); return !S.getLangOpts().CPlusPlus; } if (PointeeTy->isFunctionType()) { diagnoseArithmeticOnFunctionPointer(S, Loc, Operand); return !S.getLangOpts().CPlusPlus; } if (checkArithmeticIncompletePointerType(S, Loc, Operand)) return false; return true; } /// \brief Check the validity of a binary arithmetic operation w.r.t. pointer /// operands. /// /// This routine will diagnose any invalid arithmetic on pointer operands much /// like \see checkArithmeticOpPointerOperand. However, it has special logic /// for emitting a single diagnostic even for operations where both LHS and RHS /// are (potentially problematic) pointers. /// /// \returns True when the operand is valid to use (even if as an extension). static bool checkArithmeticBinOpPointerOperands(Sema &S, SourceLocation Loc, Expr *LHSExpr, Expr *RHSExpr) { bool isLHSPointer = LHSExpr->getType()->isAnyPointerType(); bool isRHSPointer = RHSExpr->getType()->isAnyPointerType(); if (!isLHSPointer && !isRHSPointer) return true; QualType LHSPointeeTy, RHSPointeeTy; if (isLHSPointer) LHSPointeeTy = LHSExpr->getType()->getPointeeType(); if (isRHSPointer) RHSPointeeTy = RHSExpr->getType()->getPointeeType(); // if both are pointers check if operation is valid wrt address spaces if (S.getLangOpts().OpenCL && isLHSPointer && isRHSPointer) { const PointerType *lhsPtr = LHSExpr->getType()->getAs(); const PointerType *rhsPtr = RHSExpr->getType()->getAs(); if (!lhsPtr->isAddressSpaceOverlapping(*rhsPtr)) { S.Diag(Loc, diag::err_typecheck_op_on_nonoverlapping_address_space_pointers) << LHSExpr->getType() << RHSExpr->getType() << 1 /*arithmetic op*/ << LHSExpr->getSourceRange() << RHSExpr->getSourceRange(); return false; } } // Check for arithmetic on pointers to incomplete types. bool isLHSVoidPtr = isLHSPointer && LHSPointeeTy->isVoidType(); bool isRHSVoidPtr = isRHSPointer && RHSPointeeTy->isVoidType(); if (isLHSVoidPtr || isRHSVoidPtr) { if (!isRHSVoidPtr) diagnoseArithmeticOnVoidPointer(S, Loc, LHSExpr); else if (!isLHSVoidPtr) diagnoseArithmeticOnVoidPointer(S, Loc, RHSExpr); else diagnoseArithmeticOnTwoVoidPointers(S, Loc, LHSExpr, RHSExpr); return !S.getLangOpts().CPlusPlus; } bool isLHSFuncPtr = isLHSPointer && LHSPointeeTy->isFunctionType(); bool isRHSFuncPtr = isRHSPointer && RHSPointeeTy->isFunctionType(); if (isLHSFuncPtr || isRHSFuncPtr) { if (!isRHSFuncPtr) diagnoseArithmeticOnFunctionPointer(S, Loc, LHSExpr); else if (!isLHSFuncPtr) diagnoseArithmeticOnFunctionPointer(S, Loc, RHSExpr); else diagnoseArithmeticOnTwoFunctionPointers(S, Loc, LHSExpr, RHSExpr); return !S.getLangOpts().CPlusPlus; } if (isLHSPointer && checkArithmeticIncompletePointerType(S, Loc, LHSExpr)) return false; if (isRHSPointer && checkArithmeticIncompletePointerType(S, Loc, RHSExpr)) return false; return true; } /// diagnoseStringPlusInt - Emit a warning when adding an integer to a string /// literal. 
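/// For example (editor's illustration):
/// \code
///   const char *p = "foo" + 5;   // warns: index is past the string bounds
/// \endcode
/// The fix-it for "str" + int rewrites the expression to &"str"[int].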
static void diagnoseStringPlusInt(Sema &Self, SourceLocation OpLoc,
                                  Expr *LHSExpr, Expr *RHSExpr) {
  StringLiteral *StrExpr = dyn_cast<StringLiteral>(LHSExpr->IgnoreImpCasts());
  Expr *IndexExpr = RHSExpr;
  if (!StrExpr) {
    StrExpr = dyn_cast<StringLiteral>(RHSExpr->IgnoreImpCasts());
    IndexExpr = LHSExpr;
  }

  bool IsStringPlusInt = StrExpr &&
      IndexExpr->getType()->isIntegralOrUnscopedEnumerationType();
  if (!IsStringPlusInt || IndexExpr->isValueDependent())
    return;

  llvm::APSInt index;
  if (IndexExpr->EvaluateAsInt(index, Self.getASTContext())) {
    unsigned StrLenWithNull = StrExpr->getLength() + 1;
    if (index.isNonNegative() &&
        index <= llvm::APSInt(llvm::APInt(index.getBitWidth(), StrLenWithNull),
                              index.isUnsigned()))
      return;
  }

  SourceRange DiagRange(LHSExpr->getLocStart(), RHSExpr->getLocEnd());
  Self.Diag(OpLoc, diag::warn_string_plus_int)
      << DiagRange << IndexExpr->IgnoreImpCasts()->getType();

  // Only print a fixit for "str" + int, not for int + "str".
  if (IndexExpr == RHSExpr) {
    SourceLocation EndLoc = Self.getLocForEndOfToken(RHSExpr->getLocEnd());
    Self.Diag(OpLoc, diag::note_string_plus_scalar_silence)
        << FixItHint::CreateInsertion(LHSExpr->getLocStart(), "&")
        << FixItHint::CreateReplacement(SourceRange(OpLoc), "[")
        << FixItHint::CreateInsertion(EndLoc, "]");
  } else
    Self.Diag(OpLoc, diag::note_string_plus_scalar_silence);
}

/// \brief Emit a warning when adding a char literal to a string.
static void diagnoseStringPlusChar(Sema &Self, SourceLocation OpLoc,
                                   Expr *LHSExpr, Expr *RHSExpr) {
  const Expr *StringRefExpr = LHSExpr;
  const CharacterLiteral *CharExpr =
      dyn_cast<CharacterLiteral>(RHSExpr->IgnoreImpCasts());

  if (!CharExpr) {
    CharExpr = dyn_cast<CharacterLiteral>(LHSExpr->IgnoreImpCasts());
    StringRefExpr = RHSExpr;
  }

  if (!CharExpr || !StringRefExpr)
    return;

  const QualType StringType = StringRefExpr->getType();

  // Return if not a PointerType.
  if (!StringType->isAnyPointerType())
    return;

  // Return if not a CharacterType.
  if (!StringType->getPointeeType()->isAnyCharacterType())
    return;

  ASTContext &Ctx = Self.getASTContext();
  SourceRange DiagRange(LHSExpr->getLocStart(), RHSExpr->getLocEnd());

  const QualType CharType = CharExpr->getType();
  if (!CharType->isAnyCharacterType() &&
      CharType->isIntegerType() &&
      llvm::isUIntN(Ctx.getCharWidth(), CharExpr->getValue())) {
    Self.Diag(OpLoc, diag::warn_string_plus_char)
        << DiagRange << Ctx.CharTy;
  } else {
    Self.Diag(OpLoc, diag::warn_string_plus_char)
        << DiagRange << CharExpr->getType();
  }

  // Only print a fixit for str + char, not for char + str.
  if (isa<CharacterLiteral>(RHSExpr->IgnoreImpCasts())) {
    SourceLocation EndLoc = Self.getLocForEndOfToken(RHSExpr->getLocEnd());
    Self.Diag(OpLoc, diag::note_string_plus_scalar_silence)
        << FixItHint::CreateInsertion(LHSExpr->getLocStart(), "&")
        << FixItHint::CreateReplacement(SourceRange(OpLoc), "[")
        << FixItHint::CreateInsertion(EndLoc, "]");
  } else {
    Self.Diag(OpLoc, diag::note_string_plus_scalar_silence);
  }
}

/// \brief Emit error when two pointers are incompatible.
static void diagnosePointerIncompatibility(Sema &S, SourceLocation Loc, Expr *LHSExpr, Expr *RHSExpr) { assert(LHSExpr->getType()->isAnyPointerType()); assert(RHSExpr->getType()->isAnyPointerType()); S.Diag(Loc, diag::err_typecheck_sub_ptr_compatible) << LHSExpr->getType() << RHSExpr->getType() << LHSExpr->getSourceRange() << RHSExpr->getSourceRange(); } // C99 6.5.6 QualType Sema::CheckAdditionOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, BinaryOperatorKind Opc, QualType* CompLHSTy) { checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/false); if (LHS.get()->getType()->isVectorType() || RHS.get()->getType()->isVectorType()) { QualType compType = CheckVectorOperands( LHS, RHS, Loc, CompLHSTy, /*AllowBothBool*/getLangOpts().AltiVec, /*AllowBoolConversions*/getLangOpts().ZVector); if (CompLHSTy) *CompLHSTy = compType; return compType; } QualType compType = UsualArithmeticConversions(LHS, RHS, CompLHSTy); if (LHS.isInvalid() || RHS.isInvalid()) return QualType(); // Diagnose "string literal" '+' int and string '+' "char literal". if (Opc == BO_Add) { diagnoseStringPlusInt(*this, Loc, LHS.get(), RHS.get()); diagnoseStringPlusChar(*this, Loc, LHS.get(), RHS.get()); } // handle the common case first (both operands are arithmetic). if (!compType.isNull() && compType->isArithmeticType()) { if (CompLHSTy) *CompLHSTy = compType; return compType; } // Type-checking. Ultimately the pointer's going to be in PExp; // note that we bias towards the LHS being the pointer. Expr *PExp = LHS.get(), *IExp = RHS.get(); bool isObjCPointer; if (PExp->getType()->isPointerType()) { isObjCPointer = false; } else if (PExp->getType()->isObjCObjectPointerType()) { isObjCPointer = true; } else { std::swap(PExp, IExp); if (PExp->getType()->isPointerType()) { isObjCPointer = false; } else if (PExp->getType()->isObjCObjectPointerType()) { isObjCPointer = true; } else { return InvalidOperands(Loc, LHS, RHS); } } assert(PExp->getType()->isAnyPointerType()); if (!IExp->getType()->isIntegerType()) return InvalidOperands(Loc, LHS, RHS); // Adding to a null pointer results in undefined behavior. if (PExp->IgnoreParenCasts()->isNullPointerConstant( Context, Expr::NPC_ValueDependentIsNotNull)) { // In C++ adding zero to a null pointer is defined. llvm::APSInt KnownVal; if (!getLangOpts().CPlusPlus || (!IExp->isValueDependent() && (!IExp->EvaluateAsInt(KnownVal, Context) || KnownVal != 0))) { // Check the conditions to see if this is the 'p = nullptr + n' idiom. 
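      // (Editor's illustration) The idiom in question looks like:
      //   p = (char*)0 + n;   // recognized GNU extension, milder warning
      // while e.g. 'nullptr + 1' gets the regular null-pointer-arithmetic
      // warning, since adding a nonzero value to a null pointer is undefined.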
bool IsGNUIdiom = BinaryOperator::isNullPointerArithmeticExtension( Context, BO_Add, PExp, IExp); diagnoseArithmeticOnNullPointer(*this, Loc, PExp, IsGNUIdiom); } } if (!checkArithmeticOpPointerOperand(*this, Loc, PExp)) return QualType(); if (isObjCPointer && checkArithmeticOnObjCPointer(*this, Loc, PExp)) return QualType(); // Check array bounds for pointer arithemtic CheckArrayAccess(PExp, IExp); if (CompLHSTy) { QualType LHSTy = Context.isPromotableBitField(LHS.get()); if (LHSTy.isNull()) { LHSTy = LHS.get()->getType(); if (LHSTy->isPromotableIntegerType()) LHSTy = Context.getPromotedIntegerType(LHSTy); } *CompLHSTy = LHSTy; } return PExp->getType(); } // C99 6.5.6 QualType Sema::CheckSubtractionOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, QualType* CompLHSTy) { checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/false); if (LHS.get()->getType()->isVectorType() || RHS.get()->getType()->isVectorType()) { QualType compType = CheckVectorOperands( LHS, RHS, Loc, CompLHSTy, /*AllowBothBool*/getLangOpts().AltiVec, /*AllowBoolConversions*/getLangOpts().ZVector); if (CompLHSTy) *CompLHSTy = compType; return compType; } QualType compType = UsualArithmeticConversions(LHS, RHS, CompLHSTy); if (LHS.isInvalid() || RHS.isInvalid()) return QualType(); // Enforce type constraints: C99 6.5.6p3. // Handle the common case first (both operands are arithmetic). if (!compType.isNull() && compType->isArithmeticType()) { if (CompLHSTy) *CompLHSTy = compType; return compType; } // Either ptr - int or ptr - ptr. if (LHS.get()->getType()->isAnyPointerType()) { QualType lpointee = LHS.get()->getType()->getPointeeType(); // Diagnose bad cases where we step over interface counts. if (LHS.get()->getType()->isObjCObjectPointerType() && checkArithmeticOnObjCPointer(*this, Loc, LHS.get())) return QualType(); // The result type of a pointer-int computation is the pointer type. if (RHS.get()->getType()->isIntegerType()) { // Subtracting from a null pointer should produce a warning. // The last argument to the diagnose call says this doesn't match the // GNU int-to-pointer idiom. if (LHS.get()->IgnoreParenCasts()->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNotNull)) { // In C++ adding zero to a null pointer is defined. llvm::APSInt KnownVal; if (!getLangOpts().CPlusPlus || (!RHS.get()->isValueDependent() && (!RHS.get()->EvaluateAsInt(KnownVal, Context) || KnownVal != 0))) { diagnoseArithmeticOnNullPointer(*this, Loc, LHS.get(), false); } } if (!checkArithmeticOpPointerOperand(*this, Loc, LHS.get())) return QualType(); // Check array bounds for pointer arithemtic CheckArrayAccess(LHS.get(), RHS.get(), /*ArraySubscriptExpr*/nullptr, /*AllowOnePastEnd*/true, /*IndexNegated*/true); if (CompLHSTy) *CompLHSTy = LHS.get()->getType(); return LHS.get()->getType(); } // Handle pointer-pointer subtractions. 
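    // (Editor's illustration) e.g. for 'int *a, *b;  a - b' the result below
    // has the target's ptrdiff_t type (Context.getPointerDiffType()), while
    // 'int* - long*' is rejected as subtraction of incompatible pointers.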
if (const PointerType *RHSPTy = RHS.get()->getType()->getAs()) { QualType rpointee = RHSPTy->getPointeeType(); if (getLangOpts().CPlusPlus) { // Pointee types must be the same: C++ [expr.add] if (!Context.hasSameUnqualifiedType(lpointee, rpointee)) { diagnosePointerIncompatibility(*this, Loc, LHS.get(), RHS.get()); } } else { // Pointee types must be compatible C99 6.5.6p3 if (!Context.typesAreCompatible( Context.getCanonicalType(lpointee).getUnqualifiedType(), Context.getCanonicalType(rpointee).getUnqualifiedType())) { diagnosePointerIncompatibility(*this, Loc, LHS.get(), RHS.get()); return QualType(); } } if (!checkArithmeticBinOpPointerOperands(*this, Loc, LHS.get(), RHS.get())) return QualType(); // FIXME: Add warnings for nullptr - ptr. // The pointee type may have zero size. As an extension, a structure or // union may have zero size or an array may have zero length. In this // case subtraction does not make sense. if (!rpointee->isVoidType() && !rpointee->isFunctionType()) { CharUnits ElementSize = Context.getTypeSizeInChars(rpointee); if (ElementSize.isZero()) { Diag(Loc,diag::warn_sub_ptr_zero_size_types) << rpointee.getUnqualifiedType() << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); } } if (CompLHSTy) *CompLHSTy = LHS.get()->getType(); return Context.getPointerDiffType(); } } return InvalidOperands(Loc, LHS, RHS); } static bool isScopedEnumerationType(QualType T) { if (const EnumType *ET = T->getAs()) return ET->getDecl()->isScoped(); return false; } static void DiagnoseBadShiftValues(Sema& S, ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, BinaryOperatorKind Opc, QualType LHSType) { // OpenCL 6.3j: shift values are effectively % word size of LHS (more defined), // so skip remaining warnings as we don't want to modify values within Sema. if (S.getLangOpts().OpenCL) return; llvm::APSInt Right; // Check right/shifter operand if (RHS.get()->isValueDependent() || !RHS.get()->EvaluateAsInt(Right, S.Context)) return; if (Right.isNegative()) { S.DiagRuntimeBehavior(Loc, RHS.get(), S.PDiag(diag::warn_shift_negative) << RHS.get()->getSourceRange()); return; } llvm::APInt LeftBits(Right.getBitWidth(), S.Context.getTypeSize(LHS.get()->getType())); if (Right.uge(LeftBits)) { S.DiagRuntimeBehavior(Loc, RHS.get(), S.PDiag(diag::warn_shift_gt_typewidth) << RHS.get()->getSourceRange()); return; } if (Opc != BO_Shl) return; // When left shifting an ICE which is signed, we can check for overflow which // according to C++ has undefined behavior ([expr.shift] 5.8/2). Unsigned // integers have defined behavior modulo one more than the maximum value // representable in the result type, so never warn for those. llvm::APSInt Left; if (LHS.get()->isValueDependent() || LHSType->hasUnsignedIntegerRepresentation() || !LHS.get()->EvaluateAsInt(Left, S.Context)) return; // If LHS does not have a signed type and non-negative value // then, the behavior is undefined. Warn about it. if (Left.isNegative() && !S.getLangOpts().isSignedOverflowDefined()) { S.DiagRuntimeBehavior(Loc, LHS.get(), S.PDiag(diag::warn_shift_lhs_negative) << LHS.get()->getSourceRange()); return; } llvm::APInt ResultBits = static_cast(Right) + Left.getMinSignedBits(); if (LeftBits.uge(ResultBits)) return; llvm::APSInt Result = Left.extend(ResultBits.getLimitedValue()); Result = Result.shl(Right); // Print the bit representation of the signed integer as an unsigned // hexadecimal number. 
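  // (Editor's illustration) e.g. '0x7FFFFFFF << 1' on a 32-bit int needs 33
  // bits; only the sign bit is lost, so the milder "sets the sign bit"
  // warning below fires rather than the general "requires N bits" one.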
SmallString<40> HexResult; Result.toString(HexResult, 16, /*Signed =*/false, /*Literal =*/true); // If we are only missing a sign bit, this is less likely to result in actual // bugs -- if the result is cast back to an unsigned type, it will have the // expected value. Thus we place this behind a different warning that can be // turned off separately if needed. if (LeftBits == ResultBits - 1) { S.Diag(Loc, diag::warn_shift_result_sets_sign_bit) << HexResult << LHSType << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return; } S.Diag(Loc, diag::warn_shift_result_gt_typewidth) << HexResult.str() << Result.getMinSignedBits() << LHSType << Left.getBitWidth() << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); } /// \brief Return the resulting type when a vector is shifted /// by a scalar or vector shift amount. static QualType checkVectorShift(Sema &S, ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, bool IsCompAssign) { // OpenCL v1.1 s6.3.j says RHS can be a vector only if LHS is a vector. if ((S.LangOpts.OpenCL || S.LangOpts.ZVector) && !LHS.get()->getType()->isVectorType()) { S.Diag(Loc, diag::err_shift_rhs_only_vector) << RHS.get()->getType() << LHS.get()->getType() << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } if (!IsCompAssign) { LHS = S.UsualUnaryConversions(LHS.get()); if (LHS.isInvalid()) return QualType(); } RHS = S.UsualUnaryConversions(RHS.get()); if (RHS.isInvalid()) return QualType(); QualType LHSType = LHS.get()->getType(); // Note that LHS might be a scalar because the routine calls not only in // OpenCL case. const VectorType *LHSVecTy = LHSType->getAs(); QualType LHSEleType = LHSVecTy ? LHSVecTy->getElementType() : LHSType; // Note that RHS might not be a vector. QualType RHSType = RHS.get()->getType(); const VectorType *RHSVecTy = RHSType->getAs(); QualType RHSEleType = RHSVecTy ? RHSVecTy->getElementType() : RHSType; // The operands need to be integers. if (!LHSEleType->isIntegerType()) { S.Diag(Loc, diag::err_typecheck_expect_int) << LHS.get()->getType() << LHS.get()->getSourceRange(); return QualType(); } if (!RHSEleType->isIntegerType()) { S.Diag(Loc, diag::err_typecheck_expect_int) << RHS.get()->getType() << RHS.get()->getSourceRange(); return QualType(); } if (!LHSVecTy) { assert(RHSVecTy); if (IsCompAssign) return RHSType; if (LHSEleType != RHSEleType) { LHS = S.ImpCastExprToType(LHS.get(),RHSEleType, CK_IntegralCast); LHSEleType = RHSEleType; } QualType VecTy = S.Context.getExtVectorType(LHSEleType, RHSVecTy->getNumElements()); LHS = S.ImpCastExprToType(LHS.get(), VecTy, CK_VectorSplat); LHSType = VecTy; } else if (RHSVecTy) { // OpenCL v1.1 s6.3.j says that for vector types, the operators // are applied component-wise. So if RHS is a vector, then ensure // that the number of elements is the same as LHS... 
if (RHSVecTy->getNumElements() != LHSVecTy->getNumElements()) { S.Diag(Loc, diag::err_typecheck_vector_lengths_not_equal) << LHS.get()->getType() << RHS.get()->getType() << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } if (!S.LangOpts.OpenCL && !S.LangOpts.ZVector) { const BuiltinType *LHSBT = LHSEleType->getAs(); const BuiltinType *RHSBT = RHSEleType->getAs(); if (LHSBT != RHSBT && S.Context.getTypeSize(LHSBT) != S.Context.getTypeSize(RHSBT)) { S.Diag(Loc, diag::warn_typecheck_vector_element_sizes_not_equal) << LHS.get()->getType() << RHS.get()->getType() << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); } } } else { // ...else expand RHS to match the number of elements in LHS. QualType VecTy = S.Context.getExtVectorType(RHSEleType, LHSVecTy->getNumElements()); RHS = S.ImpCastExprToType(RHS.get(), VecTy, CK_VectorSplat); } return LHSType; } // C99 6.5.7 QualType Sema::CheckShiftOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, BinaryOperatorKind Opc, bool IsCompAssign) { checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/false); // Vector shifts promote their scalar inputs to vector type. if (LHS.get()->getType()->isVectorType() || RHS.get()->getType()->isVectorType()) { if (LangOpts.ZVector) { // The shift operators for the z vector extensions work basically // like general shifts, except that neither the LHS nor the RHS is // allowed to be a "vector bool". if (auto LHSVecType = LHS.get()->getType()->getAs()) if (LHSVecType->getVectorKind() == VectorType::AltiVecBool) return InvalidOperands(Loc, LHS, RHS); if (auto RHSVecType = RHS.get()->getType()->getAs()) if (RHSVecType->getVectorKind() == VectorType::AltiVecBool) return InvalidOperands(Loc, LHS, RHS); } return checkVectorShift(*this, LHS, RHS, Loc, IsCompAssign); } // Shifts don't perform usual arithmetic conversions, they just do integer // promotions on each operand. C99 6.5.7p3 // For the LHS, do usual unary conversions, but then reset them away // if this is a compound assignment. ExprResult OldLHS = LHS; LHS = UsualUnaryConversions(LHS.get()); if (LHS.isInvalid()) return QualType(); QualType LHSType = LHS.get()->getType(); if (IsCompAssign) LHS = OldLHS; // The RHS is simpler. RHS = UsualUnaryConversions(RHS.get()); if (RHS.isInvalid()) return QualType(); QualType RHSType = RHS.get()->getType(); // C99 6.5.7p2: Each of the operands shall have integer type. if (!LHSType->hasIntegerRepresentation() || !RHSType->hasIntegerRepresentation()) return InvalidOperands(Loc, LHS, RHS); // C++0x: Don't allow scoped enums. FIXME: Use something better than // hasIntegerRepresentation() above instead of this. if (isScopedEnumerationType(LHSType) || isScopedEnumerationType(RHSType)) { return InvalidOperands(Loc, LHS, RHS); } // Sanity-check shift operands DiagnoseBadShiftValues(*this, LHS, RHS, Loc, Opc, LHSType); // "The type of the result is that of the promoted left operand." return LHSType; } static bool IsWithinTemplateSpecialization(Decl *D) { if (DeclContext *DC = D->getDeclContext()) { if (isa(DC)) return true; if (FunctionDecl *FD = dyn_cast(DC)) return FD->isFunctionTemplateSpecialization(); } return false; } /// If two different enums are compared, raise a warning. 
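/// For example (editor's illustration):
/// \code
///   enum Fruit { Apple };  enum Tool { Hammer };
///   bool b = (Apple == Hammer);  // warn_comparison_of_mixed_enum_types
/// \endcode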
static void checkEnumComparison(Sema &S, SourceLocation Loc, Expr *LHS, Expr *RHS) { QualType LHSStrippedType = LHS->IgnoreParenImpCasts()->getType(); QualType RHSStrippedType = RHS->IgnoreParenImpCasts()->getType(); const EnumType *LHSEnumType = LHSStrippedType->getAs(); if (!LHSEnumType) return; const EnumType *RHSEnumType = RHSStrippedType->getAs(); if (!RHSEnumType) return; // Ignore anonymous enums. if (!LHSEnumType->getDecl()->getIdentifier() && !LHSEnumType->getDecl()->getTypedefNameForAnonDecl()) return; if (!RHSEnumType->getDecl()->getIdentifier() && !RHSEnumType->getDecl()->getTypedefNameForAnonDecl()) return; if (S.Context.hasSameUnqualifiedType(LHSStrippedType, RHSStrippedType)) return; S.Diag(Loc, diag::warn_comparison_of_mixed_enum_types) << LHSStrippedType << RHSStrippedType << LHS->getSourceRange() << RHS->getSourceRange(); } /// \brief Diagnose bad pointer comparisons. static void diagnoseDistinctPointerComparison(Sema &S, SourceLocation Loc, ExprResult &LHS, ExprResult &RHS, bool IsError) { S.Diag(Loc, IsError ? diag::err_typecheck_comparison_of_distinct_pointers : diag::ext_typecheck_comparison_of_distinct_pointers) << LHS.get()->getType() << RHS.get()->getType() << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); } /// \brief Returns false if the pointers are converted to a composite type, /// true otherwise. static bool convertPointersToCompositeType(Sema &S, SourceLocation Loc, ExprResult &LHS, ExprResult &RHS) { // C++ [expr.rel]p2: // [...] Pointer conversions (4.10) and qualification // conversions (4.4) are performed on pointer operands (or on // a pointer operand and a null pointer constant) to bring // them to their composite pointer type. [...] // // C++ [expr.eq]p1 uses the same notion for (in)equality // comparisons of pointers. QualType LHSType = LHS.get()->getType(); QualType RHSType = RHS.get()->getType(); assert(LHSType->isPointerType() || RHSType->isPointerType() || LHSType->isMemberPointerType() || RHSType->isMemberPointerType()); QualType T = S.FindCompositePointerType(Loc, LHS, RHS); if (T.isNull()) { if ((LHSType->isPointerType() || LHSType->isMemberPointerType()) && (RHSType->isPointerType() || RHSType->isMemberPointerType())) diagnoseDistinctPointerComparison(S, Loc, LHS, RHS, /*isError*/true); else S.InvalidOperands(Loc, LHS, RHS); return true; } LHS = S.ImpCastExprToType(LHS.get(), T, CK_BitCast); RHS = S.ImpCastExprToType(RHS.get(), T, CK_BitCast); return false; } static void diagnoseFunctionPointerToVoidComparison(Sema &S, SourceLocation Loc, ExprResult &LHS, ExprResult &RHS, bool IsError) { S.Diag(Loc, IsError ? diag::err_typecheck_comparison_of_fptr_to_void : diag::ext_typecheck_comparison_of_fptr_to_void) << LHS.get()->getType() << RHS.get()->getType() << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); } static bool isObjCObjectLiteral(ExprResult &E) { switch (E.get()->IgnoreParenImpCasts()->getStmtClass()) { case Stmt::ObjCArrayLiteralClass: case Stmt::ObjCDictionaryLiteralClass: case Stmt::ObjCStringLiteralClass: case Stmt::ObjCBoxedExprClass: return true; default: // Note that ObjCBoolLiteral is NOT an object literal! return false; } } static bool hasIsEqualMethod(Sema &S, const Expr *LHS, const Expr *RHS) { const ObjCObjectPointerType *Type = LHS->getType()->getAs(); // If this is not actually an Objective-C object, bail out. if (!Type) return false; // Get the LHS object's interface type. QualType InterfaceType = Type->getPointeeType(); // If the RHS isn't an Objective-C object, bail out. 
if (!RHS->getType()->isObjCObjectPointerType()) return false; // Try to find the -isEqual: method. Selector IsEqualSel = S.NSAPIObj->getIsEqualSelector(); ObjCMethodDecl *Method = S.LookupMethodInObjectType(IsEqualSel, InterfaceType, /*instance=*/true); if (!Method) { if (Type->isObjCIdType()) { // For 'id', just check the global pool. Method = S.LookupInstanceMethodInGlobalPool(IsEqualSel, SourceRange(), /*receiverId=*/true); } else { // Check protocols. Method = S.LookupMethodInQualifiedType(IsEqualSel, Type, /*instance=*/true); } } if (!Method) return false; QualType T = Method->parameters()[0]->getType(); if (!T->isObjCObjectPointerType()) return false; QualType R = Method->getReturnType(); if (!R->isScalarType()) return false; return true; } Sema::ObjCLiteralKind Sema::CheckLiteralKind(Expr *FromE) { FromE = FromE->IgnoreParenImpCasts(); switch (FromE->getStmtClass()) { default: break; case Stmt::ObjCStringLiteralClass: // "string literal" return LK_String; case Stmt::ObjCArrayLiteralClass: // "array literal" return LK_Array; case Stmt::ObjCDictionaryLiteralClass: // "dictionary literal" return LK_Dictionary; case Stmt::BlockExprClass: return LK_Block; case Stmt::ObjCBoxedExprClass: { Expr *Inner = cast(FromE)->getSubExpr()->IgnoreParens(); switch (Inner->getStmtClass()) { case Stmt::IntegerLiteralClass: case Stmt::FloatingLiteralClass: case Stmt::CharacterLiteralClass: case Stmt::ObjCBoolLiteralExprClass: case Stmt::CXXBoolLiteralExprClass: // "numeric literal" return LK_Numeric; case Stmt::ImplicitCastExprClass: { CastKind CK = cast(Inner)->getCastKind(); // Boolean literals can be represented by implicit casts. if (CK == CK_IntegralToBoolean || CK == CK_IntegralCast) return LK_Numeric; break; } default: break; } return LK_Boxed; } } return LK_None; } static void diagnoseObjCLiteralComparison(Sema &S, SourceLocation Loc, ExprResult &LHS, ExprResult &RHS, BinaryOperator::Opcode Opc){ Expr *Literal; Expr *Other; if (isObjCObjectLiteral(LHS)) { Literal = LHS.get(); Other = RHS.get(); } else { Literal = RHS.get(); Other = LHS.get(); } // Don't warn on comparisons against nil. Other = Other->IgnoreParenCasts(); if (Other->isNullPointerConstant(S.getASTContext(), Expr::NPC_ValueDependentIsNotNull)) return; // This should be kept in sync with warn_objc_literal_comparison. // LK_String should always be after the other literals, since it has its own // warning flag. Sema::ObjCLiteralKind LiteralKind = S.CheckLiteralKind(Literal); assert(LiteralKind != Sema::LK_Block); if (LiteralKind == Sema::LK_None) { llvm_unreachable("Unknown Objective-C object literal kind"); } if (LiteralKind == Sema::LK_String) S.Diag(Loc, diag::warn_objc_string_literal_comparison) << Literal->getSourceRange(); else S.Diag(Loc, diag::warn_objc_literal_comparison) << LiteralKind << Literal->getSourceRange(); if (BinaryOperator::isEqualityOp(Opc) && hasIsEqualMethod(S, LHS.get(), RHS.get())) { SourceLocation Start = LHS.get()->getLocStart(); SourceLocation End = S.getLocForEndOfToken(RHS.get()->getLocEnd()); CharSourceRange OpRange = CharSourceRange::getCharRange(Loc, S.getLocForEndOfToken(Loc)); S.Diag(Loc, diag::note_objc_literal_comparison_isequal) << FixItHint::CreateInsertion(Start, Opc == BO_EQ ? "[" : "![") << FixItHint::CreateReplacement(OpRange, " isEqual:") << FixItHint::CreateInsertion(End, "]"); } } /// Warns on !x < y, !x & y where !(x < y), !(x & y) was probably intended. 
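/// For example (editor's illustration):
/// \code
///   int x, y;
///   if (!x < y) { }   // warns: did you mean !(x < y) or (!x) < y?
/// \endcode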
static void diagnoseLogicalNotOnLHSofCheck(Sema &S, ExprResult &LHS,
                                           ExprResult &RHS, SourceLocation Loc,
                                           BinaryOperatorKind Opc) {
  // Check that left hand side is !something.
  UnaryOperator *UO = dyn_cast<UnaryOperator>(LHS.get()->IgnoreImpCasts());
  if (!UO || UO->getOpcode() != UO_LNot) return;

  // Only check if the right hand side is non-bool arithmetic type.
  if (RHS.get()->isKnownToHaveBooleanValue()) return;

  // Make sure that the something in !something is not bool.
  Expr *SubExpr = UO->getSubExpr()->IgnoreImpCasts();
  if (SubExpr->isKnownToHaveBooleanValue()) return;

  // Emit warning.
  bool IsBitwiseOp = Opc == BO_And || Opc == BO_Or || Opc == BO_Xor;
  S.Diag(UO->getOperatorLoc(), diag::warn_logical_not_on_lhs_of_check)
      << Loc << IsBitwiseOp;

  // First note suggests !(x < y)
  SourceLocation FirstOpen = SubExpr->getLocStart();
  SourceLocation FirstClose = RHS.get()->getLocEnd();
  FirstClose = S.getLocForEndOfToken(FirstClose);
  if (FirstClose.isInvalid())
    FirstOpen = SourceLocation();
  S.Diag(UO->getOperatorLoc(), diag::note_logical_not_fix)
      << IsBitwiseOp
      << FixItHint::CreateInsertion(FirstOpen, "(")
      << FixItHint::CreateInsertion(FirstClose, ")");

  // Second note suggests (!x) < y
  SourceLocation SecondOpen = LHS.get()->getLocStart();
  SourceLocation SecondClose = LHS.get()->getLocEnd();
  SecondClose = S.getLocForEndOfToken(SecondClose);
  if (SecondClose.isInvalid())
    SecondOpen = SourceLocation();
  S.Diag(UO->getOperatorLoc(), diag::note_logical_not_silence_with_parens)
      << FixItHint::CreateInsertion(SecondOpen, "(")
      << FixItHint::CreateInsertion(SecondClose, ")");
}

// Get the decl for a simple expression: a reference to a variable,
// an implicit C++ field reference, or an implicit ObjC ivar reference.
static ValueDecl *getCompareDecl(Expr *E) {
  if (DeclRefExpr* DR = dyn_cast<DeclRefExpr>(E))
    return DR->getDecl();
  if (ObjCIvarRefExpr* Ivar = dyn_cast<ObjCIvarRefExpr>(E)) {
    if (Ivar->isFreeIvar())
      return Ivar->getDecl();
  }
  if (MemberExpr* Mem = dyn_cast<MemberExpr>(E)) {
    if (Mem->isImplicitAccess())
      return Mem->getMemberDecl();
  }
  return nullptr;
}

// C99 6.5.8, C++ [expr.rel]
QualType Sema::CheckCompareOperands(ExprResult &LHS, ExprResult &RHS,
                                    SourceLocation Loc, BinaryOperatorKind Opc,
                                    bool IsRelational) {
  checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/true);

  // Handle vector comparisons separately.
  if (LHS.get()->getType()->isVectorType() ||
      RHS.get()->getType()->isVectorType())
    return CheckVectorCompareOperands(LHS, RHS, Loc, IsRelational);

  QualType LHSType = LHS.get()->getType();
  QualType RHSType = RHS.get()->getType();

  Expr *LHSStripped = LHS.get()->IgnoreParenImpCasts();
  Expr *RHSStripped = RHS.get()->IgnoreParenImpCasts();

  checkEnumComparison(*this, Loc, LHS.get(), RHS.get());
  diagnoseLogicalNotOnLHSofCheck(*this, LHS, RHS, Loc, Opc);

  if (!LHSType->hasFloatingRepresentation() &&
      !(LHSType->isBlockPointerType() && IsRelational) &&
      !LHS.get()->getLocStart().isMacroID() &&
      !RHS.get()->getLocStart().isMacroID() &&
      !inTemplateInstantiation()) {
    // For non-floating point types, check for self-comparisons of the form
    // x == x, x != x, x < x, etc.  These always evaluate to a constant, and
    // often indicate logic errors in the program.
    //
    // NOTE: Don't warn about comparison expressions resulting from macro
    // expansion.  Also don't warn about comparisons which are only self
    // comparisons within a template specialization.  The warnings should catch
    // obvious cases in the definition of the template anyways.  The idea is to
    // warn when the typed comparison operator will always evaluate to the same
    // result.
ValueDecl *DL = getCompareDecl(LHSStripped); ValueDecl *DR = getCompareDecl(RHSStripped); if (DL && DR && DL == DR && !IsWithinTemplateSpecialization(DL)) { DiagRuntimeBehavior(Loc, nullptr, PDiag(diag::warn_comparison_always) << 0 // self- << (Opc == BO_EQ || Opc == BO_LE || Opc == BO_GE)); } else if (DL && DR && LHSType->isArrayType() && RHSType->isArrayType() && !DL->getType()->isReferenceType() && !DR->getType()->isReferenceType()) { // what is it always going to eval to? char always_evals_to; switch(Opc) { case BO_EQ: // e.g. array1 == array2 always_evals_to = 0; // false break; case BO_NE: // e.g. array1 != array2 always_evals_to = 1; // true break; default: // best we can say is 'a constant' always_evals_to = 2; // e.g. array1 <= array2 break; } DiagRuntimeBehavior(Loc, nullptr, PDiag(diag::warn_comparison_always) << 1 // array << always_evals_to); } if (isa(LHSStripped)) LHSStripped = LHSStripped->IgnoreParenCasts(); if (isa(RHSStripped)) RHSStripped = RHSStripped->IgnoreParenCasts(); // Warn about comparisons against a string constant (unless the other // operand is null), the user probably wants strcmp. Expr *literalString = nullptr; Expr *literalStringStripped = nullptr; if ((isa(LHSStripped) || isa(LHSStripped)) && !RHSStripped->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull)) { literalString = LHS.get(); literalStringStripped = LHSStripped; } else if ((isa(RHSStripped) || isa(RHSStripped)) && !LHSStripped->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull)) { literalString = RHS.get(); literalStringStripped = RHSStripped; } if (literalString) { DiagRuntimeBehavior(Loc, nullptr, PDiag(diag::warn_stringcompare) << isa(literalStringStripped) << literalString->getSourceRange()); } } // C99 6.5.8p3 / C99 6.5.9p4 UsualArithmeticConversions(LHS, RHS); if (LHS.isInvalid() || RHS.isInvalid()) return QualType(); LHSType = LHS.get()->getType(); RHSType = RHS.get()->getType(); // The result of comparisons is 'bool' in C++, 'int' in C. QualType ResultTy = Context.getLogicalOperationType(); if (IsRelational) { if (LHSType->isRealType() && RHSType->isRealType()) return ResultTy; } else { // Check for comparisons of floating point operands using != and ==. if (LHSType->hasFloatingRepresentation()) CheckFloatComparison(Loc, LHS.get(), RHS.get()); if (LHSType->isArithmeticType() && RHSType->isArithmeticType()) return ResultTy; } const Expr::NullPointerConstantKind LHSNullKind = LHS.get()->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull); const Expr::NullPointerConstantKind RHSNullKind = RHS.get()->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull); bool LHSIsNull = LHSNullKind != Expr::NPCK_NotNull; bool RHSIsNull = RHSNullKind != Expr::NPCK_NotNull; if (!IsRelational && LHSIsNull != RHSIsNull) { bool IsEquality = Opc == BO_EQ; if (RHSIsNull) DiagnoseAlwaysNonNullPointer(LHS.get(), RHSNullKind, IsEquality, RHS.get()->getSourceRange()); else DiagnoseAlwaysNonNullPointer(RHS.get(), LHSNullKind, IsEquality, LHS.get()->getSourceRange()); } if ((LHSType->isIntegerType() && !LHSIsNull) || (RHSType->isIntegerType() && !RHSIsNull)) { // Skip normal pointer conversion checks in this case; we have better // diagnostics for this below. } else if (getLangOpts().CPlusPlus) { // Equality comparison of a function pointer to a void pointer is invalid, // but we allow it as an extension. // FIXME: If we really want to allow this, should it be part of composite // pointer type computation so it works in conditionals too? 
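    // (Editor's illustration) the extension handled below accepts e.g.:
    //   void (*fp)(void);  void *vp;
    //   fp == vp;   // GCC-compat extension; a hard error in SFINAE contexts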
if (!IsRelational && ((LHSType->isFunctionPointerType() && RHSType->isVoidPointerType()) || (RHSType->isFunctionPointerType() && LHSType->isVoidPointerType()))) { // This is a gcc extension compatibility comparison. // In a SFINAE context, we treat this as a hard error to maintain // conformance with the C++ standard. diagnoseFunctionPointerToVoidComparison( *this, Loc, LHS, RHS, /*isError*/ (bool)isSFINAEContext()); if (isSFINAEContext()) return QualType(); RHS = ImpCastExprToType(RHS.get(), LHSType, CK_BitCast); return ResultTy; } // C++ [expr.eq]p2: // If at least one operand is a pointer [...] bring them to their // composite pointer type. // C++ [expr.rel]p2: // If both operands are pointers, [...] bring them to their composite // pointer type. if ((int)LHSType->isPointerType() + (int)RHSType->isPointerType() >= (IsRelational ? 2 : 1) && (!LangOpts.ObjCAutoRefCount || !(LHSType->isObjCObjectPointerType() || RHSType->isObjCObjectPointerType()))) { if (convertPointersToCompositeType(*this, Loc, LHS, RHS)) return QualType(); else return ResultTy; } } else if (LHSType->isPointerType() && RHSType->isPointerType()) { // C99 6.5.8p2 // All of the following pointer-related warnings are GCC extensions, except // when handling null pointer constants. QualType LCanPointeeTy = LHSType->castAs()->getPointeeType().getCanonicalType(); QualType RCanPointeeTy = RHSType->castAs()->getPointeeType().getCanonicalType(); // C99 6.5.9p2 and C99 6.5.8p2 if (Context.typesAreCompatible(LCanPointeeTy.getUnqualifiedType(), RCanPointeeTy.getUnqualifiedType())) { // Valid unless a relational comparison of function pointers if (IsRelational && LCanPointeeTy->isFunctionType()) { Diag(Loc, diag::ext_typecheck_ordered_comparison_of_function_pointers) << LHSType << RHSType << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); } } else if (!IsRelational && (LCanPointeeTy->isVoidType() || RCanPointeeTy->isVoidType())) { // Valid unless comparison between non-null pointer and function pointer if ((LCanPointeeTy->isFunctionType() || RCanPointeeTy->isFunctionType()) && !LHSIsNull && !RHSIsNull) diagnoseFunctionPointerToVoidComparison(*this, Loc, LHS, RHS, /*isError*/false); } else { // Invalid diagnoseDistinctPointerComparison(*this, Loc, LHS, RHS, /*isError*/false); } if (LCanPointeeTy != RCanPointeeTy) { // Treat NULL constant as a special case in OpenCL. if (getLangOpts().OpenCL && !LHSIsNull && !RHSIsNull) { const PointerType *LHSPtr = LHSType->getAs(); if (!LHSPtr->isAddressSpaceOverlapping(*RHSType->getAs())) { Diag(Loc, diag::err_typecheck_op_on_nonoverlapping_address_space_pointers) << LHSType << RHSType << 0 /* comparison */ << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); } } LangAS AddrSpaceL = LCanPointeeTy.getAddressSpace(); LangAS AddrSpaceR = RCanPointeeTy.getAddressSpace(); CastKind Kind = AddrSpaceL != AddrSpaceR ? CK_AddressSpaceConversion : CK_BitCast; if (LHSIsNull && !RHSIsNull) LHS = ImpCastExprToType(LHS.get(), RHSType, Kind); else RHS = ImpCastExprToType(RHS.get(), LHSType, Kind); } return ResultTy; } if (getLangOpts().CPlusPlus) { // C++ [expr.eq]p4: // Two operands of type std::nullptr_t or one operand of type // std::nullptr_t and the other a null pointer constant compare equal. 
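    // (Editor's illustration) e.g. 'nullptr == nullptr' and 'nullptr == 0'
    // are both valid equality comparisons; the casts below just bring the
    // null operand over to the nullptr_t side.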
if (!IsRelational && LHSIsNull && RHSIsNull) { if (LHSType->isNullPtrType()) { RHS = ImpCastExprToType(RHS.get(), LHSType, CK_NullToPointer); return ResultTy; } if (RHSType->isNullPtrType()) { LHS = ImpCastExprToType(LHS.get(), RHSType, CK_NullToPointer); return ResultTy; } } // Comparison of Objective-C pointers and block pointers against nullptr_t. // These aren't covered by the composite pointer type rules. if (!IsRelational && RHSType->isNullPtrType() && (LHSType->isObjCObjectPointerType() || LHSType->isBlockPointerType())) { RHS = ImpCastExprToType(RHS.get(), LHSType, CK_NullToPointer); return ResultTy; } if (!IsRelational && LHSType->isNullPtrType() && (RHSType->isObjCObjectPointerType() || RHSType->isBlockPointerType())) { LHS = ImpCastExprToType(LHS.get(), RHSType, CK_NullToPointer); return ResultTy; } if (IsRelational && ((LHSType->isNullPtrType() && RHSType->isPointerType()) || (RHSType->isNullPtrType() && LHSType->isPointerType()))) { // HACK: Relational comparison of nullptr_t against a pointer type is // invalid per DR583, but we allow it within std::less<> and friends, // since otherwise common uses of it break. // FIXME: Consider removing this hack once LWG fixes std::less<> and // friends to have std::nullptr_t overload candidates. DeclContext *DC = CurContext; if (isa(DC)) DC = DC->getParent(); if (auto *CTSD = dyn_cast(DC)) { if (CTSD->isInStdNamespace() && llvm::StringSwitch(CTSD->getName()) .Cases("less", "less_equal", "greater", "greater_equal", true) .Default(false)) { if (RHSType->isNullPtrType()) RHS = ImpCastExprToType(RHS.get(), LHSType, CK_NullToPointer); else LHS = ImpCastExprToType(LHS.get(), RHSType, CK_NullToPointer); return ResultTy; } } } // C++ [expr.eq]p2: // If at least one operand is a pointer to member, [...] bring them to // their composite pointer type. if (!IsRelational && (LHSType->isMemberPointerType() || RHSType->isMemberPointerType())) { if (convertPointersToCompositeType(*this, Loc, LHS, RHS)) return QualType(); else return ResultTy; } // Handle scoped enumeration types specifically, since they don't promote // to integers. if (LHS.get()->getType()->isEnumeralType() && Context.hasSameUnqualifiedType(LHS.get()->getType(), RHS.get()->getType())) return ResultTy; } // Handle block pointer types. if (!IsRelational && LHSType->isBlockPointerType() && RHSType->isBlockPointerType()) { QualType lpointee = LHSType->castAs()->getPointeeType(); QualType rpointee = RHSType->castAs()->getPointeeType(); if (!LHSIsNull && !RHSIsNull && !Context.typesAreCompatible(lpointee, rpointee)) { Diag(Loc, diag::err_typecheck_comparison_of_distinct_blocks) << LHSType << RHSType << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); } RHS = ImpCastExprToType(RHS.get(), LHSType, CK_BitCast); return ResultTy; } // Allow block pointers to be compared with null pointer constants. if (!IsRelational && ((LHSType->isBlockPointerType() && RHSType->isPointerType()) || (LHSType->isPointerType() && RHSType->isBlockPointerType()))) { if (!LHSIsNull && !RHSIsNull) { if (!((RHSType->isPointerType() && RHSType->castAs() ->getPointeeType()->isVoidType()) || (LHSType->isPointerType() && LHSType->castAs() ->getPointeeType()->isVoidType()))) Diag(Loc, diag::err_typecheck_comparison_of_distinct_blocks) << LHSType << RHSType << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); } if (LHSIsNull && !RHSIsNull) LHS = ImpCastExprToType(LHS.get(), RHSType, RHSType->isPointerType() ? 
CK_BitCast : CK_AnyPointerToBlockPointerCast); else RHS = ImpCastExprToType(RHS.get(), LHSType, LHSType->isPointerType() ? CK_BitCast : CK_AnyPointerToBlockPointerCast); return ResultTy; } if (LHSType->isObjCObjectPointerType() || RHSType->isObjCObjectPointerType()) { const PointerType *LPT = LHSType->getAs(); const PointerType *RPT = RHSType->getAs(); if (LPT || RPT) { bool LPtrToVoid = LPT ? LPT->getPointeeType()->isVoidType() : false; bool RPtrToVoid = RPT ? RPT->getPointeeType()->isVoidType() : false; if (!LPtrToVoid && !RPtrToVoid && !Context.typesAreCompatible(LHSType, RHSType)) { diagnoseDistinctPointerComparison(*this, Loc, LHS, RHS, /*isError*/false); } if (LHSIsNull && !RHSIsNull) { Expr *E = LHS.get(); if (getLangOpts().ObjCAutoRefCount) CheckObjCConversion(SourceRange(), RHSType, E, CCK_ImplicitConversion); LHS = ImpCastExprToType(E, RHSType, RPT ? CK_BitCast :CK_CPointerToObjCPointerCast); } else { Expr *E = RHS.get(); if (getLangOpts().ObjCAutoRefCount) CheckObjCConversion(SourceRange(), LHSType, E, CCK_ImplicitConversion, /*Diagnose=*/true, /*DiagnoseCFAudited=*/false, Opc); RHS = ImpCastExprToType(E, LHSType, LPT ? CK_BitCast :CK_CPointerToObjCPointerCast); } return ResultTy; } if (LHSType->isObjCObjectPointerType() && RHSType->isObjCObjectPointerType()) { if (!Context.areComparableObjCPointerTypes(LHSType, RHSType)) diagnoseDistinctPointerComparison(*this, Loc, LHS, RHS, /*isError*/false); if (isObjCObjectLiteral(LHS) || isObjCObjectLiteral(RHS)) diagnoseObjCLiteralComparison(*this, Loc, LHS, RHS, Opc); if (LHSIsNull && !RHSIsNull) LHS = ImpCastExprToType(LHS.get(), RHSType, CK_BitCast); else RHS = ImpCastExprToType(RHS.get(), LHSType, CK_BitCast); return ResultTy; } } if ((LHSType->isAnyPointerType() && RHSType->isIntegerType()) || (LHSType->isIntegerType() && RHSType->isAnyPointerType())) { unsigned DiagID = 0; bool isError = false; if (LangOpts.DebuggerSupport) { // Under a debugger, allow the comparison of pointers to integers, // since users tend to want to compare addresses. } else if ((LHSIsNull && LHSType->isIntegerType()) || (RHSIsNull && RHSType->isIntegerType())) { if (IsRelational) { isError = getLangOpts().CPlusPlus; DiagID = isError ? diag::err_typecheck_ordered_comparison_of_pointer_and_zero : diag::ext_typecheck_ordered_comparison_of_pointer_and_zero; } } else if (getLangOpts().CPlusPlus) { DiagID = diag::err_typecheck_comparison_of_pointer_integer; isError = true; } else if (IsRelational) DiagID = diag::ext_typecheck_ordered_comparison_of_pointer_integer; else DiagID = diag::ext_typecheck_comparison_of_pointer_integer; if (DiagID) { Diag(Loc, DiagID) << LHSType << RHSType << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); if (isError) return QualType(); } if (LHSType->isIntegerType()) LHS = ImpCastExprToType(LHS.get(), RHSType, LHSIsNull ? CK_NullToPointer : CK_IntegralToPointer); else RHS = ImpCastExprToType(RHS.get(), LHSType, RHSIsNull ? CK_NullToPointer : CK_IntegralToPointer); return ResultTy; } // Handle block pointers. 
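  // (Editor's note) i.e. equality comparisons of a block pointer against an
  // integer null pointer constant, as in 'blockPtr == 0'.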
if (!IsRelational && RHSIsNull && LHSType->isBlockPointerType() && RHSType->isIntegerType()) { RHS = ImpCastExprToType(RHS.get(), LHSType, CK_NullToPointer); return ResultTy; } if (!IsRelational && LHSIsNull && LHSType->isIntegerType() && RHSType->isBlockPointerType()) { LHS = ImpCastExprToType(LHS.get(), RHSType, CK_NullToPointer); return ResultTy; } if (getLangOpts().OpenCLVersion >= 200) { if (LHSIsNull && RHSType->isQueueT()) { LHS = ImpCastExprToType(LHS.get(), RHSType, CK_NullToPointer); return ResultTy; } if (LHSType->isQueueT() && RHSIsNull) { RHS = ImpCastExprToType(RHS.get(), LHSType, CK_NullToPointer); return ResultTy; } } return InvalidOperands(Loc, LHS, RHS); } // Return a signed ext_vector_type that is of identical size and number of // elements. For floating point vectors, return an integer type of identical // size and number of elements. In the non ext_vector_type case, search from // the largest type to the smallest type to avoid cases where long long == long, // where long gets picked over long long. QualType Sema::GetSignedVectorType(QualType V) { const VectorType *VTy = V->getAs(); unsigned TypeSize = Context.getTypeSize(VTy->getElementType()); if (isa(VTy)) { if (TypeSize == Context.getTypeSize(Context.CharTy)) return Context.getExtVectorType(Context.CharTy, VTy->getNumElements()); else if (TypeSize == Context.getTypeSize(Context.ShortTy)) return Context.getExtVectorType(Context.ShortTy, VTy->getNumElements()); else if (TypeSize == Context.getTypeSize(Context.IntTy)) return Context.getExtVectorType(Context.IntTy, VTy->getNumElements()); else if (TypeSize == Context.getTypeSize(Context.LongTy)) return Context.getExtVectorType(Context.LongTy, VTy->getNumElements()); assert(TypeSize == Context.getTypeSize(Context.LongLongTy) && "Unhandled vector element size in vector compare"); return Context.getExtVectorType(Context.LongLongTy, VTy->getNumElements()); } if (TypeSize == Context.getTypeSize(Context.LongLongTy)) return Context.getVectorType(Context.LongLongTy, VTy->getNumElements(), VectorType::GenericVector); else if (TypeSize == Context.getTypeSize(Context.LongTy)) return Context.getVectorType(Context.LongTy, VTy->getNumElements(), VectorType::GenericVector); else if (TypeSize == Context.getTypeSize(Context.IntTy)) return Context.getVectorType(Context.IntTy, VTy->getNumElements(), VectorType::GenericVector); else if (TypeSize == Context.getTypeSize(Context.ShortTy)) return Context.getVectorType(Context.ShortTy, VTy->getNumElements(), VectorType::GenericVector); assert(TypeSize == Context.getTypeSize(Context.CharTy) && "Unhandled vector element size in vector compare"); return Context.getVectorType(Context.CharTy, VTy->getNumElements(), VectorType::GenericVector); } /// CheckVectorCompareOperands - vector comparisons are a clang extension that /// operates on extended vector types. Instead of producing an IntTy result, /// like a scalar comparison, a vector comparison produces a vector of integer /// types. QualType Sema::CheckVectorCompareOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, bool IsRelational) { // Check to make sure we're operating on vectors of the same type and width, // Allowing one side to be a scalar of element type. QualType vType = CheckVectorOperands(LHS, RHS, Loc, /*isCompAssign*/false, /*AllowBothBool*/true, /*AllowBoolConversions*/getLangOpts().ZVector); if (vType.isNull()) return vType; QualType LHSType = LHS.get()->getType(); // If AltiVec, the comparison results in a numeric type, i.e. 
  // bool for C++, int for C
  if (getLangOpts().AltiVec &&
      vType->getAs<VectorType>()->getVectorKind() == VectorType::AltiVecVector)
    return Context.getLogicalOperationType();

  // For non-floating point types, check for self-comparisons of the form
  // x == x, x != x, x < x, etc.  These always evaluate to a constant, and
  // often indicate logic errors in the program.
  if (!LHSType->hasFloatingRepresentation() && !inTemplateInstantiation()) {
    if (DeclRefExpr* DRL =
            dyn_cast<DeclRefExpr>(LHS.get()->IgnoreParenImpCasts()))
      if (DeclRefExpr* DRR =
              dyn_cast<DeclRefExpr>(RHS.get()->IgnoreParenImpCasts()))
        if (DRL->getDecl() == DRR->getDecl())
          DiagRuntimeBehavior(Loc, nullptr,
                              PDiag(diag::warn_comparison_always)
                                  << 0 // self-
                                  << 2 // "a constant"
                              );
  }

  // Check for comparisons of floating point operands using != and ==.
  if (!IsRelational && LHSType->hasFloatingRepresentation()) {
    assert(RHS.get()->getType()->hasFloatingRepresentation());
    CheckFloatComparison(Loc, LHS.get(), RHS.get());
  }

  // Return a signed type for the vector.
  return GetSignedVectorType(vType);
}

QualType Sema::CheckVectorLogicalOperands(ExprResult &LHS, ExprResult &RHS,
                                          SourceLocation Loc) {
  // Ensure that either both operands are of the same vector type, or
  // one operand is of a vector type and the other is of its element type.
  QualType vType = CheckVectorOperands(LHS, RHS, Loc, false,
                                       /*AllowBothBool*/true,
                                       /*AllowBoolConversions*/false);
  if (vType.isNull())
    return InvalidOperands(Loc, LHS, RHS);
  if (getLangOpts().OpenCL && getLangOpts().OpenCLVersion < 120 &&
      vType->hasFloatingRepresentation())
    return InvalidOperands(Loc, LHS, RHS);
  // FIXME: The check for C++ here is for GCC compatibility. GCC rejects the
  //        usage of the logical operators && and || with vectors in C. This
  //        check could be notionally dropped.
  if (!getLangOpts().CPlusPlus &&
      !(isa<ExtVectorType>(vType->getAs<VectorType>())))
    return InvalidLogicalVectorOperands(Loc, LHS, RHS);

  return GetSignedVectorType(LHS.get()->getType());
}

inline QualType Sema::CheckBitwiseOperands(ExprResult &LHS, ExprResult &RHS,
                                           SourceLocation Loc,
                                           BinaryOperatorKind Opc) {
  checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/false);

  bool IsCompAssign =
      Opc == BO_AndAssign || Opc == BO_OrAssign || Opc == BO_XorAssign;

  if (LHS.get()->getType()->isVectorType() ||
      RHS.get()->getType()->isVectorType()) {
    if (LHS.get()->getType()->hasIntegerRepresentation() &&
        RHS.get()->getType()->hasIntegerRepresentation())
      return CheckVectorOperands(LHS, RHS, Loc, IsCompAssign,
                                 /*AllowBothBool*/true,
                                 /*AllowBoolConversions*/getLangOpts().ZVector);
    return InvalidOperands(Loc, LHS, RHS);
  }

  if (Opc == BO_And)
    diagnoseLogicalNotOnLHSofCheck(*this, LHS, RHS, Loc, Opc);

  ExprResult LHSResult = LHS, RHSResult = RHS;
  QualType compType = UsualArithmeticConversions(LHSResult, RHSResult,
                                                 IsCompAssign);
  if (LHSResult.isInvalid() || RHSResult.isInvalid())
    return QualType();
  LHS = LHSResult.get();
  RHS = RHSResult.get();

  if (!compType.isNull() && compType->isIntegralOrUnscopedEnumerationType())
    return compType;
  return InvalidOperands(Loc, LHS, RHS);
}

// C99 6.5.[13,14]
inline QualType Sema::CheckLogicalOperands(ExprResult &LHS, ExprResult &RHS,
                                           SourceLocation Loc,
                                           BinaryOperatorKind Opc) {
  // Check vector operands differently.
  if (LHS.get()->getType()->isVectorType() ||
      RHS.get()->getType()->isVectorType())
    return CheckVectorLogicalOperands(LHS, RHS, Loc);

  // Diagnose cases where the user wrote a logical and/or but probably meant a
  // bitwise one.  We do this when the LHS is a non-bool integer and the RHS
  // is a constant.
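  // (Editor's illustration)
  //   if (x && 4) { }   // warns; a fix-it suggests 'x & 4', and for '&&' a
  //                     // second note suggests dropping the constant.
  // Constants that fold to 0 or 1 (e.g. 'x && 1') are not diagnosed.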
if (LHS.get()->getType()->isIntegerType() && !LHS.get()->getType()->isBooleanType() && RHS.get()->getType()->isIntegerType() && !RHS.get()->isValueDependent() && // Don't warn in macros or template instantiations. !Loc.isMacroID() && !inTemplateInstantiation()) { // If the RHS can be constant folded, and if it constant folds to something // that isn't 0 or 1 (which indicate a potential logical operation that // happened to fold to true/false) then warn. // Parens on the RHS are ignored. llvm::APSInt Result; if (RHS.get()->EvaluateAsInt(Result, Context)) if ((getLangOpts().Bool && !RHS.get()->getType()->isBooleanType() && !RHS.get()->getExprLoc().isMacroID()) || (Result != 0 && Result != 1)) { Diag(Loc, diag::warn_logical_instead_of_bitwise) << RHS.get()->getSourceRange() << (Opc == BO_LAnd ? "&&" : "||"); // Suggest replacing the logical operator with the bitwise version Diag(Loc, diag::note_logical_instead_of_bitwise_change_operator) << (Opc == BO_LAnd ? "&" : "|") << FixItHint::CreateReplacement(SourceRange( Loc, getLocForEndOfToken(Loc)), Opc == BO_LAnd ? "&" : "|"); if (Opc == BO_LAnd) // Suggest replacing "Foo() && kNonZero" with "Foo()" Diag(Loc, diag::note_logical_instead_of_bitwise_remove_constant) << FixItHint::CreateRemoval( SourceRange(getLocForEndOfToken(LHS.get()->getLocEnd()), RHS.get()->getLocEnd())); } } if (!Context.getLangOpts().CPlusPlus) { // OpenCL v1.1 s6.3.g: The logical operators and (&&), or (||) do // not operate on the built-in scalar and vector float types. if (Context.getLangOpts().OpenCL && Context.getLangOpts().OpenCLVersion < 120) { if (LHS.get()->getType()->isFloatingType() || RHS.get()->getType()->isFloatingType()) return InvalidOperands(Loc, LHS, RHS); } LHS = UsualUnaryConversions(LHS.get()); if (LHS.isInvalid()) return QualType(); RHS = UsualUnaryConversions(RHS.get()); if (RHS.isInvalid()) return QualType(); if (!LHS.get()->getType()->isScalarType() || !RHS.get()->getType()->isScalarType()) return InvalidOperands(Loc, LHS, RHS); return Context.IntTy; } // The following is safe because we only use this method for // non-overloadable operands. // C++ [expr.log.and]p1 // C++ [expr.log.or]p1 // The operands are both contextually converted to type bool. ExprResult LHSRes = PerformContextuallyConvertToBool(LHS.get()); if (LHSRes.isInvalid()) return InvalidOperands(Loc, LHS, RHS); LHS = LHSRes; ExprResult RHSRes = PerformContextuallyConvertToBool(RHS.get()); if (RHSRes.isInvalid()) return InvalidOperands(Loc, LHS, RHS); RHS = RHSRes; // C++ [expr.log.and]p2 // C++ [expr.log.or]p2 // The result is a bool. return Context.BoolTy; } static bool IsReadonlyMessage(Expr *E, Sema &S) { const MemberExpr *ME = dyn_cast(E); if (!ME) return false; if (!isa(ME->getMemberDecl())) return false; ObjCMessageExpr *Base = dyn_cast( ME->getBase()->IgnoreImplicit()->IgnoreParenImpCasts()); if (!Base) return false; return Base->getMethodDecl() != nullptr; } /// Is the given expression (which must be 'const') a reference to a /// variable which was originally non-const, but which has become /// 'const' due to being captured within a block? enum NonConstCaptureKind { NCCK_None, NCCK_Block, NCCK_Lambda }; static NonConstCaptureKind isReferenceToNonConstCapture(Sema &S, Expr *E) { assert(E->isLValue() && E->getType().isConstQualified()); E = E->IgnoreParens(); // Must be a reference to a declaration from an enclosing scope. 
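  // (Editor's sketch of the case being classified, hypothetical:
  //    int i = 0;  ^{ i = 1; }();   // block copy-captures 'i' as const
  //    [=] { i = 1; };              // non-mutable lambda, likewise
  //  the assignment to 'i' is const only because of the capture, so the
  //  caller can emit the block- or lambda-specific diagnostic.)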
  DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E);
  if (!DRE) return NCCK_None;
  if (!DRE->refersToEnclosingVariableOrCapture()) return NCCK_None;

  // The declaration must be a variable which is not declared 'const'.
  VarDecl *var = dyn_cast<VarDecl>(DRE->getDecl());
  if (!var) return NCCK_None;
  if (var->getType().isConstQualified()) return NCCK_None;
  assert(var->hasLocalStorage() && "capture added 'const' to non-local?");

  // Decide whether the first capture was for a block or a lambda.
  DeclContext *DC = S.CurContext, *Prev = nullptr;
  while (DC) {
    // For init-capture, it is possible that the variable belongs to the
    // template pattern of the current context.
    if (auto *FD = dyn_cast<FunctionDecl>(DC))
      if (var->isInitCapture() &&
          FD->getTemplateInstantiationPattern() == var->getDeclContext())
        break;
    if (DC == var->getDeclContext())
      break;
    Prev = DC;
    DC = DC->getParent();
  }
  // Unless we have an init-capture, we've gone one step too far.
  if (!var->isInitCapture())
    DC = Prev;
  return (isa<BlockDecl>(DC) ? NCCK_Block : NCCK_Lambda);
}

static bool IsTypeModifiable(QualType Ty, bool IsDereference) {
  Ty = Ty.getNonReferenceType();
  if (IsDereference && Ty->isPointerType())
    Ty = Ty->getPointeeType();
  return !Ty.isConstQualified();
}

// Update err_typecheck_assign_const and note_typecheck_assign_const
// when this enum is changed.
enum {
  ConstFunction,
  ConstVariable,
  ConstMember,
  ConstMethod,
  NestedConstMember,
  ConstUnknown,  // Keep as last element
};

/// Emit the "read-only variable not assignable" error and print notes to give
/// more information about why the variable is not assignable, such as pointing
/// to the declaration of a const variable, showing that a method is const, or
/// that the function is returning a const reference.
static void DiagnoseConstAssignment(Sema &S, const Expr *E,
                                    SourceLocation Loc) {
  SourceRange ExprRange = E->getSourceRange();

  // Only emit one error on the first const found.  All other consts will emit
  // a note to the error.
  bool DiagnosticEmitted = false;

  // Track if the current expression is the result of a dereference, and if
  // the next checked expression is the result of a dereference.
  bool IsDereference = false;
  bool NextIsDereference = false;

  // Loop to process MemberExpr chains.
  while (true) {
    IsDereference = NextIsDereference;

    E = E->IgnoreImplicit()->IgnoreParenImpCasts();
    if (const MemberExpr *ME = dyn_cast<MemberExpr>(E)) {
      NextIsDereference = ME->isArrow();
      const ValueDecl *VD = ME->getMemberDecl();
      if (const FieldDecl *Field = dyn_cast<FieldDecl>(VD)) {
        // Mutable fields can be modified even if the class is const.
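        // (Editor's example: for 'struct S { mutable int hits; int id; };'
        //  and 'const S s;', 's.hits = 1' is fine while 's.id = 1' falls
        //  through to the const-member diagnostic below.)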
if (Field->isMutable()) { assert(DiagnosticEmitted && "Expected diagnostic not emitted."); break; } if (!IsTypeModifiable(Field->getType(), IsDereference)) { if (!DiagnosticEmitted) { S.Diag(Loc, diag::err_typecheck_assign_const) << ExprRange << ConstMember << false /*static*/ << Field << Field->getType(); DiagnosticEmitted = true; } S.Diag(VD->getLocation(), diag::note_typecheck_assign_const) << ConstMember << false /*static*/ << Field << Field->getType() << Field->getSourceRange(); } E = ME->getBase(); continue; } else if (const VarDecl *VDecl = dyn_cast(VD)) { if (VDecl->getType().isConstQualified()) { if (!DiagnosticEmitted) { S.Diag(Loc, diag::err_typecheck_assign_const) << ExprRange << ConstMember << true /*static*/ << VDecl << VDecl->getType(); DiagnosticEmitted = true; } S.Diag(VD->getLocation(), diag::note_typecheck_assign_const) << ConstMember << true /*static*/ << VDecl << VDecl->getType() << VDecl->getSourceRange(); } // Static fields do not inherit constness from parents. break; } break; } // End MemberExpr break; } if (const CallExpr *CE = dyn_cast(E)) { // Function calls const FunctionDecl *FD = CE->getDirectCallee(); if (FD && !IsTypeModifiable(FD->getReturnType(), IsDereference)) { if (!DiagnosticEmitted) { S.Diag(Loc, diag::err_typecheck_assign_const) << ExprRange << ConstFunction << FD; DiagnosticEmitted = true; } S.Diag(FD->getReturnTypeSourceRange().getBegin(), diag::note_typecheck_assign_const) << ConstFunction << FD << FD->getReturnType() << FD->getReturnTypeSourceRange(); } } else if (const DeclRefExpr *DRE = dyn_cast(E)) { // Point to variable declaration. if (const ValueDecl *VD = DRE->getDecl()) { if (!IsTypeModifiable(VD->getType(), IsDereference)) { if (!DiagnosticEmitted) { S.Diag(Loc, diag::err_typecheck_assign_const) << ExprRange << ConstVariable << VD << VD->getType(); DiagnosticEmitted = true; } S.Diag(VD->getLocation(), diag::note_typecheck_assign_const) << ConstVariable << VD << VD->getType() << VD->getSourceRange(); } } } else if (isa(E)) { if (const DeclContext *DC = S.getFunctionLevelDeclContext()) { if (const CXXMethodDecl *MD = dyn_cast(DC)) { if (MD->isConst()) { if (!DiagnosticEmitted) { S.Diag(Loc, diag::err_typecheck_assign_const) << ExprRange << ConstMethod << MD; DiagnosticEmitted = true; } S.Diag(MD->getLocation(), diag::note_typecheck_assign_const) << ConstMethod << MD << MD->getSourceRange(); } } } } if (DiagnosticEmitted) return; // Can't determine a more specific message, so display the generic error. S.Diag(Loc, diag::err_typecheck_assign_const) << ExprRange << ConstUnknown; } enum OriginalExprKind { OEK_Variable, OEK_Member, OEK_LValue }; static void DiagnoseRecursiveConstFields(Sema &S, const ValueDecl *VD, const RecordType *Ty, SourceLocation Loc, SourceRange Range, OriginalExprKind OEK, bool &DiagnosticEmitted, bool IsNested = false) { // We walk the record hierarchy breadth-first to ensure that we print // diagnostics in field nesting order. // First, check every field for constness. for (const FieldDecl *Field : Ty->getDecl()->fields()) { if (Field->getType().isConstQualified()) { if (!DiagnosticEmitted) { S.Diag(Loc, diag::err_typecheck_assign_const) << Range << NestedConstMember << OEK << VD << IsNested << Field; DiagnosticEmitted = true; } S.Diag(Field->getLocation(), diag::note_typecheck_assign_const) << NestedConstMember << IsNested << Field << Field->getType() << Field->getSourceRange(); } } // Then, recurse. 
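  // (Editor's sketch: given 'struct A { const int x; };' nested inside
  //  'struct B { A a; };', assigning to a 'B' lvalue reports 'x' as the
  //  nested const member; all fields at one level are diagnosed before
  //  recursing, matching the breadth-first walk described above.)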
for (const FieldDecl *Field : Ty->getDecl()->fields()) { QualType FTy = Field->getType(); if (const RecordType *FieldRecTy = FTy->getAs()) DiagnoseRecursiveConstFields(S, VD, FieldRecTy, Loc, Range, OEK, DiagnosticEmitted, true); } } /// Emit an error for the case where a record we are trying to assign to has a /// const-qualified field somewhere in its hierarchy. static void DiagnoseRecursiveConstFields(Sema &S, const Expr *E, SourceLocation Loc) { QualType Ty = E->getType(); assert(Ty->isRecordType() && "lvalue was not record?"); SourceRange Range = E->getSourceRange(); const RecordType *RTy = Ty.getCanonicalType()->getAs(); bool DiagEmitted = false; if (const MemberExpr *ME = dyn_cast(E)) DiagnoseRecursiveConstFields(S, ME->getMemberDecl(), RTy, Loc, Range, OEK_Member, DiagEmitted); else if (const DeclRefExpr *DRE = dyn_cast(E)) DiagnoseRecursiveConstFields(S, DRE->getDecl(), RTy, Loc, Range, OEK_Variable, DiagEmitted); else DiagnoseRecursiveConstFields(S, nullptr, RTy, Loc, Range, OEK_LValue, DiagEmitted); if (!DiagEmitted) DiagnoseConstAssignment(S, E, Loc); } /// CheckForModifiableLvalue - Verify that E is a modifiable lvalue. If not, /// emit an error and return true. If so, return false. static bool CheckForModifiableLvalue(Expr *E, SourceLocation Loc, Sema &S) { assert(!E->hasPlaceholderType(BuiltinType::PseudoObject)); S.CheckShadowingDeclModification(E, Loc); SourceLocation OrigLoc = Loc; Expr::isModifiableLvalueResult IsLV = E->isModifiableLvalue(S.Context, &Loc); if (IsLV == Expr::MLV_ClassTemporary && IsReadonlyMessage(E, S)) IsLV = Expr::MLV_InvalidMessageExpression; if (IsLV == Expr::MLV_Valid) return false; unsigned DiagID = 0; bool NeedType = false; switch (IsLV) { // C99 6.5.16p2 case Expr::MLV_ConstQualified: // Use a specialized diagnostic when we're assigning to an object // from an enclosing function or block. if (NonConstCaptureKind NCCK = isReferenceToNonConstCapture(S, E)) { if (NCCK == NCCK_Block) DiagID = diag::err_block_decl_ref_not_modifiable_lvalue; else DiagID = diag::err_lambda_decl_ref_not_modifiable_lvalue; break; } // In ARC, use some specialized diagnostics for occasions where we // infer 'const'. These are always pseudo-strong variables. if (S.getLangOpts().ObjCAutoRefCount) { DeclRefExpr *declRef = dyn_cast(E->IgnoreParenCasts()); if (declRef && isa(declRef->getDecl())) { VarDecl *var = cast(declRef->getDecl()); // Use the normal diagnostic if it's pseudo-__strong but the // user actually wrote 'const'. if (var->isARCPseudoStrong() && (!var->getTypeSourceInfo() || !var->getTypeSourceInfo()->getType().isConstQualified())) { // There are two pseudo-strong cases: // - self ObjCMethodDecl *method = S.getCurMethodDecl(); if (method && var == method->getSelfDecl()) DiagID = method->isClassMethod() ? diag::err_typecheck_arc_assign_self_class_method : diag::err_typecheck_arc_assign_self; // - fast enumeration variables else DiagID = diag::err_typecheck_arr_assign_enumeration; SourceRange Assign; if (Loc != OrigLoc) Assign = SourceRange(OrigLoc, OrigLoc); S.Diag(Loc, DiagID) << E->getSourceRange() << Assign; // We need to preserve the AST regardless, so migration tool // can do its job. return false; } } } // If none of the special cases above are triggered, then this is a // simple const assignment. 
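  // (Editor's example of the plain fallthrough case: 'const int n = 0;
  //  n = 1;' reaches DiagnoseConstAssignment, which emits
  //  err_typecheck_assign_const and points a note at n's declaration.)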
if (DiagID == 0) { DiagnoseConstAssignment(S, E, Loc); return true; } break; case Expr::MLV_ConstAddrSpace: DiagnoseConstAssignment(S, E, Loc); return true; case Expr::MLV_ConstQualifiedField: DiagnoseRecursiveConstFields(S, E, Loc); return true; case Expr::MLV_ArrayType: case Expr::MLV_ArrayTemporary: DiagID = diag::err_typecheck_array_not_modifiable_lvalue; NeedType = true; break; case Expr::MLV_NotObjectType: DiagID = diag::err_typecheck_non_object_not_modifiable_lvalue; NeedType = true; break; case Expr::MLV_LValueCast: DiagID = diag::err_typecheck_lvalue_casts_not_supported; break; case Expr::MLV_Valid: llvm_unreachable("did not take early return for MLV_Valid"); case Expr::MLV_InvalidExpression: case Expr::MLV_MemberFunction: case Expr::MLV_ClassTemporary: DiagID = diag::err_typecheck_expression_not_modifiable_lvalue; break; case Expr::MLV_IncompleteType: case Expr::MLV_IncompleteVoidType: return S.RequireCompleteType(Loc, E->getType(), diag::err_typecheck_incomplete_type_not_modifiable_lvalue, E); case Expr::MLV_DuplicateVectorComponents: DiagID = diag::err_typecheck_duplicate_vector_components_not_mlvalue; break; case Expr::MLV_NoSetterProperty: llvm_unreachable("readonly properties should be processed differently"); case Expr::MLV_InvalidMessageExpression: DiagID = diag::err_readonly_message_assignment; break; case Expr::MLV_SubObjCPropertySetting: DiagID = diag::err_no_subobject_property_setting; break; } SourceRange Assign; if (Loc != OrigLoc) Assign = SourceRange(OrigLoc, OrigLoc); if (NeedType) S.Diag(Loc, DiagID) << E->getType() << E->getSourceRange() << Assign; else S.Diag(Loc, DiagID) << E->getSourceRange() << Assign; return true; } static void CheckIdentityFieldAssignment(Expr *LHSExpr, Expr *RHSExpr, SourceLocation Loc, Sema &Sema) { // C / C++ fields MemberExpr *ML = dyn_cast(LHSExpr); MemberExpr *MR = dyn_cast(RHSExpr); if (ML && MR && ML->getMemberDecl() == MR->getMemberDecl()) { if (isa(ML->getBase()) && isa(MR->getBase())) Sema.Diag(Loc, diag::warn_identity_field_assign) << 0; } // Objective-C instance variables ObjCIvarRefExpr *OL = dyn_cast(LHSExpr); ObjCIvarRefExpr *OR = dyn_cast(RHSExpr); if (OL && OR && OL->getDecl() == OR->getDecl()) { DeclRefExpr *RL = dyn_cast(OL->getBase()->IgnoreImpCasts()); DeclRefExpr *RR = dyn_cast(OR->getBase()->IgnoreImpCasts()); if (RL && RR && RL->getDecl() == RR->getDecl()) Sema.Diag(Loc, diag::warn_identity_field_assign) << 1; } } // C99 6.5.16.1 QualType Sema::CheckAssignmentOperands(Expr *LHSExpr, ExprResult &RHS, SourceLocation Loc, QualType CompoundType) { assert(!LHSExpr->hasPlaceholderType(BuiltinType::PseudoObject)); // Verify that LHS is a modifiable lvalue, and emit error if not. if (CheckForModifiableLvalue(LHSExpr, Loc, *this)) return QualType(); QualType LHSType = LHSExpr->getType(); QualType RHSType = CompoundType.isNull() ? RHS.get()->getType() : CompoundType; // OpenCL v1.2 s6.1.1.1 p2: // The half data type can only be used to declare a pointer to a buffer that // contains half values if (getLangOpts().OpenCL && !getOpenCLOptions().isEnabled("cl_khr_fp16") && LHSType->isHalfType()) { Diag(Loc, diag::err_opencl_half_load_store) << 1 << LHSType.getUnqualifiedType(); return QualType(); } AssignConvertType ConvTy; if (CompoundType.isNull()) { Expr *RHSCheck = RHS.get(); CheckIdentityFieldAssignment(LHSExpr, RHSCheck, Loc, *this); QualType LHSTy(LHSType); ConvTy = CheckSingleAssignmentConstraints(LHSTy, RHS); if (RHS.isInvalid()) return QualType(); // Special case of NSObject attributes on c-style pointer types. 
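    // (Editor's sketch, hypothetical typedef: with
    //    typedef struct CGColor *CGColorRef __attribute__((NSObject));
    //  an assignment between CGColorRef and an ObjC object pointer is
    //  downgraded from IncompatiblePointer to Compatible below.)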
if (ConvTy == IncompatiblePointer && ((Context.isObjCNSObjectType(LHSType) && RHSType->isObjCObjectPointerType()) || (Context.isObjCNSObjectType(RHSType) && LHSType->isObjCObjectPointerType()))) ConvTy = Compatible; if (ConvTy == Compatible && LHSType->isObjCObjectType()) Diag(Loc, diag::err_objc_object_assignment) << LHSType; // If the RHS is a unary plus or minus, check to see if they = and + are // right next to each other. If so, the user may have typo'd "x =+ 4" // instead of "x += 4". if (ImplicitCastExpr *ICE = dyn_cast(RHSCheck)) RHSCheck = ICE->getSubExpr(); if (UnaryOperator *UO = dyn_cast(RHSCheck)) { if ((UO->getOpcode() == UO_Plus || UO->getOpcode() == UO_Minus) && Loc.isFileID() && UO->getOperatorLoc().isFileID() && // Only if the two operators are exactly adjacent. Loc.getLocWithOffset(1) == UO->getOperatorLoc() && // And there is a space or other character before the subexpr of the // unary +/-. We don't want to warn on "x=-1". Loc.getLocWithOffset(2) != UO->getSubExpr()->getLocStart() && UO->getSubExpr()->getLocStart().isFileID()) { Diag(Loc, diag::warn_not_compound_assign) << (UO->getOpcode() == UO_Plus ? "+" : "-") << SourceRange(UO->getOperatorLoc(), UO->getOperatorLoc()); } } if (ConvTy == Compatible) { if (LHSType.getObjCLifetime() == Qualifiers::OCL_Strong) { // Warn about retain cycles where a block captures the LHS, but // not if the LHS is a simple variable into which the block is // being stored...unless that variable can be captured by reference! const Expr *InnerLHS = LHSExpr->IgnoreParenCasts(); const DeclRefExpr *DRE = dyn_cast(InnerLHS); if (!DRE || DRE->getDecl()->hasAttr()) checkRetainCycles(LHSExpr, RHS.get()); } if (LHSType.getObjCLifetime() == Qualifiers::OCL_Strong || LHSType.isNonWeakInMRRWithObjCWeak(Context)) { // It is safe to assign a weak reference into a strong variable. // Although this code can still have problems: // id x = self.weakProp; // id y = self.weakProp; // we do not warn to warn spuriously when 'x' and 'y' are on separate // paths through the function. This should be revisited if // -Wrepeated-use-of-weak is made flow-sensitive. // For ObjCWeak only, we do not warn if the assign is to a non-weak // variable, which will be valid for the current autorelease scope. if (!Diags.isIgnored(diag::warn_arc_repeated_use_of_weak, RHS.get()->getLocStart())) getCurFunction()->markSafeWeakUse(RHS.get()); } else if (getLangOpts().ObjCAutoRefCount || getLangOpts().ObjCWeak) { checkUnsafeExprAssigns(Loc, LHSExpr, RHS.get()); } } } else { // Compound assignment "x += y" ConvTy = CheckAssignmentConstraints(Loc, LHSType, RHSType); } if (DiagnoseAssignmentResult(ConvTy, Loc, LHSType, RHSType, RHS.get(), AA_Assigning)) return QualType(); CheckForNullPointerDereference(*this, LHSExpr); // C99 6.5.16p3: The type of an assignment expression is the type of the // left operand unless the left operand has qualified type, in which case // it is the unqualified version of the type of the left operand. // C99 6.5.16.1p2: In simple assignment, the value of the right operand // is converted to the type of the assignment expression (above). // C++ 5.17p1: the type of the assignment expression is that of its left // operand. return (getLangOpts().CPlusPlus ? LHSType : LHSType.getUnqualifiedType()); } // Only ignore explicit casts to void. 
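// (Editor's example: at statement scope, 'f(), g();' triggers
//  warn_comma_operator with a fixit toward '(void)(f()), g();' or
//  'static_cast<void>(f()), g();', while a for-loop init or increment such
//  as 'i++, j++' is whitelisted by the scope-flag check below.)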
static bool IgnoreCommaOperand(const Expr *E) {
  E = E->IgnoreParens();

  if (const CastExpr *CE = dyn_cast<CastExpr>(E)) {
    if (CE->getCastKind() == CK_ToVoid) {
      return true;
    }
  }

  return false;
}

// Look for instances where it is likely the comma operator is confused with
// another operator.  There is a whitelist of acceptable expressions for the
// left hand side of the comma operator, otherwise emit a warning.
void Sema::DiagnoseCommaOperator(const Expr *LHS, SourceLocation Loc) {
  // No warnings in macros
  if (Loc.isMacroID())
    return;

  // Don't warn in template instantiations.
  if (inTemplateInstantiation())
    return;

  // Scope isn't fine-grained enough to whitelist the specific cases, so
  // instead, skip more than needed, then call back into here with the
  // CommaVisitor in SemaStmt.cpp.
  // The whitelisted locations are the initialization and increment portions
  // of a for loop.  The additional checks are on the condition of
  // if statements, do/while loops, and for loops.
  const unsigned ForIncrementFlags =
      Scope::ControlScope | Scope::ContinueScope | Scope::BreakScope;
  const unsigned ForInitFlags = Scope::ControlScope | Scope::DeclScope;
  const unsigned ScopeFlags = getCurScope()->getFlags();
  if ((ScopeFlags & ForIncrementFlags) == ForIncrementFlags ||
      (ScopeFlags & ForInitFlags) == ForInitFlags)
    return;

  // If there are multiple comma operators used together, get the RHS of the
  // comma operator as the LHS.
  while (const BinaryOperator *BO = dyn_cast<BinaryOperator>(LHS)) {
    if (BO->getOpcode() != BO_Comma)
      break;
    LHS = BO->getRHS();
  }

  // Only allow some expressions on LHS to not warn.
  if (IgnoreCommaOperand(LHS))
    return;

  Diag(Loc, diag::warn_comma_operator);
  Diag(LHS->getLocStart(), diag::note_cast_to_void)
      << LHS->getSourceRange()
      << FixItHint::CreateInsertion(LHS->getLocStart(),
                                    LangOpts.CPlusPlus ? "static_cast<void>("
                                                       : "(void)(")
      << FixItHint::CreateInsertion(PP.getLocForEndOfToken(LHS->getLocEnd()),
                                    ")");
}

// C99 6.5.17
static QualType CheckCommaOperands(Sema &S, ExprResult &LHS, ExprResult &RHS,
                                   SourceLocation Loc) {
  LHS = S.CheckPlaceholderExpr(LHS.get());
  RHS = S.CheckPlaceholderExpr(RHS.get());
  if (LHS.isInvalid() || RHS.isInvalid())
    return QualType();

  // C's comma performs lvalue conversion (C99 6.3.2.1) on both its
  // operands, but not unary promotions.
  // C++'s comma does not do any conversions at all (C++ [expr.comma]p1).

  // So we treat the LHS as an ignored value, and in C++ we allow the
  // containing site to determine what should be done with the RHS.
  LHS = S.IgnoredValueConversions(LHS.get());
  if (LHS.isInvalid())
    return QualType();

  S.DiagnoseUnusedExprResult(LHS.get());

  if (!S.getLangOpts().CPlusPlus) {
    RHS = S.DefaultFunctionArrayLvalueConversion(RHS.get());
    if (RHS.isInvalid())
      return QualType();
    if (!RHS.get()->getType()->isVoidType())
      S.RequireCompleteType(Loc, RHS.get()->getType(),
                            diag::err_incomplete_type);
  }

  if (!S.getDiagnostics().isIgnored(diag::warn_comma_operator, Loc))
    S.DiagnoseCommaOperator(LHS.get(), Loc);

  return RHS.get()->getType();
}

/// CheckIncrementDecrementOperand - unlike most "Check" methods, this routine
/// doesn't need to call UsualUnaryConversions or UsualArithmeticConversions.
static QualType CheckIncrementDecrementOperand(Sema &S, Expr *Op,
                                               ExprValueKind &VK,
                                               ExprObjectKind &OK,
                                               SourceLocation OpLoc,
                                               bool IsInc, bool IsPrefix) {
  if (Op->isTypeDependent())
    return S.Context.DependentTy;

  QualType ResType = Op->getType();
  // Atomic types can be used for increment / decrement where the non-atomic
  // versions can, so ignore the _Atomic() specifier for the purpose of
  // checking.
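  // (Editor's example: '_Atomic int ai; ++ai;' is validated exactly like
  //  'int ai; ++ai;' — the _Atomic wrapper is peeled off just below.)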
if (const AtomicType *ResAtomicType = ResType->getAs()) ResType = ResAtomicType->getValueType(); assert(!ResType.isNull() && "no type for increment/decrement expression"); if (S.getLangOpts().CPlusPlus && ResType->isBooleanType()) { // Decrement of bool is not allowed. if (!IsInc) { S.Diag(OpLoc, diag::err_decrement_bool) << Op->getSourceRange(); return QualType(); } // Increment of bool sets it to true, but is deprecated. S.Diag(OpLoc, S.getLangOpts().CPlusPlus17 ? diag::ext_increment_bool : diag::warn_increment_bool) << Op->getSourceRange(); } else if (S.getLangOpts().CPlusPlus && ResType->isEnumeralType()) { // Error on enum increments and decrements in C++ mode S.Diag(OpLoc, diag::err_increment_decrement_enum) << IsInc << ResType; return QualType(); } else if (ResType->isRealType()) { // OK! } else if (ResType->isPointerType()) { // C99 6.5.2.4p2, 6.5.6p2 if (!checkArithmeticOpPointerOperand(S, OpLoc, Op)) return QualType(); } else if (ResType->isObjCObjectPointerType()) { // On modern runtimes, ObjC pointer arithmetic is forbidden. // Otherwise, we just need a complete type. if (checkArithmeticIncompletePointerType(S, OpLoc, Op) || checkArithmeticOnObjCPointer(S, OpLoc, Op)) return QualType(); } else if (ResType->isAnyComplexType()) { // C99 does not support ++/-- on complex types, we allow as an extension. S.Diag(OpLoc, diag::ext_integer_increment_complex) << ResType << Op->getSourceRange(); } else if (ResType->isPlaceholderType()) { ExprResult PR = S.CheckPlaceholderExpr(Op); if (PR.isInvalid()) return QualType(); return CheckIncrementDecrementOperand(S, PR.get(), VK, OK, OpLoc, IsInc, IsPrefix); } else if (S.getLangOpts().AltiVec && ResType->isVectorType()) { // OK! ( C/C++ Language Extensions for CBEA(Version 2.6) 10.3 ) } else if (S.getLangOpts().ZVector && ResType->isVectorType() && (ResType->getAs()->getVectorKind() != VectorType::AltiVecBool)) { // The z vector extensions allow ++ and -- for non-bool vectors. } else if(S.getLangOpts().OpenCL && ResType->isVectorType() && ResType->getAs()->getElementType()->isIntegerType()) { // OpenCL V1.2 6.3 says dec/inc ops operate on integer vector types. } else { S.Diag(OpLoc, diag::err_typecheck_illegal_increment_decrement) << ResType << int(IsInc) << Op->getSourceRange(); return QualType(); } // At this point, we know we have a real, complex or pointer type. // Now make sure the operand is a modifiable lvalue. if (CheckForModifiableLvalue(Op, OpLoc, S)) return QualType(); // In C++, a prefix increment is the same type as the operand. Otherwise // (in C or with postfix), the increment is the unqualified type of the // operand. if (IsPrefix && S.getLangOpts().CPlusPlus) { VK = VK_LValue; OK = Op->getObjectKind(); return ResType; } else { VK = VK_RValue; return ResType.getUnqualifiedType(); } } /// getPrimaryDecl - Helper function for CheckAddressOfOperand(). /// This routine allows us to typecheck complex/recursive expressions /// where the declaration is needed for type checking. We only need to /// handle cases when the expression references a function designator /// or is an lvalue. Here are some examples: /// - &(x) => x /// - &*****f => f for f a function designator. 
/// - &s.xx => s /// - &s.zz[1].yy -> s, if zz is an array /// - *(x + 1) -> x, if x is an array /// - &"123"[2] -> 0 /// - & __real__ x -> x static ValueDecl *getPrimaryDecl(Expr *E) { switch (E->getStmtClass()) { case Stmt::DeclRefExprClass: return cast(E)->getDecl(); case Stmt::MemberExprClass: // If this is an arrow operator, the address is an offset from // the base's value, so the object the base refers to is // irrelevant. if (cast(E)->isArrow()) return nullptr; // Otherwise, the expression refers to a part of the base return getPrimaryDecl(cast(E)->getBase()); case Stmt::ArraySubscriptExprClass: { // FIXME: This code shouldn't be necessary! We should catch the implicit // promotion of register arrays earlier. Expr* Base = cast(E)->getBase(); if (ImplicitCastExpr* ICE = dyn_cast(Base)) { if (ICE->getSubExpr()->getType()->isArrayType()) return getPrimaryDecl(ICE->getSubExpr()); } return nullptr; } case Stmt::UnaryOperatorClass: { UnaryOperator *UO = cast(E); switch(UO->getOpcode()) { case UO_Real: case UO_Imag: case UO_Extension: return getPrimaryDecl(UO->getSubExpr()); default: return nullptr; } } case Stmt::ParenExprClass: return getPrimaryDecl(cast(E)->getSubExpr()); case Stmt::ImplicitCastExprClass: // If the result of an implicit cast is an l-value, we care about // the sub-expression; otherwise, the result here doesn't matter. return getPrimaryDecl(cast(E)->getSubExpr()); default: return nullptr; } } namespace { enum { AO_Bit_Field = 0, AO_Vector_Element = 1, AO_Property_Expansion = 2, AO_Register_Variable = 3, AO_No_Error = 4 }; } /// \brief Diagnose invalid operand for address of operations. /// /// \param Type The type of operand which cannot have its address taken. static void diagnoseAddressOfInvalidType(Sema &S, SourceLocation Loc, Expr *E, unsigned Type) { S.Diag(Loc, diag::err_typecheck_address_of) << Type << E->getSourceRange(); } /// CheckAddressOfOperand - The operand of & must be either a function /// designator or an lvalue designating an object. If it is an lvalue, the /// object cannot be declared with storage class register or be a bit field. /// Note: The usual conversions are *not* applied to the operand of the & /// operator (C99 6.3.2.1p[2-4]), and its result is never an lvalue. /// In C++, the operand might be an overloaded function name, in which case /// we allow the '&' but retain the overloaded-function type. 
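/// (Editor's sketch of classic rejects handled here, hypothetical decls:
///    struct S { int b : 3; } s;  &s.b;   // bit-field
///    register int r;             &r;     // register variable, C only
///    float4 v;                   &v.x;   // ext_vector element
///  each maps to one of the AO_* diagnostic categories above.)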
QualType Sema::CheckAddressOfOperand(ExprResult &OrigOp, SourceLocation OpLoc) { if (const BuiltinType *PTy = OrigOp.get()->getType()->getAsPlaceholderType()){ if (PTy->getKind() == BuiltinType::Overload) { Expr *E = OrigOp.get()->IgnoreParens(); if (!isa(E)) { assert(cast(E)->getOpcode() == UO_AddrOf); Diag(OpLoc, diag::err_typecheck_invalid_lvalue_addrof_addrof_function) << OrigOp.get()->getSourceRange(); return QualType(); } OverloadExpr *Ovl = cast(E); if (isa(Ovl)) if (!ResolveSingleFunctionTemplateSpecialization(Ovl)) { Diag(OpLoc, diag::err_invalid_form_pointer_member_function) << OrigOp.get()->getSourceRange(); return QualType(); } return Context.OverloadTy; } if (PTy->getKind() == BuiltinType::UnknownAny) return Context.UnknownAnyTy; if (PTy->getKind() == BuiltinType::BoundMember) { Diag(OpLoc, diag::err_invalid_form_pointer_member_function) << OrigOp.get()->getSourceRange(); return QualType(); } OrigOp = CheckPlaceholderExpr(OrigOp.get()); if (OrigOp.isInvalid()) return QualType(); } if (OrigOp.get()->isTypeDependent()) return Context.DependentTy; assert(!OrigOp.get()->getType()->isPlaceholderType()); // Make sure to ignore parentheses in subsequent checks Expr *op = OrigOp.get()->IgnoreParens(); // In OpenCL captures for blocks called as lambda functions // are located in the private address space. Blocks used in // enqueue_kernel can be located in a different address space // depending on a vendor implementation. Thus preventing // taking an address of the capture to avoid invalid AS casts. if (LangOpts.OpenCL) { auto* VarRef = dyn_cast(op); if (VarRef && VarRef->refersToEnclosingVariableOrCapture()) { Diag(op->getExprLoc(), diag::err_opencl_taking_address_capture); return QualType(); } } if (getLangOpts().C99) { // Implement C99-only parts of addressof rules. if (UnaryOperator* uOp = dyn_cast(op)) { if (uOp->getOpcode() == UO_Deref) // Per C99 6.5.3.2, the address of a deref always returns a valid result // (assuming the deref expression is valid). return uOp->getSubExpr()->getType(); } // Technically, there should be a check for array subscript // expressions here, but the result of one is always an lvalue anyway. } ValueDecl *dcl = getPrimaryDecl(op); if (auto *FD = dyn_cast_or_null(dcl)) if (!checkAddressOfFunctionIsAvailable(FD, /*Complain=*/true, op->getLocStart())) return QualType(); Expr::LValueClassification lval = op->ClassifyLValue(Context); unsigned AddressOfError = AO_No_Error; if (lval == Expr::LV_ClassTemporary || lval == Expr::LV_ArrayTemporary) { bool sfinae = (bool)isSFINAEContext(); Diag(OpLoc, isSFINAEContext() ? diag::err_typecheck_addrof_temporary : diag::ext_typecheck_addrof_temporary) << op->getType() << op->getSourceRange(); if (sfinae) return QualType(); // Materialize the temporary as an lvalue so that we can take its address. OrigOp = op = CreateMaterializeTemporaryExpr(op->getType(), OrigOp.get(), true); } else if (isa(op)) { return Context.getPointerType(op->getType()); } else if (lval == Expr::LV_MemberFunction) { // If it's an instance method, make a member pointer. // The expression must have exactly the form &A::foo. // If the underlying expression isn't a decl ref, give up. if (!isa(op)) { Diag(OpLoc, diag::err_invalid_form_pointer_member_function) << OrigOp.get()->getSourceRange(); return QualType(); } DeclRefExpr *DRE = cast(op); CXXMethodDecl *MD = cast(DRE->getDecl()); // The id-expression was parenthesized. 
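      // (Editor's sketch, hypothetical 'struct A { void foo(); };':
      //    &A::foo   — OK, forms a member pointer of type 'void (A::*)()';
      //    &(A::foo) — rejected below: the id-expression was parenthesized;
      //    &foo      — rejected below as unqualified, with a fixit adding
      //                the 'A::' qualifier.)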
if (OrigOp.get() != DRE) { Diag(OpLoc, diag::err_parens_pointer_member_function) << OrigOp.get()->getSourceRange(); // The method was named without a qualifier. } else if (!DRE->getQualifier()) { if (MD->getParent()->getName().empty()) Diag(OpLoc, diag::err_unqualified_pointer_member_function) << op->getSourceRange(); else { SmallString<32> Str; StringRef Qual = (MD->getParent()->getName() + "::").toStringRef(Str); Diag(OpLoc, diag::err_unqualified_pointer_member_function) << op->getSourceRange() << FixItHint::CreateInsertion(op->getSourceRange().getBegin(), Qual); } } // Taking the address of a dtor is illegal per C++ [class.dtor]p2. if (isa(MD)) Diag(OpLoc, diag::err_typecheck_addrof_dtor) << op->getSourceRange(); QualType MPTy = Context.getMemberPointerType( op->getType(), Context.getTypeDeclType(MD->getParent()).getTypePtr()); // Under the MS ABI, lock down the inheritance model now. if (Context.getTargetInfo().getCXXABI().isMicrosoft()) (void)isCompleteType(OpLoc, MPTy); return MPTy; } else if (lval != Expr::LV_Valid && lval != Expr::LV_IncompleteVoidType) { // C99 6.5.3.2p1 // The operand must be either an l-value or a function designator if (!op->getType()->isFunctionType()) { // Use a special diagnostic for loads from property references. if (isa(op)) { AddressOfError = AO_Property_Expansion; } else { Diag(OpLoc, diag::err_typecheck_invalid_lvalue_addrof) << op->getType() << op->getSourceRange(); return QualType(); } } } else if (op->getObjectKind() == OK_BitField) { // C99 6.5.3.2p1 // The operand cannot be a bit-field AddressOfError = AO_Bit_Field; } else if (op->getObjectKind() == OK_VectorComponent) { // The operand cannot be an element of a vector AddressOfError = AO_Vector_Element; } else if (dcl) { // C99 6.5.3.2p1 // We have an lvalue with a decl. Make sure the decl is not declared // with the register storage-class specifier. if (const VarDecl *vd = dyn_cast(dcl)) { // in C++ it is not error to take address of a register // variable (c++03 7.1.1P3) if (vd->getStorageClass() == SC_Register && !getLangOpts().CPlusPlus) { AddressOfError = AO_Register_Variable; } } else if (isa(dcl)) { AddressOfError = AO_Property_Expansion; } else if (isa(dcl)) { return Context.OverloadTy; } else if (isa(dcl) || isa(dcl)) { // Okay: we can take the address of a field. // Could be a pointer to member, though, if there is an explicit // scope qualifier for the class. if (isa(op) && cast(op)->getQualifier()) { DeclContext *Ctx = dcl->getDeclContext(); if (Ctx && Ctx->isRecord()) { if (dcl->getType()->isReferenceType()) { Diag(OpLoc, diag::err_cannot_form_pointer_to_member_of_reference_type) << dcl->getDeclName() << dcl->getType(); return QualType(); } while (cast(Ctx)->isAnonymousStructOrUnion()) Ctx = Ctx->getParent(); QualType MPTy = Context.getMemberPointerType( op->getType(), Context.getTypeDeclType(cast(Ctx)).getTypePtr()); // Under the MS ABI, lock down the inheritance model now. if (Context.getTargetInfo().getCXXABI().isMicrosoft()) (void)isCompleteType(OpLoc, MPTy); return MPTy; } } } else if (!isa(dcl) && !isa(dcl) && !isa(dcl)) llvm_unreachable("Unknown/unexpected decl type"); } if (AddressOfError != AO_No_Error) { diagnoseAddressOfInvalidType(*this, OpLoc, op, AddressOfError); return QualType(); } if (lval == Expr::LV_IncompleteVoidType) { // Taking the address of a void variable is technically illegal, but we // allow it in cases which are otherwise valid. // Example: "extern void x; void* y = &x;". 
Diag(OpLoc, diag::ext_typecheck_addrof_void) << op->getSourceRange(); } // If the operand has type "type", the result has type "pointer to type". if (op->getType()->isObjCObjectType()) return Context.getObjCObjectPointerType(op->getType()); CheckAddressOfPackedMember(op); return Context.getPointerType(op->getType()); } static void RecordModifiableNonNullParam(Sema &S, const Expr *Exp) { const DeclRefExpr *DRE = dyn_cast(Exp); if (!DRE) return; const Decl *D = DRE->getDecl(); if (!D) return; const ParmVarDecl *Param = dyn_cast(D); if (!Param) return; if (const FunctionDecl* FD = dyn_cast(Param->getDeclContext())) if (!FD->hasAttr() && !Param->hasAttr()) return; if (FunctionScopeInfo *FD = S.getCurFunction()) if (!FD->ModifiedNonNullParams.count(Param)) FD->ModifiedNonNullParams.insert(Param); } /// CheckIndirectionOperand - Type check unary indirection (prefix '*'). static QualType CheckIndirectionOperand(Sema &S, Expr *Op, ExprValueKind &VK, SourceLocation OpLoc) { if (Op->isTypeDependent()) return S.Context.DependentTy; ExprResult ConvResult = S.UsualUnaryConversions(Op); if (ConvResult.isInvalid()) return QualType(); Op = ConvResult.get(); QualType OpTy = Op->getType(); QualType Result; if (isa(Op)) { QualType OpOrigType = Op->IgnoreParenCasts()->getType(); S.CheckCompatibleReinterpretCast(OpOrigType, OpTy, /*IsDereference*/true, Op->getSourceRange()); } if (const PointerType *PT = OpTy->getAs()) { Result = PT->getPointeeType(); } else if (const ObjCObjectPointerType *OPT = OpTy->getAs()) Result = OPT->getPointeeType(); else { ExprResult PR = S.CheckPlaceholderExpr(Op); if (PR.isInvalid()) return QualType(); if (PR.get() != Op) return CheckIndirectionOperand(S, PR.get(), VK, OpLoc); } if (Result.isNull()) { S.Diag(OpLoc, diag::err_typecheck_indirection_requires_pointer) << OpTy << Op->getSourceRange(); return QualType(); } // Note that per both C89 and C99, indirection is always legal, even if Result // is an incomplete type or void. It would be possible to warn about // dereferencing a void pointer, but it's completely well-defined, and such a // warning is unlikely to catch any mistakes. In C++, indirection is not valid // for pointers to 'void' but is fine for any other pointer type: // // C++ [expr.unary.op]p1: // [...] the expression to which [the unary * operator] is applied shall // be a pointer to an object type, or a pointer to a function type if (S.getLangOpts().CPlusPlus && Result->isVoidType()) S.Diag(OpLoc, diag::ext_typecheck_indirection_through_void_pointer) << OpTy << Op->getSourceRange(); // Dereferences are usually l-values... VK = VK_LValue; // ...except that certain expressions are never l-values in C. 
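  // (Editor's note: in C this covers dereferences that yield unqualified
  //  'void' or a function type, e.g. '*pv' for 'void *pv' and '*pf' for a
  //  function pointer 'pf' — neither is an assignable lvalue in C.)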
  if (!S.getLangOpts().CPlusPlus && Result.isCForbiddenLValueType())
    VK = VK_RValue;
  return Result;
}

BinaryOperatorKind Sema::ConvertTokenKindToBinaryOpcode(tok::TokenKind Kind) {
  BinaryOperatorKind Opc;
  switch (Kind) {
  default: llvm_unreachable("Unknown binop!");
  case tok::periodstar:           Opc = BO_PtrMemD; break;
  case tok::arrowstar:            Opc = BO_PtrMemI; break;
  case tok::star:                 Opc = BO_Mul; break;
  case tok::slash:                Opc = BO_Div; break;
  case tok::percent:              Opc = BO_Rem; break;
  case tok::plus:                 Opc = BO_Add; break;
  case tok::minus:                Opc = BO_Sub; break;
  case tok::lessless:             Opc = BO_Shl; break;
  case tok::greatergreater:       Opc = BO_Shr; break;
  case tok::lessequal:            Opc = BO_LE; break;
  case tok::less:                 Opc = BO_LT; break;
  case tok::greaterequal:         Opc = BO_GE; break;
  case tok::greater:              Opc = BO_GT; break;
  case tok::exclaimequal:         Opc = BO_NE; break;
  case tok::equalequal:           Opc = BO_EQ; break;
  case tok::spaceship:            Opc = BO_Cmp; break;
  case tok::amp:                  Opc = BO_And; break;
  case tok::caret:                Opc = BO_Xor; break;
  case tok::pipe:                 Opc = BO_Or; break;
  case tok::ampamp:               Opc = BO_LAnd; break;
  case tok::pipepipe:             Opc = BO_LOr; break;
  case tok::equal:                Opc = BO_Assign; break;
  case tok::starequal:            Opc = BO_MulAssign; break;
  case tok::slashequal:           Opc = BO_DivAssign; break;
  case tok::percentequal:         Opc = BO_RemAssign; break;
  case tok::plusequal:            Opc = BO_AddAssign; break;
  case tok::minusequal:           Opc = BO_SubAssign; break;
  case tok::lesslessequal:        Opc = BO_ShlAssign; break;
  case tok::greatergreaterequal:  Opc = BO_ShrAssign; break;
  case tok::ampequal:             Opc = BO_AndAssign; break;
  case tok::caretequal:           Opc = BO_XorAssign; break;
  case tok::pipeequal:            Opc = BO_OrAssign; break;
  case tok::comma:                Opc = BO_Comma; break;
  }
  return Opc;
}

static inline UnaryOperatorKind ConvertTokenKindToUnaryOpcode(
  tok::TokenKind Kind) {
  UnaryOperatorKind Opc;
  switch (Kind) {
  default: llvm_unreachable("Unknown unary op!");
  case tok::plusplus:         Opc = UO_PreInc; break;
  case tok::minusminus:       Opc = UO_PreDec; break;
  case tok::amp:              Opc = UO_AddrOf; break;
  case tok::star:             Opc = UO_Deref; break;
  case tok::plus:             Opc = UO_Plus; break;
  case tok::minus:            Opc = UO_Minus; break;
  case tok::tilde:            Opc = UO_Not; break;
  case tok::exclaim:          Opc = UO_LNot; break;
  case tok::kw___real:        Opc = UO_Real; break;
  case tok::kw___imag:        Opc = UO_Imag; break;
  case tok::kw___extension__: Opc = UO_Extension; break;
  }
  return Opc;
}

/// DiagnoseSelfAssignment - Emits a warning if a value is assigned to itself.
/// This warning is only emitted for builtin assignment operations. It is also
/// suppressed in the event of macro expansions.
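/// (Editor's example: 'x = x;' warns; the same assignment expanded from a
/// macro stays silent, as does 'v = v;' when 'v' is volatile-qualified —
/// see the early returns below.)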
static void DiagnoseSelfAssignment(Sema &S, Expr *LHSExpr, Expr *RHSExpr, SourceLocation OpLoc) { if (S.inTemplateInstantiation()) return; if (OpLoc.isInvalid() || OpLoc.isMacroID()) return; LHSExpr = LHSExpr->IgnoreParenImpCasts(); RHSExpr = RHSExpr->IgnoreParenImpCasts(); const DeclRefExpr *LHSDeclRef = dyn_cast(LHSExpr); const DeclRefExpr *RHSDeclRef = dyn_cast(RHSExpr); if (!LHSDeclRef || !RHSDeclRef || LHSDeclRef->getLocation().isMacroID() || RHSDeclRef->getLocation().isMacroID()) return; const ValueDecl *LHSDecl = cast(LHSDeclRef->getDecl()->getCanonicalDecl()); const ValueDecl *RHSDecl = cast(RHSDeclRef->getDecl()->getCanonicalDecl()); if (LHSDecl != RHSDecl) return; if (LHSDecl->getType().isVolatileQualified()) return; if (const ReferenceType *RefTy = LHSDecl->getType()->getAs()) if (RefTy->getPointeeType().isVolatileQualified()) return; S.Diag(OpLoc, diag::warn_self_assignment) << LHSDeclRef->getType() << LHSExpr->getSourceRange() << RHSExpr->getSourceRange(); } /// Check if a bitwise-& is performed on an Objective-C pointer. This /// is usually indicative of introspection within the Objective-C pointer. static void checkObjCPointerIntrospection(Sema &S, ExprResult &L, ExprResult &R, SourceLocation OpLoc) { if (!S.getLangOpts().ObjC1) return; const Expr *ObjCPointerExpr = nullptr, *OtherExpr = nullptr; const Expr *LHS = L.get(); const Expr *RHS = R.get(); if (LHS->IgnoreParenCasts()->getType()->isObjCObjectPointerType()) { ObjCPointerExpr = LHS; OtherExpr = RHS; } else if (RHS->IgnoreParenCasts()->getType()->isObjCObjectPointerType()) { ObjCPointerExpr = RHS; OtherExpr = LHS; } // This warning is deliberately made very specific to reduce false // positives with logic that uses '&' for hashing. This logic mainly // looks for code trying to introspect into tagged pointers, which // code should generally never do. if (ObjCPointerExpr && isa(OtherExpr->IgnoreParenCasts())) { unsigned Diag = diag::warn_objc_pointer_masking; // Determine if we are introspecting the result of performSelectorXXX. const Expr *Ex = ObjCPointerExpr->IgnoreParenCasts(); // Special case messages to -performSelector and friends, which // can return non-pointer values boxed in a pointer value. // Some clients may wish to silence warnings in this subcase. if (const ObjCMessageExpr *ME = dyn_cast(Ex)) { Selector S = ME->getSelector(); StringRef SelArg0 = S.getNameForSlot(0); if (SelArg0.startswith("performSelector")) Diag = diag::warn_objc_pointer_masking_performSelector; } S.Diag(OpLoc, Diag) << ObjCPointerExpr->getSourceRange(); } } static NamedDecl *getDeclFromExpr(Expr *E) { if (!E) return nullptr; if (auto *DRE = dyn_cast(E)) return DRE->getDecl(); if (auto *ME = dyn_cast(E)) return ME->getMemberDecl(); if (auto *IRE = dyn_cast(E)) return IRE->getDecl(); return nullptr; } // This helper function promotes a binary operator's operands (which are of a // half vector type) to a vector of floats and then truncates the result to // a vector of either half or short. 
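// (Editor's sketch: on a target without native half arithmetic, 'a + b' for
//  'half4 a, b;' becomes a float4 addition whose result is truncated back to
//  half4, while 'a < b' keeps the vector-of-short comparison result that
//  GetSignedVectorType produced earlier.)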
static ExprResult convertHalfVecBinOp(Sema &S, ExprResult LHS, ExprResult RHS, BinaryOperatorKind Opc, QualType ResultTy, ExprValueKind VK, ExprObjectKind OK, bool IsCompAssign, SourceLocation OpLoc, FPOptions FPFeatures) { auto &Context = S.getASTContext(); assert((isVector(ResultTy, Context.HalfTy) || isVector(ResultTy, Context.ShortTy)) && "Result must be a vector of half or short"); assert(isVector(LHS.get()->getType(), Context.HalfTy) && isVector(RHS.get()->getType(), Context.HalfTy) && "both operands expected to be a half vector"); RHS = convertVector(RHS.get(), Context.FloatTy, S); QualType BinOpResTy = RHS.get()->getType(); // If Opc is a comparison, ResultType is a vector of shorts. In that case, // change BinOpResTy to a vector of ints. if (isVector(ResultTy, Context.ShortTy)) BinOpResTy = S.GetSignedVectorType(BinOpResTy); if (IsCompAssign) return new (Context) CompoundAssignOperator( LHS.get(), RHS.get(), Opc, ResultTy, VK, OK, BinOpResTy, BinOpResTy, OpLoc, FPFeatures); LHS = convertVector(LHS.get(), Context.FloatTy, S); auto *BO = new (Context) BinaryOperator(LHS.get(), RHS.get(), Opc, BinOpResTy, VK, OK, OpLoc, FPFeatures); return convertVector(BO, ResultTy->getAs()->getElementType(), S); } static std::pair CorrectDelayedTyposInBinOp(Sema &S, BinaryOperatorKind Opc, Expr *LHSExpr, Expr *RHSExpr) { ExprResult LHS = LHSExpr, RHS = RHSExpr; if (!S.getLangOpts().CPlusPlus) { // C cannot handle TypoExpr nodes on either side of a binop because it // doesn't handle dependent types properly, so make sure any TypoExprs have // been dealt with before checking the operands. LHS = S.CorrectDelayedTyposInExpr(LHS); RHS = S.CorrectDelayedTyposInExpr(RHS, [Opc, LHS](Expr *E) { if (Opc != BO_Assign) return ExprResult(E); // Avoid correcting the RHS to the same Expr as the LHS. Decl *D = getDeclFromExpr(E); return (D && D == getDeclFromExpr(LHS.get())) ? ExprError() : E; }); } return std::make_pair(LHS, RHS); } /// Returns true if conversion between vectors of halfs and vectors of floats /// is needed. static bool needsConversionOfHalfVec(bool OpRequiresConversion, ASTContext &Ctx, QualType SrcType) { return OpRequiresConversion && !Ctx.getLangOpts().NativeHalfType && !Ctx.getTargetInfo().useFP16ConversionIntrinsics() && isVector(SrcType, Ctx.HalfTy); } /// CreateBuiltinBinOp - Creates a new built-in binary operation with /// operator @p Opc at location @c TokLoc. This routine only supports /// built-in operations; ActOnBinOp handles overloaded operators. ExprResult Sema::CreateBuiltinBinOp(SourceLocation OpLoc, BinaryOperatorKind Opc, Expr *LHSExpr, Expr *RHSExpr) { if (getLangOpts().CPlusPlus11 && isa(RHSExpr)) { // The syntax only allows initializer lists on the RHS of assignment, // so we don't need to worry about accepting invalid code for // non-assignment operators. // C++11 5.17p9: // The meaning of x = {v} [...] is that of x = T(v) [...]. The meaning // of x = {} is x = T(). InitializationKind Kind = InitializationKind::CreateDirectList(RHSExpr->getLocStart()); InitializedEntity Entity = InitializedEntity::InitializeTemporary(LHSExpr->getType()); InitializationSequence InitSeq(*this, Entity, Kind, RHSExpr); ExprResult Init = InitSeq.Perform(*this, Entity, Kind, RHSExpr); if (Init.isInvalid()) return Init; RHSExpr = Init.get(); } ExprResult LHS = LHSExpr, RHS = RHSExpr; QualType ResultTy; // Result type of the binary operator. 
// The following two variables are used for compound assignment operators QualType CompLHSTy; // Type of LHS after promotions for computation QualType CompResultTy; // Type of computation result ExprValueKind VK = VK_RValue; ExprObjectKind OK = OK_Ordinary; bool ConvertHalfVec = false; std::tie(LHS, RHS) = CorrectDelayedTyposInBinOp(*this, Opc, LHSExpr, RHSExpr); if (!LHS.isUsable() || !RHS.isUsable()) return ExprError(); if (getLangOpts().OpenCL) { QualType LHSTy = LHSExpr->getType(); QualType RHSTy = RHSExpr->getType(); // OpenCLC v2.0 s6.13.11.1 allows atomic variables to be initialized by // the ATOMIC_VAR_INIT macro. if (LHSTy->isAtomicType() || RHSTy->isAtomicType()) { SourceRange SR(LHSExpr->getLocStart(), RHSExpr->getLocEnd()); if (BO_Assign == Opc) Diag(OpLoc, diag::err_opencl_atomic_init) << 0 << SR; else ResultTy = InvalidOperands(OpLoc, LHS, RHS); return ExprError(); } // OpenCL special types - image, sampler, pipe, and blocks are to be used // only with a builtin functions and therefore should be disallowed here. if (LHSTy->isImageType() || RHSTy->isImageType() || LHSTy->isSamplerT() || RHSTy->isSamplerT() || LHSTy->isPipeType() || RHSTy->isPipeType() || LHSTy->isBlockPointerType() || RHSTy->isBlockPointerType()) { ResultTy = InvalidOperands(OpLoc, LHS, RHS); return ExprError(); } } switch (Opc) { case BO_Assign: ResultTy = CheckAssignmentOperands(LHS.get(), RHS, OpLoc, QualType()); if (getLangOpts().CPlusPlus && LHS.get()->getObjectKind() != OK_ObjCProperty) { VK = LHS.get()->getValueKind(); OK = LHS.get()->getObjectKind(); } if (!ResultTy.isNull()) { DiagnoseSelfAssignment(*this, LHS.get(), RHS.get(), OpLoc); DiagnoseSelfMove(LHS.get(), RHS.get(), OpLoc); } RecordModifiableNonNullParam(*this, LHS.get()); break; case BO_PtrMemD: case BO_PtrMemI: ResultTy = CheckPointerToMemberOperands(LHS, RHS, VK, OpLoc, Opc == BO_PtrMemI); break; case BO_Mul: case BO_Div: ConvertHalfVec = true; ResultTy = CheckMultiplyDivideOperands(LHS, RHS, OpLoc, false, Opc == BO_Div); break; case BO_Rem: ResultTy = CheckRemainderOperands(LHS, RHS, OpLoc); break; case BO_Add: ConvertHalfVec = true; ResultTy = CheckAdditionOperands(LHS, RHS, OpLoc, Opc); break; case BO_Sub: ConvertHalfVec = true; ResultTy = CheckSubtractionOperands(LHS, RHS, OpLoc); break; case BO_Shl: case BO_Shr: ResultTy = CheckShiftOperands(LHS, RHS, OpLoc, Opc); break; case BO_LE: case BO_LT: case BO_GE: case BO_GT: ConvertHalfVec = true; ResultTy = CheckCompareOperands(LHS, RHS, OpLoc, Opc, true); break; case BO_EQ: case BO_NE: ConvertHalfVec = true; ResultTy = CheckCompareOperands(LHS, RHS, OpLoc, Opc, false); break; case BO_Cmp: // FIXME: Implement proper semantic checking of '<=>'. 
ConvertHalfVec = true; ResultTy = CheckCompareOperands(LHS, RHS, OpLoc, Opc, true); if (!ResultTy.isNull()) ResultTy = Context.VoidTy; break; case BO_And: checkObjCPointerIntrospection(*this, LHS, RHS, OpLoc); LLVM_FALLTHROUGH; case BO_Xor: case BO_Or: ResultTy = CheckBitwiseOperands(LHS, RHS, OpLoc, Opc); break; case BO_LAnd: case BO_LOr: ConvertHalfVec = true; ResultTy = CheckLogicalOperands(LHS, RHS, OpLoc, Opc); break; case BO_MulAssign: case BO_DivAssign: ConvertHalfVec = true; CompResultTy = CheckMultiplyDivideOperands(LHS, RHS, OpLoc, true, Opc == BO_DivAssign); CompLHSTy = CompResultTy; if (!CompResultTy.isNull() && !LHS.isInvalid() && !RHS.isInvalid()) ResultTy = CheckAssignmentOperands(LHS.get(), RHS, OpLoc, CompResultTy); break; case BO_RemAssign: CompResultTy = CheckRemainderOperands(LHS, RHS, OpLoc, true); CompLHSTy = CompResultTy; if (!CompResultTy.isNull() && !LHS.isInvalid() && !RHS.isInvalid()) ResultTy = CheckAssignmentOperands(LHS.get(), RHS, OpLoc, CompResultTy); break; case BO_AddAssign: ConvertHalfVec = true; CompResultTy = CheckAdditionOperands(LHS, RHS, OpLoc, Opc, &CompLHSTy); if (!CompResultTy.isNull() && !LHS.isInvalid() && !RHS.isInvalid()) ResultTy = CheckAssignmentOperands(LHS.get(), RHS, OpLoc, CompResultTy); break; case BO_SubAssign: ConvertHalfVec = true; CompResultTy = CheckSubtractionOperands(LHS, RHS, OpLoc, &CompLHSTy); if (!CompResultTy.isNull() && !LHS.isInvalid() && !RHS.isInvalid()) ResultTy = CheckAssignmentOperands(LHS.get(), RHS, OpLoc, CompResultTy); break; case BO_ShlAssign: case BO_ShrAssign: CompResultTy = CheckShiftOperands(LHS, RHS, OpLoc, Opc, true); CompLHSTy = CompResultTy; if (!CompResultTy.isNull() && !LHS.isInvalid() && !RHS.isInvalid()) ResultTy = CheckAssignmentOperands(LHS.get(), RHS, OpLoc, CompResultTy); break; case BO_AndAssign: case BO_OrAssign: // fallthrough DiagnoseSelfAssignment(*this, LHS.get(), RHS.get(), OpLoc); LLVM_FALLTHROUGH; case BO_XorAssign: CompResultTy = CheckBitwiseOperands(LHS, RHS, OpLoc, Opc); CompLHSTy = CompResultTy; if (!CompResultTy.isNull() && !LHS.isInvalid() && !RHS.isInvalid()) ResultTy = CheckAssignmentOperands(LHS.get(), RHS, OpLoc, CompResultTy); break; case BO_Comma: ResultTy = CheckCommaOperands(*this, LHS, RHS, OpLoc); if (getLangOpts().CPlusPlus && !RHS.isInvalid()) { VK = RHS.get()->getValueKind(); OK = RHS.get()->getObjectKind(); } break; } if (ResultTy.isNull() || LHS.isInvalid() || RHS.isInvalid()) return ExprError(); // Some of the binary operations require promoting operands of half vector to // float vectors and truncating the result back to half vector. For now, we do // this only when HalfArgsAndReturn is set (that is, when the target is arm or // arm64). 
assert(isVector(RHS.get()->getType(), Context.HalfTy) == isVector(LHS.get()->getType(), Context.HalfTy) && "both sides are half vectors or neither sides are"); ConvertHalfVec = needsConversionOfHalfVec(ConvertHalfVec, Context, LHS.get()->getType()); // Check for array bounds violations for both sides of the BinaryOperator CheckArrayAccess(LHS.get()); CheckArrayAccess(RHS.get()); if (const ObjCIsaExpr *OISA = dyn_cast(LHS.get()->IgnoreParenCasts())) { NamedDecl *ObjectSetClass = LookupSingleName(TUScope, &Context.Idents.get("object_setClass"), SourceLocation(), LookupOrdinaryName); if (ObjectSetClass && isa(LHS.get())) { SourceLocation RHSLocEnd = getLocForEndOfToken(RHS.get()->getLocEnd()); Diag(LHS.get()->getExprLoc(), diag::warn_objc_isa_assign) << FixItHint::CreateInsertion(LHS.get()->getLocStart(), "object_setClass(") << FixItHint::CreateReplacement(SourceRange(OISA->getOpLoc(), OpLoc), ",") << FixItHint::CreateInsertion(RHSLocEnd, ")"); } else Diag(LHS.get()->getExprLoc(), diag::warn_objc_isa_assign); } else if (const ObjCIvarRefExpr *OIRE = dyn_cast(LHS.get()->IgnoreParenCasts())) DiagnoseDirectIsaAccess(*this, OIRE, OpLoc, RHS.get()); // Opc is not a compound assignment if CompResultTy is null. if (CompResultTy.isNull()) { if (ConvertHalfVec) return convertHalfVecBinOp(*this, LHS, RHS, Opc, ResultTy, VK, OK, false, OpLoc, FPFeatures); return new (Context) BinaryOperator(LHS.get(), RHS.get(), Opc, ResultTy, VK, OK, OpLoc, FPFeatures); } // Handle compound assignments. if (getLangOpts().CPlusPlus && LHS.get()->getObjectKind() != OK_ObjCProperty) { VK = VK_LValue; OK = LHS.get()->getObjectKind(); } if (ConvertHalfVec) return convertHalfVecBinOp(*this, LHS, RHS, Opc, ResultTy, VK, OK, true, OpLoc, FPFeatures); return new (Context) CompoundAssignOperator( LHS.get(), RHS.get(), Opc, ResultTy, VK, OK, CompLHSTy, CompResultTy, OpLoc, FPFeatures); } /// DiagnoseBitwisePrecedence - Emit a warning when bitwise and comparison /// operators are mixed in a way that suggests that the programmer forgot that /// comparison operators have higher precedence. The most typical example of /// such code is "flags & 0x0020 != 0", which is equivalent to "flags & 1". static void DiagnoseBitwisePrecedence(Sema &Self, BinaryOperatorKind Opc, SourceLocation OpLoc, Expr *LHSExpr, Expr *RHSExpr) { BinaryOperator *LHSBO = dyn_cast(LHSExpr); BinaryOperator *RHSBO = dyn_cast(RHSExpr); // Check that one of the sides is a comparison operator and the other isn't. bool isLeftComp = LHSBO && LHSBO->isComparisonOp(); bool isRightComp = RHSBO && RHSBO->isComparisonOp(); if (isLeftComp == isRightComp) return; // Bitwise operations are sometimes used as eager logical ops. // Don't diagnose this. bool isLeftBitwise = LHSBO && LHSBO->isBitwiseOp(); bool isRightBitwise = RHSBO && RHSBO->isBitwiseOp(); if (isLeftBitwise || isRightBitwise) return; SourceRange DiagRange = isLeftComp ? SourceRange(LHSExpr->getLocStart(), OpLoc) : SourceRange(OpLoc, RHSExpr->getLocEnd()); StringRef OpStr = isLeftComp ? LHSBO->getOpcodeStr() : RHSBO->getOpcodeStr(); SourceRange ParensRange = isLeftComp ? SourceRange(LHSBO->getRHS()->getLocStart(), RHSExpr->getLocEnd()) : SourceRange(LHSExpr->getLocStart(), RHSBO->getLHS()->getLocEnd()); Self.Diag(OpLoc, diag::warn_precedence_bitwise_rel) << DiagRange << BinaryOperator::getOpcodeStr(Opc) << OpStr; SuggestParentheses(Self, OpLoc, Self.PDiag(diag::note_precedence_silence) << OpStr, (isLeftComp ? 
LHSExpr : RHSExpr)->getSourceRange()); SuggestParentheses(Self, OpLoc, Self.PDiag(diag::note_precedence_bitwise_first) << BinaryOperator::getOpcodeStr(Opc), ParensRange); } /// \brief It accepts a '&&' expr that is inside a '||' one. /// Emit a diagnostic together with a fixit hint that wraps the '&&' expression /// in parentheses. static void EmitDiagnosticForLogicalAndInLogicalOr(Sema &Self, SourceLocation OpLoc, BinaryOperator *Bop) { assert(Bop->getOpcode() == BO_LAnd); Self.Diag(Bop->getOperatorLoc(), diag::warn_logical_and_in_logical_or) << Bop->getSourceRange() << OpLoc; SuggestParentheses(Self, Bop->getOperatorLoc(), Self.PDiag(diag::note_precedence_silence) << Bop->getOpcodeStr(), Bop->getSourceRange()); } /// \brief Returns true if the given expression can be evaluated as a constant /// 'true'. static bool EvaluatesAsTrue(Sema &S, Expr *E) { bool Res; return !E->isValueDependent() && E->EvaluateAsBooleanCondition(Res, S.getASTContext()) && Res; } /// \brief Returns true if the given expression can be evaluated as a constant /// 'false'. static bool EvaluatesAsFalse(Sema &S, Expr *E) { bool Res; return !E->isValueDependent() && E->EvaluateAsBooleanCondition(Res, S.getASTContext()) && !Res; } /// \brief Look for '&&' in the left hand of a '||' expr. static void DiagnoseLogicalAndInLogicalOrLHS(Sema &S, SourceLocation OpLoc, Expr *LHSExpr, Expr *RHSExpr) { if (BinaryOperator *Bop = dyn_cast(LHSExpr)) { if (Bop->getOpcode() == BO_LAnd) { // If it's "a && b || 0" don't warn since the precedence doesn't matter. if (EvaluatesAsFalse(S, RHSExpr)) return; // If it's "1 && a || b" don't warn since the precedence doesn't matter. if (!EvaluatesAsTrue(S, Bop->getLHS())) return EmitDiagnosticForLogicalAndInLogicalOr(S, OpLoc, Bop); } else if (Bop->getOpcode() == BO_LOr) { if (BinaryOperator *RBop = dyn_cast(Bop->getRHS())) { // If it's "a || b && 1 || c" we didn't warn earlier for // "a || b && 1", but warn now. if (RBop->getOpcode() == BO_LAnd && EvaluatesAsTrue(S, RBop->getRHS())) return EmitDiagnosticForLogicalAndInLogicalOr(S, OpLoc, RBop); } } } } /// \brief Look for '&&' in the right hand of a '||' expr. static void DiagnoseLogicalAndInLogicalOrRHS(Sema &S, SourceLocation OpLoc, Expr *LHSExpr, Expr *RHSExpr) { if (BinaryOperator *Bop = dyn_cast(RHSExpr)) { if (Bop->getOpcode() == BO_LAnd) { // If it's "0 || a && b" don't warn since the precedence doesn't matter. if (EvaluatesAsFalse(S, LHSExpr)) return; // If it's "a || b && 1" don't warn since the precedence doesn't matter. if (!EvaluatesAsTrue(S, Bop->getRHS())) return EmitDiagnosticForLogicalAndInLogicalOr(S, OpLoc, Bop); } } } /// \brief Look for bitwise op in the left or right hand of a bitwise op with /// lower precedence and emit a diagnostic together with a fixit hint that wraps /// the '&' expression in parentheses. 
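/// (Editor's example: 'a & b | c' warns and suggests '(a & b) | c'; the
/// 'Bop->getOpcode() < Opc' test below relies on BO_And < BO_Xor < BO_Or in
/// the BinaryOperatorKind ordering.)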
static void DiagnoseBitwiseOpInBitwiseOp(Sema &S, BinaryOperatorKind Opc, SourceLocation OpLoc, Expr *SubExpr) { if (BinaryOperator *Bop = dyn_cast<BinaryOperator>(SubExpr)) { if (Bop->isBitwiseOp() && Bop->getOpcode() < Opc) { S.Diag(Bop->getOperatorLoc(), diag::warn_bitwise_op_in_bitwise_op) << Bop->getOpcodeStr() << BinaryOperator::getOpcodeStr(Opc) << Bop->getSourceRange() << OpLoc; SuggestParentheses(S, Bop->getOperatorLoc(), S.PDiag(diag::note_precedence_silence) << Bop->getOpcodeStr(), Bop->getSourceRange()); } } } static void DiagnoseAdditionInShift(Sema &S, SourceLocation OpLoc, Expr *SubExpr, StringRef Shift) { if (BinaryOperator *Bop = dyn_cast<BinaryOperator>(SubExpr)) { if (Bop->getOpcode() == BO_Add || Bop->getOpcode() == BO_Sub) { StringRef Op = Bop->getOpcodeStr(); S.Diag(Bop->getOperatorLoc(), diag::warn_addition_in_bitshift) << Bop->getSourceRange() << OpLoc << Shift << Op; SuggestParentheses(S, Bop->getOperatorLoc(), S.PDiag(diag::note_precedence_silence) << Op, Bop->getSourceRange()); } } } static void DiagnoseShiftCompare(Sema &S, SourceLocation OpLoc, Expr *LHSExpr, Expr *RHSExpr) { CXXOperatorCallExpr *OCE = dyn_cast<CXXOperatorCallExpr>(LHSExpr); if (!OCE) return; FunctionDecl *FD = OCE->getDirectCallee(); if (!FD || !FD->isOverloadedOperator()) return; OverloadedOperatorKind Kind = FD->getOverloadedOperator(); if (Kind != OO_LessLess && Kind != OO_GreaterGreater) return; S.Diag(OpLoc, diag::warn_overloaded_shift_in_comparison) << LHSExpr->getSourceRange() << RHSExpr->getSourceRange() << (Kind == OO_LessLess); SuggestParentheses(S, OCE->getOperatorLoc(), S.PDiag(diag::note_precedence_silence) << (Kind == OO_LessLess ? "<<" : ">>"), OCE->getSourceRange()); SuggestParentheses(S, OpLoc, S.PDiag(diag::note_evaluate_comparison_first), SourceRange(OCE->getArg(1)->getLocStart(), RHSExpr->getLocEnd())); } /// DiagnoseBinOpPrecedence - Emit warnings for expressions with tricky /// precedence. static void DiagnoseBinOpPrecedence(Sema &Self, BinaryOperatorKind Opc, SourceLocation OpLoc, Expr *LHSExpr, Expr *RHSExpr) { // Diagnose "arg1 'bitwise' arg2 'eq' arg3". if (BinaryOperator::isBitwiseOp(Opc)) DiagnoseBitwisePrecedence(Self, Opc, OpLoc, LHSExpr, RHSExpr); // Diagnose "arg1 & arg2 | arg3" if ((Opc == BO_Or || Opc == BO_Xor) && !OpLoc.isMacroID() /* Don't warn in macros. */) { DiagnoseBitwiseOpInBitwiseOp(Self, Opc, OpLoc, LHSExpr); DiagnoseBitwiseOpInBitwiseOp(Self, Opc, OpLoc, RHSExpr); } // Warn about arg1 || arg2 && arg3, as GCC 4.3+ does. // We don't warn for 'assert(a || b && "bad")' since this is safe. if (Opc == BO_LOr && !OpLoc.isMacroID() /* Don't warn in macros. */) { DiagnoseLogicalAndInLogicalOrLHS(Self, OpLoc, LHSExpr, RHSExpr); DiagnoseLogicalAndInLogicalOrRHS(Self, OpLoc, LHSExpr, RHSExpr); } if ((Opc == BO_Shl && LHSExpr->getType()->isIntegralType(Self.getASTContext())) || Opc == BO_Shr) { StringRef Shift = BinaryOperator::getOpcodeStr(Opc); DiagnoseAdditionInShift(Self, OpLoc, LHSExpr, Shift); DiagnoseAdditionInShift(Self, OpLoc, RHSExpr, Shift); } // Warn on overloaded shift operators and comparisons, such as: // cout << 5 == 4; if (BinaryOperator::isComparisonOp(Opc)) DiagnoseShiftCompare(Self, OpLoc, LHSExpr, RHSExpr); } // Binary Operators. 'Tok' is the token for the operator.
ExprResult Sema::ActOnBinOp(Scope *S, SourceLocation TokLoc, tok::TokenKind Kind, Expr *LHSExpr, Expr *RHSExpr) { BinaryOperatorKind Opc = ConvertTokenKindToBinaryOpcode(Kind); assert(LHSExpr && "ActOnBinOp(): missing left expression"); assert(RHSExpr && "ActOnBinOp(): missing right expression"); // Emit warnings for tricky precedence issues, e.g. "bitfield & 0x4 == 0" DiagnoseBinOpPrecedence(*this, Opc, TokLoc, LHSExpr, RHSExpr); return BuildBinOp(S, TokLoc, Opc, LHSExpr, RHSExpr); } /// Build an overloaded binary operator expression in the given scope. static ExprResult BuildOverloadedBinOp(Sema &S, Scope *Sc, SourceLocation OpLoc, BinaryOperatorKind Opc, Expr *LHS, Expr *RHS) { // Find all of the overloaded operators visible from this // point. We perform both an operator-name lookup from the local // scope and an argument-dependent lookup based on the types of // the arguments. UnresolvedSet<16> Functions; OverloadedOperatorKind OverOp = BinaryOperator::getOverloadedOperator(Opc); if (Sc && OverOp != OO_None && OverOp != OO_Equal) S.LookupOverloadedOperatorName(OverOp, Sc, LHS->getType(), RHS->getType(), Functions); // Build the (potentially-overloaded, potentially-dependent) // binary operation. return S.CreateOverloadedBinOp(OpLoc, Opc, Functions, LHS, RHS); } ExprResult Sema::BuildBinOp(Scope *S, SourceLocation OpLoc, BinaryOperatorKind Opc, Expr *LHSExpr, Expr *RHSExpr) { ExprResult LHS, RHS; std::tie(LHS, RHS) = CorrectDelayedTyposInBinOp(*this, Opc, LHSExpr, RHSExpr); if (!LHS.isUsable() || !RHS.isUsable()) return ExprError(); LHSExpr = LHS.get(); RHSExpr = RHS.get(); // We want to end up calling one of checkPseudoObjectAssignment // (if the LHS is a pseudo-object), BuildOverloadedBinOp (if // both expressions are overloadable or either is type-dependent), // or CreateBuiltinBinOp (in any other case). We also want to get // any placeholder types out of the way. // Handle pseudo-objects in the LHS. if (const BuiltinType *pty = LHSExpr->getType()->getAsPlaceholderType()) { // Assignments with a pseudo-object l-value need special analysis. if (pty->getKind() == BuiltinType::PseudoObject && BinaryOperator::isAssignmentOp(Opc)) return checkPseudoObjectAssignment(S, OpLoc, Opc, LHSExpr, RHSExpr); // Don't resolve overloads if the other type is overloadable. if (getLangOpts().CPlusPlus && pty->getKind() == BuiltinType::Overload) { // We can't actually test that if we still have a placeholder, // though. Fortunately, none of the exceptions we see in that // code below are valid when the LHS is an overload set. Note // that an overload set can be dependently-typed, but it never // instantiates to having an overloadable type. ExprResult resolvedRHS = CheckPlaceholderExpr(RHSExpr); if (resolvedRHS.isInvalid()) return ExprError(); RHSExpr = resolvedRHS.get(); if (RHSExpr->isTypeDependent() || RHSExpr->getType()->isOverloadableType()) return BuildOverloadedBinOp(*this, S, OpLoc, Opc, LHSExpr, RHSExpr); } // If we're instantiating "a.x < b" or "A::x < b" and 'x' names a function // template, diagnose the missing 'template' keyword instead of diagnosing // an invalid use of a bound member function. // // Note that "A::x < b" might be valid if 'b' has an overloadable type due // to C++1z [over.over]/1.4, but we already checked for that case above. 
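// A sketch of the case handled below (hypothetical user code):
//   template <typename T> void g(T a, int b) { (void)(a.x < b); }
// If 'x' names a member function template of T, "a.x < b" reaches here as a
// '<' whose LHS is a bound-member/overload placeholder; diagnosing the
// missing 'template' keyword is friendlier than the generic error.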
if (Opc == BO_LT && inTemplateInstantiation() && (pty->getKind() == BuiltinType::BoundMember || pty->getKind() == BuiltinType::Overload)) { auto *OE = dyn_cast<OverloadExpr>(LHSExpr); if (OE && !OE->hasTemplateKeyword() && !OE->hasExplicitTemplateArgs() && std::any_of(OE->decls_begin(), OE->decls_end(), [](NamedDecl *ND) { return isa<FunctionTemplateDecl>(ND); })) { Diag(OE->getQualifier() ? OE->getQualifierLoc().getBeginLoc() : OE->getNameLoc(), diag::err_template_kw_missing) << OE->getName().getAsString() << ""; return ExprError(); } } ExprResult LHS = CheckPlaceholderExpr(LHSExpr); if (LHS.isInvalid()) return ExprError(); LHSExpr = LHS.get(); } // Handle pseudo-objects in the RHS. if (const BuiltinType *pty = RHSExpr->getType()->getAsPlaceholderType()) { // An overload in the RHS can potentially be resolved by the type // being assigned to. if (Opc == BO_Assign && pty->getKind() == BuiltinType::Overload) { if (getLangOpts().CPlusPlus && (LHSExpr->isTypeDependent() || RHSExpr->isTypeDependent() || LHSExpr->getType()->isOverloadableType())) return BuildOverloadedBinOp(*this, S, OpLoc, Opc, LHSExpr, RHSExpr); return CreateBuiltinBinOp(OpLoc, Opc, LHSExpr, RHSExpr); } // Don't resolve overloads if the other type is overloadable. if (getLangOpts().CPlusPlus && pty->getKind() == BuiltinType::Overload && LHSExpr->getType()->isOverloadableType()) return BuildOverloadedBinOp(*this, S, OpLoc, Opc, LHSExpr, RHSExpr); ExprResult resolvedRHS = CheckPlaceholderExpr(RHSExpr); if (!resolvedRHS.isUsable()) return ExprError(); RHSExpr = resolvedRHS.get(); } if (getLangOpts().CPlusPlus) { // If either expression is type-dependent, always build an // overloaded op. if (LHSExpr->isTypeDependent() || RHSExpr->isTypeDependent()) return BuildOverloadedBinOp(*this, S, OpLoc, Opc, LHSExpr, RHSExpr); // Otherwise, build an overloaded op if either expression has an // overloadable type. if (LHSExpr->getType()->isOverloadableType() || RHSExpr->getType()->isOverloadableType()) return BuildOverloadedBinOp(*this, S, OpLoc, Opc, LHSExpr, RHSExpr); } // Build a built-in binary operation. return CreateBuiltinBinOp(OpLoc, Opc, LHSExpr, RHSExpr); } ExprResult Sema::CreateBuiltinUnaryOp(SourceLocation OpLoc, UnaryOperatorKind Opc, Expr *InputExpr) { ExprResult Input = InputExpr; ExprValueKind VK = VK_RValue; ExprObjectKind OK = OK_Ordinary; QualType resultType; bool ConvertHalfVec = false; if (getLangOpts().OpenCL) { QualType Ty = InputExpr->getType(); // The only legal unary operation for atomics is '&'. if ((Opc != UO_AddrOf && Ty->isAtomicType()) || // OpenCL special types - image, sampler, pipe, and blocks are to be used // only with builtin functions and therefore should be disallowed here.
(Ty->isImageType() || Ty->isSamplerT() || Ty->isPipeType() || Ty->isBlockPointerType())) { return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr) << InputExpr->getType() << Input.get()->getSourceRange()); } } switch (Opc) { case UO_PreInc: case UO_PreDec: case UO_PostInc: case UO_PostDec: resultType = CheckIncrementDecrementOperand(*this, Input.get(), VK, OK, OpLoc, Opc == UO_PreInc || Opc == UO_PostInc, Opc == UO_PreInc || Opc == UO_PreDec); break; case UO_AddrOf: resultType = CheckAddressOfOperand(Input, OpLoc); RecordModifiableNonNullParam(*this, InputExpr); break; case UO_Deref: { Input = DefaultFunctionArrayLvalueConversion(Input.get()); if (Input.isInvalid()) return ExprError(); resultType = CheckIndirectionOperand(*this, Input.get(), VK, OpLoc); break; } case UO_Plus: case UO_Minus: Input = UsualUnaryConversions(Input.get()); if (Input.isInvalid()) return ExprError(); // Unary plus and minus require promoting an operand of half vector to a // float vector and truncating the result back to a half vector. For now, we // do this only when HalfArgsAndReturns is set (that is, when the target is // arm or arm64). ConvertHalfVec = needsConversionOfHalfVec(true, Context, Input.get()->getType()); // If the operand is a half vector, promote it to a float vector. if (ConvertHalfVec) Input = convertVector(Input.get(), Context.FloatTy, *this); resultType = Input.get()->getType(); if (resultType->isDependentType()) break; if (resultType->isArithmeticType()) // C99 6.5.3.3p1 break; else if (resultType->isVectorType() && // The z vector extensions don't allow + or - with bool vectors. (!Context.getLangOpts().ZVector || resultType->getAs<VectorType>()->getVectorKind() != VectorType::AltiVecBool)) break; else if (getLangOpts().CPlusPlus && // C++ [expr.unary.op]p6 Opc == UO_Plus && resultType->isPointerType()) break; return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr) << resultType << Input.get()->getSourceRange()); case UO_Not: // bitwise complement Input = UsualUnaryConversions(Input.get()); if (Input.isInvalid()) return ExprError(); resultType = Input.get()->getType(); if (resultType->isDependentType()) break; // C99 6.5.3.3p1. We allow complex int and float as a GCC extension. if (resultType->isComplexType() || resultType->isComplexIntegerType()) // C99 does not support '~' for complex conjugation. Diag(OpLoc, diag::ext_integer_complement_complex) << resultType << Input.get()->getSourceRange(); else if (resultType->hasIntegerRepresentation()) break; else if (resultType->isExtVectorType() && Context.getLangOpts().OpenCL) { // OpenCL v1.1 s6.3.f: The bitwise operator not (~) does not operate // on vector float types. QualType T = resultType->getAs<ExtVectorType>()->getElementType(); if (!T->isIntegerType()) return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr) << resultType << Input.get()->getSourceRange()); } else { return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr) << resultType << Input.get()->getSourceRange()); } break; case UO_LNot: // logical negation // Unlike +/-/~, integer promotions aren't done here (C99 6.5.3.3p5). Input = DefaultFunctionArrayLvalueConversion(Input.get()); if (Input.isInvalid()) return ExprError(); resultType = Input.get()->getType(); // Though we still have to promote half FP to float...
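// (Illustrative: given '__fp16 h;', '!h' is checked as if 'h' were first
// converted to float, unless the target has native half support.)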
if (resultType->isHalfType() && !Context.getLangOpts().NativeHalfType) { Input = ImpCastExprToType(Input.get(), Context.FloatTy, CK_FloatingCast).get(); resultType = Context.FloatTy; } if (resultType->isDependentType()) break; if (resultType->isScalarType() && !isScopedEnumerationType(resultType)) { // C99 6.5.3.3p1: ok, fallthrough; if (Context.getLangOpts().CPlusPlus) { // C++03 [expr.unary.op]p8, C++0x [expr.unary.op]p9: // operand contextually converted to bool. Input = ImpCastExprToType(Input.get(), Context.BoolTy, ScalarTypeToBooleanCastKind(resultType)); } else if (Context.getLangOpts().OpenCL && Context.getLangOpts().OpenCLVersion < 120) { // OpenCL v1.1 6.3.h: The logical operator not (!) does not // operate on scalar float types. if (!resultType->isIntegerType() && !resultType->isPointerType()) return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr) << resultType << Input.get()->getSourceRange()); } } else if (resultType->isExtVectorType()) { if (Context.getLangOpts().OpenCL && Context.getLangOpts().OpenCLVersion < 120) { // OpenCL v1.1 6.3.h: The logical operator not (!) does not // operate on vector float types. QualType T = resultType->getAs<ExtVectorType>()->getElementType(); if (!T->isIntegerType()) return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr) << resultType << Input.get()->getSourceRange()); } // Vector logical not returns the signed variant of the operand type. resultType = GetSignedVectorType(resultType); break; } else { // FIXME: GCC's vector extension permits the usage of '!' with a vector // type in C++. We should allow that here too. return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr) << resultType << Input.get()->getSourceRange()); } // LNot always has type int. C99 6.5.3.3p5. // In C++, it's bool. C++ 5.3.1p8 resultType = Context.getLogicalOperationType(); break; case UO_Real: case UO_Imag: resultType = CheckRealImagOperand(*this, Input, OpLoc, Opc == UO_Real); // _Real maps ordinary l-values into ordinary l-values. _Imag maps ordinary // complex l-values to ordinary l-values and all other values to r-values. if (Input.isInvalid()) return ExprError(); if (Opc == UO_Real || Input.get()->getType()->isAnyComplexType()) { if (Input.get()->getValueKind() != VK_RValue && Input.get()->getObjectKind() == OK_Ordinary) VK = Input.get()->getValueKind(); } else if (!getLangOpts().CPlusPlus) { // In C, a volatile scalar is read by __imag. In C++, it is not. Input = DefaultLvalueConversion(Input.get()); } break; case UO_Extension: resultType = Input.get()->getType(); VK = Input.get()->getValueKind(); OK = Input.get()->getObjectKind(); break; case UO_Coawait: // It's unnecessary to represent the pass-through operator co_await in the // AST; just return the input expression instead. assert(!Input.get()->getType()->isDependentType() && "the co_await expression must be non-dependent before " "building operator co_await"); return Input; } if (resultType.isNull() || Input.isInvalid()) return ExprError(); // Check for array bounds violations in the operand of the UnaryOperator, // except for the '*' and '&' operators that have to be handled specially // by CheckArrayAccess (as there are special cases like &array[arraysize] // that are explicitly defined as valid by the standard). if (Opc != UO_AddrOf && Opc != UO_Deref) CheckArrayAccess(Input.get()); auto *UO = new (Context) UnaryOperator(Input.get(), Opc, resultType, VK, OK, OpLoc); // Convert the result back to a half vector.
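// (Sketch: for a hypothetical 'half4 v; -v;' on an ARM target, the operand
// was promoted to a float vector above, so the result is truncated back to
// the half vector type here.)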
if (ConvertHalfVec) return convertVector(UO, Context.HalfTy, *this); return UO; } /// \brief Determine whether the given expression is a qualified member /// access expression, of a form that could be turned into a pointer to member /// with the address-of operator. static bool isQualifiedMemberAccess(Expr *E) { if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E)) { if (!DRE->getQualifier()) return false; ValueDecl *VD = DRE->getDecl(); if (!VD->isCXXClassMember()) return false; if (isa<FieldDecl>(VD) || isa<IndirectFieldDecl>(VD)) return true; if (CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(VD)) return Method->isInstance(); return false; } if (UnresolvedLookupExpr *ULE = dyn_cast<UnresolvedLookupExpr>(E)) { if (!ULE->getQualifier()) return false; for (NamedDecl *D : ULE->decls()) { if (CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(D)) { if (Method->isInstance()) return true; } else { // Overload set does not contain methods. break; } } return false; } return false; } ExprResult Sema::BuildUnaryOp(Scope *S, SourceLocation OpLoc, UnaryOperatorKind Opc, Expr *Input) { // First things first: handle placeholders so that the // overloaded-operator check considers the right type. if (const BuiltinType *pty = Input->getType()->getAsPlaceholderType()) { // Increment and decrement of pseudo-object references. if (pty->getKind() == BuiltinType::PseudoObject && UnaryOperator::isIncrementDecrementOp(Opc)) return checkPseudoObjectIncDec(S, OpLoc, Opc, Input); // extension is always a builtin operator. if (Opc == UO_Extension) return CreateBuiltinUnaryOp(OpLoc, Opc, Input); // & gets special logic for several kinds of placeholder. // The builtin code knows what to do. if (Opc == UO_AddrOf && (pty->getKind() == BuiltinType::Overload || pty->getKind() == BuiltinType::UnknownAny || pty->getKind() == BuiltinType::BoundMember)) return CreateBuiltinUnaryOp(OpLoc, Opc, Input); // Anything else needs to be handled now. ExprResult Result = CheckPlaceholderExpr(Input); if (Result.isInvalid()) return ExprError(); Input = Result.get(); } if (getLangOpts().CPlusPlus && Input->getType()->isOverloadableType() && UnaryOperator::getOverloadedOperator(Opc) != OO_None && !(Opc == UO_AddrOf && isQualifiedMemberAccess(Input))) { // Find all of the overloaded operators visible from this // point. We perform both an operator-name lookup from the local // scope and an argument-dependent lookup based on the types of // the arguments. UnresolvedSet<16> Functions; OverloadedOperatorKind OverOp = UnaryOperator::getOverloadedOperator(Opc); if (S && OverOp != OO_None) LookupOverloadedOperatorName(OverOp, S, Input->getType(), QualType(), Functions); return CreateOverloadedUnaryOp(OpLoc, Opc, Functions, Input); } return CreateBuiltinUnaryOp(OpLoc, Opc, Input); } // Unary Operators. 'Tok' is the token for the operator. ExprResult Sema::ActOnUnaryOp(Scope *S, SourceLocation OpLoc, tok::TokenKind Op, Expr *Input) { return BuildUnaryOp(S, OpLoc, ConvertTokenKindToUnaryOpcode(Op), Input); } /// ActOnAddrLabel - Parse the GNU address of label extension: "&&foo". ExprResult Sema::ActOnAddrLabel(SourceLocation OpLoc, SourceLocation LabLoc, LabelDecl *TheDecl) { TheDecl->markUsed(Context); // Create the AST node. The address of a label always has type 'void*'. return new (Context) AddrLabelExpr(OpLoc, LabLoc, TheDecl, Context.getPointerType(Context.VoidTy)); } /// Given the last statement in a statement-expression, check whether /// the result is a producing expression (like a call to an /// ns_returns_retained function) and, if so, rebuild it to hoist the /// release out of the full-expression. Otherwise, return null.
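/// An illustrative ARC case (hypothetical code):
///   id obj = ({ [SomeClass createObject]; });
/// The trailing message send returns at +1; splicing out the consume cast
/// lets the statement-expression bind the result exactly once.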
/// Cannot fail. static Expr *maybeRebuildARCConsumingStmt(Stmt *Statement) { // Should always be wrapped with one of these. ExprWithCleanups *cleanups = dyn_cast<ExprWithCleanups>(Statement); if (!cleanups) return nullptr; ImplicitCastExpr *cast = dyn_cast<ImplicitCastExpr>(cleanups->getSubExpr()); if (!cast || cast->getCastKind() != CK_ARCConsumeObject) return nullptr; // Splice out the cast. This shouldn't modify any interesting // features of the statement. Expr *producer = cast->getSubExpr(); assert(producer->getType() == cast->getType()); assert(producer->getValueKind() == cast->getValueKind()); cleanups->setSubExpr(producer); return cleanups; } void Sema::ActOnStartStmtExpr() { PushExpressionEvaluationContext(ExprEvalContexts.back().Context); } void Sema::ActOnStmtExprError() { // Note that this function is also called by TreeTransform when leaving a // StmtExpr scope without rebuilding anything. DiscardCleanupsInEvaluationContext(); PopExpressionEvaluationContext(); } ExprResult Sema::ActOnStmtExpr(SourceLocation LPLoc, Stmt *SubStmt, SourceLocation RPLoc) { // "({..})" assert(SubStmt && isa<CompoundStmt>(SubStmt) && "Invalid action invocation!"); CompoundStmt *Compound = cast<CompoundStmt>(SubStmt); if (hasAnyUnrecoverableErrorsInThisFunction()) DiscardCleanupsInEvaluationContext(); assert(!Cleanup.exprNeedsCleanups() && "cleanups within StmtExpr not correctly bound!"); PopExpressionEvaluationContext(); // FIXME: there are a variety of strange constraints to enforce here, for // example, it is not possible to goto into a stmt expression apparently. // More semantic analysis is needed. // If there are sub-stmts in the compound stmt, take the type of the last one // as the type of the stmtexpr. QualType Ty = Context.VoidTy; bool StmtExprMayBindToTemp = false; if (!Compound->body_empty()) { Stmt *LastStmt = Compound->body_back(); LabelStmt *LastLabelStmt = nullptr; // If LastStmt is a label, skip down through into the body. while (LabelStmt *Label = dyn_cast<LabelStmt>(LastStmt)) { LastLabelStmt = Label; LastStmt = Label->getSubStmt(); } if (Expr *LastE = dyn_cast<Expr>(LastStmt)) { // Do function/array conversion on the last expression, but not // lvalue-to-rvalue. However, initialize an unqualified type. ExprResult LastExpr = DefaultFunctionArrayConversion(LastE); if (LastExpr.isInvalid()) return ExprError(); Ty = LastExpr.get()->getType().getUnqualifiedType(); if (!Ty->isDependentType() && !LastExpr.get()->isTypeDependent()) { // In ARC, if the final expression ends in a consume, splice // the consume out and bind it later. In the alternate case // (when dealing with a retainable type), the result // initialization will create a produce. In both cases the // result will be +1, and we'll need to balance that out with // a bind. if (Expr *rebuiltLastStmt = maybeRebuildARCConsumingStmt(LastExpr.get())) { LastExpr = rebuiltLastStmt; } else { LastExpr = PerformCopyInitialization( InitializedEntity::InitializeResult(LPLoc, Ty, false), SourceLocation(), LastExpr); } if (LastExpr.isInvalid()) return ExprError(); if (LastExpr.get() != nullptr) { if (!LastLabelStmt) Compound->setLastStmt(LastExpr.get()); else LastLabelStmt->setSubStmt(LastExpr.get()); StmtExprMayBindToTemp = true; } } } } // FIXME: Check that expression type is complete/non-abstract; statement // expressions are not lvalues.
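// (GNU statement-expression recap, illustrative only:
//    int y = ({ int t = f(); t * 2; });
//  'y' takes the type and value of the last sub-statement, as computed
//  above.)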
Expr *ResStmtExpr = new (Context) StmtExpr(Compound, Ty, LPLoc, RPLoc); if (StmtExprMayBindToTemp) return MaybeBindToTemporary(ResStmtExpr); return ResStmtExpr; } ExprResult Sema::BuildBuiltinOffsetOf(SourceLocation BuiltinLoc, TypeSourceInfo *TInfo, ArrayRef<OffsetOfComponent> Components, SourceLocation RParenLoc) { QualType ArgTy = TInfo->getType(); bool Dependent = ArgTy->isDependentType(); SourceRange TypeRange = TInfo->getTypeLoc().getLocalSourceRange(); // We must have at least one component that refers to the type, and the first // one is known to be a field designator. Verify that the ArgTy represents // a struct/union/class. if (!Dependent && !ArgTy->isRecordType()) return ExprError(Diag(BuiltinLoc, diag::err_offsetof_record_type) << ArgTy << TypeRange); // Type must be complete per C99 7.17p3 because declaring a variable // with an incomplete type would be ill-formed. if (!Dependent && RequireCompleteType(BuiltinLoc, ArgTy, diag::err_offsetof_incomplete_type, TypeRange)) return ExprError(); bool DidWarnAboutNonPOD = false; QualType CurrentType = ArgTy; SmallVector<OffsetOfNode, 4> Comps; SmallVector<Expr*, 4> Exprs; for (const OffsetOfComponent &OC : Components) { if (OC.isBrackets) { // Offset of an array sub-field. TODO: Should we allow vector elements? if (!CurrentType->isDependentType()) { const ArrayType *AT = Context.getAsArrayType(CurrentType); if (!AT) return ExprError(Diag(OC.LocEnd, diag::err_offsetof_array_type) << CurrentType); CurrentType = AT->getElementType(); } else CurrentType = Context.DependentTy; ExprResult IdxRval = DefaultLvalueConversion(static_cast<Expr*>(OC.U.E)); if (IdxRval.isInvalid()) return ExprError(); Expr *Idx = IdxRval.get(); // The expression must be an integral expression. // FIXME: An integral constant expression? if (!Idx->isTypeDependent() && !Idx->isValueDependent() && !Idx->getType()->isIntegerType()) return ExprError(Diag(Idx->getLocStart(), diag::err_typecheck_subscript_not_integer) << Idx->getSourceRange()); // Record this array index. Comps.push_back(OffsetOfNode(OC.LocStart, Exprs.size(), OC.LocEnd)); Exprs.push_back(Idx); continue; } // Offset of a field. if (CurrentType->isDependentType()) { // We have the offset of a field, but we can't look into the dependent // type. Just record the identifier of the field. Comps.push_back(OffsetOfNode(OC.LocStart, OC.U.IdentInfo, OC.LocEnd)); CurrentType = Context.DependentTy; continue; } // We need to have a complete type to look into. if (RequireCompleteType(OC.LocStart, CurrentType, diag::err_offsetof_incomplete_type)) return ExprError(); // Look for the designated field. const RecordType *RC = CurrentType->getAs<RecordType>(); if (!RC) return ExprError(Diag(OC.LocEnd, diag::err_offsetof_record_type) << CurrentType); RecordDecl *RD = RC->getDecl(); // C++ [lib.support.types]p5: // The macro offsetof accepts a restricted set of type arguments in this // International Standard. type shall be a POD structure or a POD union // (clause 9). // C++11 [support.types]p4: // If type is not a standard-layout class (Clause 9), the results are // undefined. if (CXXRecordDecl *CRD = dyn_cast<CXXRecordDecl>(RD)) { bool IsSafe = LangOpts.CPlusPlus11 ? CRD->isStandardLayout() : CRD->isPOD(); unsigned DiagID = LangOpts.CPlusPlus11 ? diag::ext_offsetof_non_standardlayout_type : diag::ext_offsetof_non_pod_type; if (!IsSafe && !DidWarnAboutNonPOD && DiagRuntimeBehavior(BuiltinLoc, nullptr, PDiag(DiagID) << SourceRange(Components[0].LocStart, OC.LocEnd) << CurrentType)) DidWarnAboutNonPOD = true; } // Look for the field.
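// (Illustrative: in 'offsetof(struct S, arr[2].c)' the '[2]' was recorded
// as an array node above; 'c' is the field designator resolved below.)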
LookupResult R(*this, OC.U.IdentInfo, OC.LocStart, LookupMemberName); LookupQualifiedName(R, RD); FieldDecl *MemberDecl = R.getAsSingle<FieldDecl>(); IndirectFieldDecl *IndirectMemberDecl = nullptr; if (!MemberDecl) { if ((IndirectMemberDecl = R.getAsSingle<IndirectFieldDecl>())) MemberDecl = IndirectMemberDecl->getAnonField(); } if (!MemberDecl) return ExprError(Diag(BuiltinLoc, diag::err_no_member) << OC.U.IdentInfo << RD << SourceRange(OC.LocStart, OC.LocEnd)); // C99 7.17p3: // (If the specified member is a bit-field, the behavior is undefined.) // // We diagnose this as an error. if (MemberDecl->isBitField()) { Diag(OC.LocEnd, diag::err_offsetof_bitfield) << MemberDecl->getDeclName() << SourceRange(BuiltinLoc, RParenLoc); Diag(MemberDecl->getLocation(), diag::note_bitfield_decl); return ExprError(); } RecordDecl *Parent = MemberDecl->getParent(); if (IndirectMemberDecl) Parent = cast<RecordDecl>(IndirectMemberDecl->getDeclContext()); // If the member was found in a base class, introduce OffsetOfNodes for // the base class indirections. CXXBasePaths Paths; if (IsDerivedFrom(OC.LocStart, CurrentType, Context.getTypeDeclType(Parent), Paths)) { if (Paths.getDetectedVirtual()) { Diag(OC.LocEnd, diag::err_offsetof_field_of_virtual_base) << MemberDecl->getDeclName() << SourceRange(BuiltinLoc, RParenLoc); return ExprError(); } CXXBasePath &Path = Paths.front(); for (const CXXBasePathElement &B : Path) Comps.push_back(OffsetOfNode(B.Base)); } if (IndirectMemberDecl) { for (auto *FI : IndirectMemberDecl->chain()) { assert(isa<FieldDecl>(FI)); Comps.push_back(OffsetOfNode(OC.LocStart, cast<FieldDecl>(FI), OC.LocEnd)); } } else Comps.push_back(OffsetOfNode(OC.LocStart, MemberDecl, OC.LocEnd)); CurrentType = MemberDecl->getType().getNonReferenceType(); } return OffsetOfExpr::Create(Context, Context.getSizeType(), BuiltinLoc, TInfo, Comps, Exprs, RParenLoc); } ExprResult Sema::ActOnBuiltinOffsetOf(Scope *S, SourceLocation BuiltinLoc, SourceLocation TypeLoc, ParsedType ParsedArgTy, ArrayRef<OffsetOfComponent> Components, SourceLocation RParenLoc) { TypeSourceInfo *ArgTInfo; QualType ArgTy = GetTypeFromParser(ParsedArgTy, &ArgTInfo); if (ArgTy.isNull()) return ExprError(); if (!ArgTInfo) ArgTInfo = Context.getTrivialTypeSourceInfo(ArgTy, TypeLoc); return BuildBuiltinOffsetOf(BuiltinLoc, ArgTInfo, Components, RParenLoc); } ExprResult Sema::ActOnChooseExpr(SourceLocation BuiltinLoc, Expr *CondExpr, Expr *LHSExpr, Expr *RHSExpr, SourceLocation RPLoc) { assert((CondExpr && LHSExpr && RHSExpr) && "Missing type argument(s)"); ExprValueKind VK = VK_RValue; ExprObjectKind OK = OK_Ordinary; QualType resType; bool ValueDependent = false; bool CondIsTrue = false; if (CondExpr->isTypeDependent() || CondExpr->isValueDependent()) { resType = Context.DependentTy; ValueDependent = true; } else { // The conditional expression is required to be a constant expression. llvm::APSInt condEval(32); ExprResult CondICE = VerifyIntegerConstantExpression(CondExpr, &condEval, diag::err_typecheck_choose_expr_requires_constant, false); if (CondICE.isInvalid()) return ExprError(); CondExpr = CondICE.get(); CondIsTrue = condEval.getZExtValue(); // If the condition is > zero, then the AST type is the same as the LHSExpr. Expr *ActiveExpr = CondIsTrue ?
LHSExpr : RHSExpr; resType = ActiveExpr->getType(); ValueDependent = ActiveExpr->isValueDependent(); VK = ActiveExpr->getValueKind(); OK = ActiveExpr->getObjectKind(); } return new (Context) ChooseExpr(BuiltinLoc, CondExpr, LHSExpr, RHSExpr, resType, VK, OK, RPLoc, CondIsTrue, resType->isDependentType(), ValueDependent); } //===----------------------------------------------------------------------===// // Clang Extensions. //===----------------------------------------------------------------------===// /// ActOnBlockStart - This callback is invoked when a block literal is started. void Sema::ActOnBlockStart(SourceLocation CaretLoc, Scope *CurScope) { BlockDecl *Block = BlockDecl::Create(Context, CurContext, CaretLoc); if (LangOpts.CPlusPlus) { Decl *ManglingContextDecl; if (MangleNumberingContext *MCtx = getCurrentMangleNumberContext(Block->getDeclContext(), ManglingContextDecl)) { unsigned ManglingNumber = MCtx->getManglingNumber(Block); Block->setBlockMangling(ManglingNumber, ManglingContextDecl); } } PushBlockScope(CurScope, Block); CurContext->addDecl(Block); if (CurScope) PushDeclContext(CurScope, Block); else CurContext = Block; getCurBlock()->HasImplicitReturnType = true; // Enter a new evaluation context to insulate the block from any // cleanups from the enclosing full-expression. PushExpressionEvaluationContext( ExpressionEvaluationContext::PotentiallyEvaluated); } void Sema::ActOnBlockArguments(SourceLocation CaretLoc, Declarator &ParamInfo, Scope *CurScope) { assert(ParamInfo.getIdentifier() == nullptr && "block-id should have no identifier!"); assert(ParamInfo.getContext() == DeclaratorContext::BlockLiteralContext); BlockScopeInfo *CurBlock = getCurBlock(); TypeSourceInfo *Sig = GetTypeForDeclarator(ParamInfo, CurScope); QualType T = Sig->getType(); // FIXME: We should allow unexpanded parameter packs here, but that would, // in turn, make the block expression contain unexpanded parameter packs. if (DiagnoseUnexpandedParameterPack(CaretLoc, Sig, UPPC_Block)) { // Drop the parameters. FunctionProtoType::ExtProtoInfo EPI; EPI.HasTrailingReturn = false; EPI.TypeQuals |= DeclSpec::TQ_const; T = Context.getFunctionType(Context.DependentTy, None, EPI); Sig = Context.getTrivialTypeSourceInfo(T); } // GetTypeForDeclarator always produces a function type for a block // literal signature. Furthermore, it is always a FunctionProtoType // unless the function was written with a typedef. assert(T->isFunctionType() && "GetTypeForDeclarator made a non-function block signature"); // Look for an explicit signature in that function type. FunctionProtoTypeLoc ExplicitSignature; if ((ExplicitSignature = Sig->getTypeLoc().getAsAdjusted<FunctionProtoTypeLoc>())) { // Check whether that explicit signature was synthesized by // GetTypeForDeclarator. If so, don't save that as part of the // written signature. if (ExplicitSignature.getLocalRangeBegin() == ExplicitSignature.getLocalRangeEnd()) { // This would be much cheaper if we stored TypeLocs instead of // TypeSourceInfos.
TypeLoc Result = ExplicitSignature.getReturnLoc(); unsigned Size = Result.getFullDataSize(); Sig = Context.CreateTypeSourceInfo(Result.getType(), Size); Sig->getTypeLoc().initializeFullCopy(Result, Size); ExplicitSignature = FunctionProtoTypeLoc(); } } CurBlock->TheDecl->setSignatureAsWritten(Sig); CurBlock->FunctionType = T; const FunctionType *Fn = T->getAs<FunctionType>(); QualType RetTy = Fn->getReturnType(); bool isVariadic = (isa<FunctionProtoType>(Fn) && cast<FunctionProtoType>(Fn)->isVariadic()); CurBlock->TheDecl->setIsVariadic(isVariadic); // Context.DependentTy is used as a placeholder for a missing block // return type. TODO: what should we do with declarators like: // ^ * { ... } // If the answer is "apply template argument deduction".... if (RetTy != Context.DependentTy) { CurBlock->ReturnType = RetTy; CurBlock->TheDecl->setBlockMissingReturnType(false); CurBlock->HasImplicitReturnType = false; } // Push block parameters from the declarator if we had them. SmallVector<ParmVarDecl*, 8> Params; if (ExplicitSignature) { for (unsigned I = 0, E = ExplicitSignature.getNumParams(); I != E; ++I) { ParmVarDecl *Param = ExplicitSignature.getParam(I); if (Param->getIdentifier() == nullptr && !Param->isImplicit() && !Param->isInvalidDecl() && !getLangOpts().CPlusPlus) Diag(Param->getLocation(), diag::err_parameter_name_omitted); Params.push_back(Param); } // Fake up parameter variables if we have a typedef, like // ^ fntype { ... } } else if (const FunctionProtoType *Fn = T->getAs<FunctionProtoType>()) { for (const auto &I : Fn->param_types()) { ParmVarDecl *Param = BuildParmVarDeclForTypedef( CurBlock->TheDecl, ParamInfo.getLocStart(), I); Params.push_back(Param); } } // Set the parameters on the block decl. if (!Params.empty()) { CurBlock->TheDecl->setParams(Params); CheckParmsForFunctionDef(CurBlock->TheDecl->parameters(), /*CheckParameterNames=*/false); } // Finally we can process decl attributes. ProcessDeclAttributes(CurScope, CurBlock->TheDecl, ParamInfo); // Put the parameter variables in scope. for (auto AI : CurBlock->TheDecl->parameters()) { AI->setOwningFunction(CurBlock->TheDecl); // If this has an identifier, add it to the scope stack. if (AI->getIdentifier()) { CheckShadow(CurBlock->TheScope, AI); PushOnScopeChains(AI, CurBlock->TheScope); } } } /// ActOnBlockError - If there is an error parsing a block, this callback /// is invoked to pop the information about the block from the action impl. void Sema::ActOnBlockError(SourceLocation CaretLoc, Scope *CurScope) { // Leave the expression-evaluation context. DiscardCleanupsInEvaluationContext(); PopExpressionEvaluationContext(); // Pop off CurBlock, handle nested blocks. PopDeclContext(); PopFunctionScopeInfo(); } /// ActOnBlockStmtExpr - This is called when the body of a block statement /// literal was successfully completed. ^(int x){...} ExprResult Sema::ActOnBlockStmtExpr(SourceLocation CaretLoc, Stmt *Body, Scope *CurScope) { // If blocks are disabled, emit an error. if (!LangOpts.Blocks) Diag(CaretLoc, diag::err_blocks_disable) << LangOpts.OpenCL; // Leave the expression-evaluation context.
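// (The matching Push was done in ActOnBlockStart; as in ActOnStmtExpr
// above, cleanups are discarded first when the enclosing function already
// contains unrecoverable errors.)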
if (hasAnyUnrecoverableErrorsInThisFunction()) DiscardCleanupsInEvaluationContext(); assert(!Cleanup.exprNeedsCleanups() && "cleanups within block not correctly bound!"); PopExpressionEvaluationContext(); BlockScopeInfo *BSI = cast<BlockScopeInfo>(FunctionScopes.back()); if (BSI->HasImplicitReturnType) deduceClosureReturnType(*BSI); PopDeclContext(); QualType RetTy = Context.VoidTy; if (!BSI->ReturnType.isNull()) RetTy = BSI->ReturnType; bool NoReturn = BSI->TheDecl->hasAttr<NoReturnAttr>(); QualType BlockTy; // Set the captured variables on the block. // FIXME: Share capture structure between BlockDecl and CapturingScopeInfo! SmallVector<BlockDecl::Capture, 4> Captures; for (CapturingScopeInfo::Capture &Cap : BSI->Captures) { if (Cap.isThisCapture()) continue; BlockDecl::Capture NewCap(Cap.getVariable(), Cap.isBlockCapture(), Cap.isNested(), Cap.getInitExpr()); Captures.push_back(NewCap); } BSI->TheDecl->setCaptures(Context, Captures, BSI->CXXThisCaptureIndex != 0); // If the user wrote a function type in some form, try to use that. if (!BSI->FunctionType.isNull()) { const FunctionType *FTy = BSI->FunctionType->getAs<FunctionType>(); FunctionType::ExtInfo Ext = FTy->getExtInfo(); if (NoReturn && !Ext.getNoReturn()) Ext = Ext.withNoReturn(true); // Turn protoless block types into nullary block types. if (isa<FunctionNoProtoType>(FTy)) { FunctionProtoType::ExtProtoInfo EPI; EPI.ExtInfo = Ext; BlockTy = Context.getFunctionType(RetTy, None, EPI); // Otherwise, if we don't need to change anything about the function type, // preserve its sugar structure. } else if (FTy->getReturnType() == RetTy && (!NoReturn || FTy->getNoReturnAttr())) { BlockTy = BSI->FunctionType; // Otherwise, make the minimal modifications to the function type. } else { const FunctionProtoType *FPT = cast<FunctionProtoType>(FTy); FunctionProtoType::ExtProtoInfo EPI = FPT->getExtProtoInfo(); EPI.TypeQuals = 0; // FIXME: silently? EPI.ExtInfo = Ext; BlockTy = Context.getFunctionType(RetTy, FPT->getParamTypes(), EPI); } // If we don't have a function type, just build one from nothing. } else { FunctionProtoType::ExtProtoInfo EPI; EPI.ExtInfo = FunctionType::ExtInfo().withNoReturn(NoReturn); BlockTy = Context.getFunctionType(RetTy, None, EPI); } DiagnoseUnusedParameters(BSI->TheDecl->parameters()); BlockTy = Context.getBlockPointerType(BlockTy); // If needed, diagnose invalid gotos and switches in the block. if (getCurFunction()->NeedsScopeChecking() && !PP.isCodeCompletionEnabled()) DiagnoseInvalidJumps(cast<CompoundStmt>(Body)); BSI->TheDecl->setBody(cast<CompoundStmt>(Body)); if (Body && getCurFunction()->HasPotentialAvailabilityViolations) DiagnoseUnguardedAvailabilityViolations(BSI->TheDecl); // Try to apply the named return value optimization. We have to check again // if we can do this, though, because blocks keep return statements around // to deduce an implicit return type. if (getLangOpts().CPlusPlus && RetTy->isRecordType() && !BSI->TheDecl->isDependentContext()) computeNRVO(Body, BSI); BlockExpr *Result = new (Context) BlockExpr(BSI->TheDecl, BlockTy); AnalysisBasedWarnings::Policy WP = AnalysisWarnings.getDefaultPolicy(); PopFunctionScopeInfo(&WP, Result->getBlockDecl(), Result); // If the block isn't obviously global, i.e. it captures anything at // all, then we need to do a few things in the surrounding context: if (Result->getBlockDecl()->hasCaptures()) { // First, this expression has a new cleanup object. ExprCleanupObjects.push_back(Result->getBlockDecl()); Cleanup.setExprNeedsCleanups(true); // It also gets a branch-protected scope if any of the captured // variables needs destruction.
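// (E.g., hypothetically, a block that copy-captures a std::string needs
// the enclosing function marked branch-protected so the copy is destroyed
// on every exit path; that is what the loop below checks for.)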
for (const auto &CI : Result->getBlockDecl()->captures()) { const VarDecl *var = CI.getVariable(); if (var->getType().isDestructedType() != QualType::DK_none) { getCurFunction()->setHasBranchProtectedScope(); break; } } } return Result; } ExprResult Sema::ActOnVAArg(SourceLocation BuiltinLoc, Expr *E, ParsedType Ty, SourceLocation RPLoc) { TypeSourceInfo *TInfo; GetTypeFromParser(Ty, &TInfo); return BuildVAArgExpr(BuiltinLoc, E, TInfo, RPLoc); } ExprResult Sema::BuildVAArgExpr(SourceLocation BuiltinLoc, Expr *E, TypeSourceInfo *TInfo, SourceLocation RPLoc) { Expr *OrigExpr = E; bool IsMS = false; // CUDA device code does not support varargs. if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) { if (const FunctionDecl *F = dyn_cast<FunctionDecl>(CurContext)) { CUDAFunctionTarget T = IdentifyCUDATarget(F); if (T == CFT_Global || T == CFT_Device || T == CFT_HostDevice) return ExprError(Diag(E->getLocStart(), diag::err_va_arg_in_device)); } } // It might be a __builtin_ms_va_list. (But don't ever mark a va_arg() // as Microsoft ABI on an actual Microsoft platform, where // __builtin_ms_va_list and __builtin_va_list are the same.) if (!E->isTypeDependent() && Context.getTargetInfo().hasBuiltinMSVaList() && Context.getTargetInfo().getBuiltinVaListKind() != TargetInfo::CharPtrBuiltinVaList) { QualType MSVaListType = Context.getBuiltinMSVaListType(); if (Context.hasSameType(MSVaListType, E->getType())) { if (CheckForModifiableLvalue(E, BuiltinLoc, *this)) return ExprError(); IsMS = true; } } // Get the va_list type. QualType VaListType = Context.getBuiltinVaListType(); if (!IsMS) { if (VaListType->isArrayType()) { // Deal with implicit array decay; for example, on x86-64, // va_list is an array, but it's supposed to decay to // a pointer for va_arg. VaListType = Context.getArrayDecayedType(VaListType); // Make sure the input expression also decays appropriately. ExprResult Result = UsualUnaryConversions(E); if (Result.isInvalid()) return ExprError(); E = Result.get(); } else if (VaListType->isRecordType() && getLangOpts().CPlusPlus) { // If va_list is a record type and we are compiling in C++ mode, // check the argument using reference binding. InitializedEntity Entity = InitializedEntity::InitializeParameter( Context, Context.getLValueReferenceType(VaListType), false); ExprResult Init = PerformCopyInitialization(Entity, SourceLocation(), E); if (Init.isInvalid()) return ExprError(); E = Init.getAs<Expr>(); } else { // Otherwise, the va_list argument must be an l-value because // it is modified by va_arg. if (!E->isTypeDependent() && CheckForModifiableLvalue(E, BuiltinLoc, *this)) return ExprError(); } } if (!IsMS && !E->isTypeDependent() && !Context.hasSameType(VaListType, E->getType())) return ExprError(Diag(E->getLocStart(), diag::err_first_argument_to_va_arg_not_of_type_va_list) << OrigExpr->getType() << E->getSourceRange()); if (!TInfo->getType()->isDependentType()) { if (RequireCompleteType(TInfo->getTypeLoc().getBeginLoc(), TInfo->getType(), diag::err_second_parameter_to_va_arg_incomplete, TInfo->getTypeLoc())) return ExprError(); if (RequireNonAbstractType(TInfo->getTypeLoc().getBeginLoc(), TInfo->getType(), diag::err_second_parameter_to_va_arg_abstract, TInfo->getTypeLoc())) return ExprError(); if (!TInfo->getType().isPODType(Context)) { Diag(TInfo->getTypeLoc().getBeginLoc(), TInfo->getType()->isObjCLifetimeType() ?
diag::warn_second_parameter_to_va_arg_ownership_qualified : diag::warn_second_parameter_to_va_arg_not_pod) << TInfo->getType() << TInfo->getTypeLoc().getSourceRange(); } // Check for va_arg where arguments of the given type will be promoted // (i.e. this va_arg is guaranteed to have undefined behavior). QualType PromoteType; if (TInfo->getType()->isPromotableIntegerType()) { PromoteType = Context.getPromotedIntegerType(TInfo->getType()); if (Context.typesAreCompatible(PromoteType, TInfo->getType())) PromoteType = QualType(); } if (TInfo->getType()->isSpecificBuiltinType(BuiltinType::Float)) PromoteType = Context.DoubleTy; if (!PromoteType.isNull()) DiagRuntimeBehavior(TInfo->getTypeLoc().getBeginLoc(), E, PDiag(diag::warn_second_parameter_to_va_arg_never_compatible) << TInfo->getType() << PromoteType << TInfo->getTypeLoc().getSourceRange()); } QualType T = TInfo->getType().getNonLValueExprType(Context); return new (Context) VAArgExpr(BuiltinLoc, E, TInfo, RPLoc, T, IsMS); } ExprResult Sema::ActOnGNUNullExpr(SourceLocation TokenLoc) { // The type of __null will be int or long, depending on the size of // pointers on the target. QualType Ty; unsigned pw = Context.getTargetInfo().getPointerWidth(0); if (pw == Context.getTargetInfo().getIntWidth()) Ty = Context.IntTy; else if (pw == Context.getTargetInfo().getLongWidth()) Ty = Context.LongTy; else if (pw == Context.getTargetInfo().getLongLongWidth()) Ty = Context.LongLongTy; else { llvm_unreachable("I don't know size of pointer!"); } return new (Context) GNUNullExpr(Ty, TokenLoc); } bool Sema::ConversionToObjCStringLiteralCheck(QualType DstType, Expr *&Exp, bool Diagnose) { if (!getLangOpts().ObjC1) return false; const ObjCObjectPointerType *PT = DstType->getAs<ObjCObjectPointerType>(); if (!PT) return false; if (!PT->isObjCIdType()) { // Check if the destination is the 'NSString' interface. const ObjCInterfaceDecl *ID = PT->getInterfaceDecl(); if (!ID || !ID->getIdentifier()->isStr("NSString")) return false; } // Ignore any parens, implicit casts (should only be // array-to-pointer decays), and not-so-opaque values. The last is // important for making this trigger for property assignments. Expr *SrcExpr = Exp->IgnoreParenImpCasts(); if (OpaqueValueExpr *OV = dyn_cast<OpaqueValueExpr>(SrcExpr)) if (OV->getSourceExpr()) SrcExpr = OV->getSourceExpr()->IgnoreParenImpCasts(); StringLiteral *SL = dyn_cast<StringLiteral>(SrcExpr); if (!SL || !SL->isAscii()) return false; if (Diagnose) { Diag(SL->getLocStart(), diag::err_missing_atsign_prefix) << FixItHint::CreateInsertion(SL->getLocStart(), "@"); Exp = BuildObjCStringLiteral(SL->getLocStart(), SL).get(); } return true; } static bool maybeDiagnoseAssignmentToFunction(Sema &S, QualType DstType, const Expr *SrcExpr) { if (!DstType->isFunctionPointerType() || !SrcExpr->getType()->isFunctionType()) return false; auto *DRE = dyn_cast<DeclRefExpr>(SrcExpr->IgnoreParenImpCasts()); if (!DRE) return false; auto *FD = dyn_cast<FunctionDecl>(DRE->getDecl()); if (!FD) return false; return !S.checkAddressOfFunctionIsAvailable(FD, /*Complain=*/true, SrcExpr->getLocStart()); } bool Sema::DiagnoseAssignmentResult(AssignConvertType ConvTy, SourceLocation Loc, QualType DstType, QualType SrcType, Expr *SrcExpr, AssignmentAction Action, bool *Complained) { if (Complained) *Complained = false; // Decode the result (notice that AST's are still created for extensions).
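// (Illustrative mapping: 'int *p = 3;' decodes to IntToPointer below and
// gets ext_typecheck_convert_int_pointer, plus a cast fixit when one can
// be synthesized.)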
bool CheckInferredResultType = false; bool isInvalid = false; unsigned DiagKind = 0; FixItHint Hint; ConversionFixItGenerator ConvHints; bool MayHaveConvFixit = false; bool MayHaveFunctionDiff = false; const ObjCInterfaceDecl *IFace = nullptr; const ObjCProtocolDecl *PDecl = nullptr; switch (ConvTy) { case Compatible: DiagnoseAssignmentEnum(DstType, SrcType, SrcExpr); return false; case PointerToInt: DiagKind = diag::ext_typecheck_convert_pointer_int; ConvHints.tryToFixConversion(SrcExpr, SrcType, DstType, *this); MayHaveConvFixit = true; break; case IntToPointer: DiagKind = diag::ext_typecheck_convert_int_pointer; ConvHints.tryToFixConversion(SrcExpr, SrcType, DstType, *this); MayHaveConvFixit = true; break; case IncompatiblePointer: if (Action == AA_Passing_CFAudited) DiagKind = diag::err_arc_typecheck_convert_incompatible_pointer; else if (SrcType->isFunctionPointerType() && DstType->isFunctionPointerType()) DiagKind = diag::ext_typecheck_convert_incompatible_function_pointer; else DiagKind = diag::ext_typecheck_convert_incompatible_pointer; CheckInferredResultType = DstType->isObjCObjectPointerType() && SrcType->isObjCObjectPointerType(); if (Hint.isNull() && !CheckInferredResultType) { ConvHints.tryToFixConversion(SrcExpr, SrcType, DstType, *this); } else if (CheckInferredResultType) { SrcType = SrcType.getUnqualifiedType(); DstType = DstType.getUnqualifiedType(); } MayHaveConvFixit = true; break; case IncompatiblePointerSign: DiagKind = diag::ext_typecheck_convert_incompatible_pointer_sign; break; case FunctionVoidPointer: DiagKind = diag::ext_typecheck_convert_pointer_void_func; break; case IncompatiblePointerDiscardsQualifiers: { // Perform array-to-pointer decay if necessary. if (SrcType->isArrayType()) SrcType = Context.getArrayDecayedType(SrcType); Qualifiers lhq = SrcType->getPointeeType().getQualifiers(); Qualifiers rhq = DstType->getPointeeType().getQualifiers(); if (lhq.getAddressSpace() != rhq.getAddressSpace()) { DiagKind = diag::err_typecheck_incompatible_address_space; break; } else if (lhq.getObjCLifetime() != rhq.getObjCLifetime()) { DiagKind = diag::err_typecheck_incompatible_ownership; break; } llvm_unreachable("unknown error case for discarding qualifiers!"); // fallthrough } case CompatiblePointerDiscardsQualifiers: // If the qualifiers lost were because we were applying the // (deprecated) C++ conversion from a string literal to a char* // (or wchar_t*), then there was no error (C++ 4.2p2). FIXME: // Ideally, this check would be performed in // checkPointerTypesForAssignment. However, that would require a // bit of refactoring (so that the second argument is an // expression, rather than a type), which should be done as part // of a larger effort to fix checkPointerTypesForAssignment for // C++ semantics. 
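// (Illustrative: in C++, 'char *s = "hi";' is the deprecated
// string-literal-to-char* conversion that the early return below exempts
// from the discarded-qualifiers diagnostic.)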
if (getLangOpts().CPlusPlus && IsStringLiteralToNonConstPointerConversion(SrcExpr, DstType)) return false; DiagKind = diag::ext_typecheck_convert_discards_qualifiers; break; case IncompatibleNestedPointerQualifiers: DiagKind = diag::ext_nested_pointer_qualifier_mismatch; break; case IntToBlockPointer: DiagKind = diag::err_int_to_block_pointer; break; case IncompatibleBlockPointer: DiagKind = diag::err_typecheck_convert_incompatible_block_pointer; break; case IncompatibleObjCQualifiedId: { if (SrcType->isObjCQualifiedIdType()) { const ObjCObjectPointerType *srcOPT = SrcType->getAs<ObjCObjectPointerType>(); for (auto *srcProto : srcOPT->quals()) { PDecl = srcProto; break; } if (const ObjCInterfaceType *IFaceT = DstType->getAs<ObjCObjectPointerType>()->getInterfaceType()) IFace = IFaceT->getDecl(); } else if (DstType->isObjCQualifiedIdType()) { const ObjCObjectPointerType *dstOPT = DstType->getAs<ObjCObjectPointerType>(); for (auto *dstProto : dstOPT->quals()) { PDecl = dstProto; break; } if (const ObjCInterfaceType *IFaceT = SrcType->getAs<ObjCObjectPointerType>()->getInterfaceType()) IFace = IFaceT->getDecl(); } DiagKind = diag::warn_incompatible_qualified_id; break; } case IncompatibleVectors: DiagKind = diag::warn_incompatible_vectors; break; case IncompatibleObjCWeakRef: DiagKind = diag::err_arc_weak_unavailable_assign; break; case Incompatible: if (maybeDiagnoseAssignmentToFunction(*this, DstType, SrcExpr)) { if (Complained) *Complained = true; return true; } DiagKind = diag::err_typecheck_convert_incompatible; ConvHints.tryToFixConversion(SrcExpr, SrcType, DstType, *this); MayHaveConvFixit = true; isInvalid = true; MayHaveFunctionDiff = true; break; } QualType FirstType, SecondType; switch (Action) { case AA_Assigning: case AA_Initializing: // The destination type comes first. FirstType = DstType; SecondType = SrcType; break; case AA_Returning: case AA_Passing: case AA_Passing_CFAudited: case AA_Converting: case AA_Sending: case AA_Casting: // The source type comes first. FirstType = SrcType; SecondType = DstType; break; } PartialDiagnostic FDiag = PDiag(DiagKind); if (Action == AA_Passing_CFAudited) FDiag << FirstType << SecondType << AA_Passing << SrcExpr->getSourceRange(); else FDiag << FirstType << SecondType << Action << SrcExpr->getSourceRange(); // If we can fix the conversion, suggest the FixIts.
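// (At most one of ConvHints and Hint is ever populated: the cases above
// either call ConvHints.tryToFixConversion or leave Hint alone, which is
// what the assert below enforces.)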
assert(ConvHints.isNull() || Hint.isNull()); if (!ConvHints.isNull()) { for (FixItHint &H : ConvHints.Hints) FDiag << H; } else { FDiag << Hint; } if (MayHaveConvFixit) { FDiag << (unsigned) (ConvHints.Kind); } if (MayHaveFunctionDiff) HandleFunctionTypeMismatch(FDiag, SecondType, FirstType); Diag(Loc, FDiag); if (DiagKind == diag::warn_incompatible_qualified_id && PDecl && IFace && !IFace->hasDefinition()) Diag(IFace->getLocation(), diag::note_incomplete_class_and_qualified_id) << IFace->getName() << PDecl->getName(); if (SecondType == Context.OverloadTy) NoteAllOverloadCandidates(OverloadExpr::find(SrcExpr).Expression, FirstType, /*TakingAddress=*/true); if (CheckInferredResultType) EmitRelatedResultTypeNote(SrcExpr); if (Action == AA_Returning && ConvTy == IncompatiblePointer) EmitRelatedResultTypeNoteForReturn(DstType); if (Complained) *Complained = true; return isInvalid; } ExprResult Sema::VerifyIntegerConstantExpression(Expr *E, llvm::APSInt *Result) { class SimpleICEDiagnoser : public VerifyICEDiagnoser { public: void diagnoseNotICE(Sema &S, SourceLocation Loc, SourceRange SR) override { S.Diag(Loc, diag::err_expr_not_ice) << S.LangOpts.CPlusPlus << SR; } } Diagnoser; return VerifyIntegerConstantExpression(E, Result, Diagnoser); } ExprResult Sema::VerifyIntegerConstantExpression(Expr *E, llvm::APSInt *Result, unsigned DiagID, bool AllowFold) { class IDDiagnoser : public VerifyICEDiagnoser { unsigned DiagID; public: IDDiagnoser(unsigned DiagID) : VerifyICEDiagnoser(DiagID == 0), DiagID(DiagID) { } void diagnoseNotICE(Sema &S, SourceLocation Loc, SourceRange SR) override { S.Diag(Loc, DiagID) << SR; } } Diagnoser(DiagID); return VerifyIntegerConstantExpression(E, Result, Diagnoser, AllowFold); } void Sema::VerifyICEDiagnoser::diagnoseFold(Sema &S, SourceLocation Loc, SourceRange SR) { S.Diag(Loc, diag::ext_expr_not_ice) << SR << S.LangOpts.CPlusPlus; } ExprResult Sema::VerifyIntegerConstantExpression(Expr *E, llvm::APSInt *Result, VerifyICEDiagnoser &Diagnoser, bool AllowFold) { SourceLocation DiagLoc = E->getLocStart(); if (getLangOpts().CPlusPlus11) { // C++11 [expr.const]p5: // If an expression of literal class type is used in a context where an // integral constant expression is required, then that class type shall // have a single non-explicit conversion function to an integral or // unscoped enumeration type ExprResult Converted; class CXX11ConvertDiagnoser : public ICEConvertDiagnoser { public: CXX11ConvertDiagnoser(bool Silent) : ICEConvertDiagnoser(/*AllowScopedEnumerations*/false, Silent, true) {} SemaDiagnosticBuilder diagnoseNotInt(Sema &S, SourceLocation Loc, QualType T) override { return S.Diag(Loc, diag::err_ice_not_integral) << T; } SemaDiagnosticBuilder diagnoseIncomplete( Sema &S, SourceLocation Loc, QualType T) override { return S.Diag(Loc, diag::err_ice_incomplete_type) << T; } SemaDiagnosticBuilder diagnoseExplicitConv( Sema &S, SourceLocation Loc, QualType T, QualType ConvTy) override { return S.Diag(Loc, diag::err_ice_explicit_conversion) << T << ConvTy; } SemaDiagnosticBuilder noteExplicitConv( Sema &S, CXXConversionDecl *Conv, QualType ConvTy) override { return S.Diag(Conv->getLocation(), diag::note_ice_conversion_here) << ConvTy->isEnumeralType() << ConvTy; } SemaDiagnosticBuilder diagnoseAmbiguous( Sema &S, SourceLocation Loc, QualType T) override { return S.Diag(Loc, diag::err_ice_ambiguous_conversion) << T; } SemaDiagnosticBuilder noteAmbiguous( Sema &S, CXXConversionDecl *Conv, QualType ConvTy) override { return S.Diag(Conv->getLocation(), 
diag::note_ice_conversion_here) << ConvTy->isEnumeralType() << ConvTy; } SemaDiagnosticBuilder diagnoseConversion( Sema &S, SourceLocation Loc, QualType T, QualType ConvTy) override { llvm_unreachable("conversion functions are permitted"); } } ConvertDiagnoser(Diagnoser.Suppress); Converted = PerformContextualImplicitConversion(DiagLoc, E, ConvertDiagnoser); if (Converted.isInvalid()) return Converted; E = Converted.get(); if (!E->getType()->isIntegralOrUnscopedEnumerationType()) return ExprError(); } else if (!E->getType()->isIntegralOrUnscopedEnumerationType()) { // An ICE must be of integral or unscoped enumeration type. if (!Diagnoser.Suppress) Diagnoser.diagnoseNotICE(*this, DiagLoc, E->getSourceRange()); return ExprError(); } // Circumvent ICE checking in C++11 to avoid evaluating the expression twice // in the non-ICE case. if (!getLangOpts().CPlusPlus11 && E->isIntegerConstantExpr(Context)) { if (Result) *Result = E->EvaluateKnownConstInt(Context); return E; } Expr::EvalResult EvalResult; SmallVector<PartialDiagnosticAt, 8> Notes; EvalResult.Diag = &Notes; // Try to evaluate the expression, and produce diagnostics explaining why it's // not a constant expression as a side-effect. bool Folded = E->EvaluateAsRValue(EvalResult, Context) && EvalResult.Val.isInt() && !EvalResult.HasSideEffects; // In C++11, we can rely on diagnostics being produced for any expression // which is not a constant expression. If no diagnostics were produced, then // this is a constant expression. if (Folded && getLangOpts().CPlusPlus11 && Notes.empty()) { if (Result) *Result = EvalResult.Val.getInt(); return E; } // If our only note is the usual "invalid subexpression" note, just point // the caret at its location rather than producing an essentially // redundant note. if (Notes.size() == 1 && Notes[0].second.getDiagID() == diag::note_invalid_subexpr_in_const_expr) { DiagLoc = Notes[0].first; Notes.clear(); } if (!Folded || !AllowFold) { if (!Diagnoser.Suppress) { Diagnoser.diagnoseNotICE(*this, DiagLoc, E->getSourceRange()); for (const PartialDiagnosticAt &Note : Notes) Diag(Note.first, Note.second); } return ExprError(); } Diagnoser.diagnoseFold(*this, DiagLoc, E->getSourceRange()); for (const PartialDiagnosticAt &Note : Notes) Diag(Note.first, Note.second); if (Result) *Result = EvalResult.Val.getInt(); return E; } namespace { // Handle the case where we conclude an expression which we speculatively // considered to be unevaluated is actually evaluated. class TransformToPE : public TreeTransform<TransformToPE> { typedef TreeTransform<TransformToPE> BaseTransform; public: TransformToPE(Sema &SemaRef) : BaseTransform(SemaRef) { } // Make sure we redo semantic analysis bool AlwaysRebuild() { return true; } // Make sure we handle LabelStmts correctly. // FIXME: This does the right thing, but maybe we need a more general // fix to TreeTransform? StmtResult TransformLabelStmt(LabelStmt *S) { S->getDecl()->setStmt(nullptr); return BaseTransform::TransformLabelStmt(S); } // We need to special-case DeclRefExprs referring to FieldDecls which // are not part of a member pointer formation; normal TreeTransforming // doesn't catch this case because of the way we represent them in the AST. // FIXME: This is a bit ugly; is it really the best way to handle this // case? // // Error on DeclRefExprs referring to FieldDecls.
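// (Sketch: a field reference that was fine while the expression was
// presumed unevaluated, e.g. inside a typeof operand, becomes
// err_invalid_non_static_member_use once we learn it is evaluated.)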
ExprResult TransformDeclRefExpr(DeclRefExpr *E) { if (isa<FieldDecl>(E->getDecl()) && !SemaRef.isUnevaluatedContext()) return SemaRef.Diag(E->getLocation(), diag::err_invalid_non_static_member_use) << E->getDecl() << E->getSourceRange(); return BaseTransform::TransformDeclRefExpr(E); } // Exception: filter out member pointer formation ExprResult TransformUnaryOperator(UnaryOperator *E) { if (E->getOpcode() == UO_AddrOf && E->getType()->isMemberPointerType()) return E; return BaseTransform::TransformUnaryOperator(E); } ExprResult TransformLambdaExpr(LambdaExpr *E) { // Lambdas never need to be transformed. return E; } }; } ExprResult Sema::TransformToPotentiallyEvaluated(Expr *E) { assert(isUnevaluatedContext() && "Should only transform unevaluated expressions"); ExprEvalContexts.back().Context = ExprEvalContexts[ExprEvalContexts.size()-2].Context; if (isUnevaluatedContext()) return E; return TransformToPE(*this).TransformExpr(E); } void Sema::PushExpressionEvaluationContext(ExpressionEvaluationContext NewContext, Decl *LambdaContextDecl, bool IsDecltype) { ExprEvalContexts.emplace_back(NewContext, ExprCleanupObjects.size(), Cleanup, LambdaContextDecl, IsDecltype); Cleanup.reset(); if (!MaybeODRUseExprs.empty()) std::swap(MaybeODRUseExprs, ExprEvalContexts.back().SavedMaybeODRUseExprs); } void Sema::PushExpressionEvaluationContext(ExpressionEvaluationContext NewContext, ReuseLambdaContextDecl_t, bool IsDecltype) { Decl *ClosureContextDecl = ExprEvalContexts.back().ManglingContextDecl; PushExpressionEvaluationContext(NewContext, ClosureContextDecl, IsDecltype); } void Sema::PopExpressionEvaluationContext() { ExpressionEvaluationContextRecord& Rec = ExprEvalContexts.back(); unsigned NumTypos = Rec.NumTypos; if (!Rec.Lambdas.empty()) { if (Rec.isUnevaluated() || Rec.isConstantEvaluated()) { unsigned D; if (Rec.isUnevaluated()) { // C++11 [expr.prim.lambda]p2: // A lambda-expression shall not appear in an unevaluated operand // (Clause 5). D = diag::err_lambda_unevaluated_operand; } else { // C++1y [expr.const]p2: // A conditional-expression e is a core constant expression unless the // evaluation of e, following the rules of the abstract machine, would // evaluate [...] a lambda-expression. D = diag::err_lambda_in_constant_expression; } // C++1z allows lambda expressions as core constant expressions. // FIXME: In C++1z, reinstate the restrictions on lambda expressions (CWG // 1607) from appearing within template-arguments and array-bounds that // are part of function-signatures. Be mindful that P0315 (Lambdas in // unevaluated contexts) might lift some of these restrictions in a // future version. if (!Rec.isConstantEvaluated() || !getLangOpts().CPlusPlus17) for (const auto *L : Rec.Lambdas) Diag(L->getLocStart(), D); } else { // Mark the capture expressions odr-used. This was deferred // during lambda expression creation. for (auto *Lambda : Rec.Lambdas) { for (auto *C : Lambda->capture_inits()) MarkDeclarationsReferencedInExpr(C); } } } // When we are coming out of an unevaluated context, clear out any // temporaries that we may have created as part of the evaluation of // the expression in that context: they aren't relevant because they // will never be constructed. if (Rec.isUnevaluated() || Rec.isConstantEvaluated()) { ExprCleanupObjects.erase(ExprCleanupObjects.begin() + Rec.NumCleanupObjects, ExprCleanupObjects.end()); Cleanup = Rec.ParentCleanup; CleanupVarDeclMarking(); std::swap(MaybeODRUseExprs, Rec.SavedMaybeODRUseExprs); // Otherwise, merge the contexts together.
} else { Cleanup.mergeFrom(Rec.ParentCleanup); MaybeODRUseExprs.insert(Rec.SavedMaybeODRUseExprs.begin(), Rec.SavedMaybeODRUseExprs.end()); } // Pop the current expression evaluation context off the stack. ExprEvalContexts.pop_back(); if (!ExprEvalContexts.empty()) ExprEvalContexts.back().NumTypos += NumTypos; else assert(NumTypos == 0 && "There are outstanding typos after popping the " "last ExpressionEvaluationContextRecord"); } void Sema::DiscardCleanupsInEvaluationContext() { ExprCleanupObjects.erase( ExprCleanupObjects.begin() + ExprEvalContexts.back().NumCleanupObjects, ExprCleanupObjects.end()); Cleanup.reset(); MaybeODRUseExprs.clear(); } ExprResult Sema::HandleExprEvaluationContextForTypeof(Expr *E) { if (!E->getType()->isVariablyModifiedType()) return E; return TransformToPotentiallyEvaluated(E); } /// Are we within a context in which some evaluation could be performed (be it /// constant evaluation or runtime evaluation)? Sadly, this notion is not quite /// captured by C++'s idea of an "unevaluated context". static bool isEvaluatableContext(Sema &SemaRef) { switch (SemaRef.ExprEvalContexts.back().Context) { case Sema::ExpressionEvaluationContext::Unevaluated: case Sema::ExpressionEvaluationContext::UnevaluatedAbstract: case Sema::ExpressionEvaluationContext::DiscardedStatement: // Expressions in this context are never evaluated. return false; case Sema::ExpressionEvaluationContext::UnevaluatedList: case Sema::ExpressionEvaluationContext::ConstantEvaluated: case Sema::ExpressionEvaluationContext::PotentiallyEvaluated: // Expressions in this context could be evaluated. return true; case Sema::ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed: // Referenced declarations will only be used if the construct in the // containing expression is used, at which point we'll be given another // turn to mark them. return false; } llvm_unreachable("Invalid context"); } /// Are we within a context in which references to resolved functions or to /// variables result in odr-use? static bool isOdrUseContext(Sema &SemaRef, bool SkipDependentUses = true) { // An expression in a template is not really an expression until it's been // instantiated, so it doesn't trigger odr-use. if (SkipDependentUses && SemaRef.CurContext->isDependentContext()) return false; switch (SemaRef.ExprEvalContexts.back().Context) { case Sema::ExpressionEvaluationContext::Unevaluated: case Sema::ExpressionEvaluationContext::UnevaluatedList: case Sema::ExpressionEvaluationContext::UnevaluatedAbstract: case Sema::ExpressionEvaluationContext::DiscardedStatement: return false; case Sema::ExpressionEvaluationContext::ConstantEvaluated: case Sema::ExpressionEvaluationContext::PotentiallyEvaluated: return true; case Sema::ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed: return false; } llvm_unreachable("Invalid context"); } static bool isImplicitlyDefinableConstexprFunction(FunctionDecl *Func) { CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(Func); return Func->isConstexpr() && (Func->isImplicitlyInstantiable() || (MD && !MD->isUserProvided())); } /// \brief Mark a function referenced, and check whether it is odr-used /// (C++ [basic.def.odr]p2, C99 6.9p3) void Sema::MarkFunctionReferenced(SourceLocation Loc, FunctionDecl *Func, bool MightBeOdrUse) { assert(Func && "No function?"); Func->setReferenced(); // C++11 [basic.def.odr]p3: // A function whose name appears as a potentially-evaluated expression is // odr-used if it is the unique lookup result or the selected member of a // set of overloaded functions [...].
// // We (incorrectly) mark overload resolution as an unevaluated context, so we // can just check that here. bool OdrUse = MightBeOdrUse && isOdrUseContext(*this); // Determine whether we require a function definition to exist, per // C++11 [temp.inst]p3: // Unless a function template specialization has been explicitly // instantiated or explicitly specialized, the function template // specialization is implicitly instantiated when the specialization is // referenced in a context that requires a function definition to exist. // // That is either when this is an odr-use, or when a usage of a constexpr // function occurs within an evaluatable context. bool NeedDefinition = OdrUse || (isEvaluatableContext(*this) && isImplicitlyDefinableConstexprFunction(Func)); // C++14 [temp.expl.spec]p6: // If a template [...] is explicitly specialized then that specialization // shall be declared before the first use of that specialization that would // cause an implicit instantiation to take place, in every translation unit // in which such a use occurs if (NeedDefinition && (Func->getTemplateSpecializationKind() != TSK_Undeclared || Func->getMemberSpecializationInfo())) checkSpecializationVisibility(Loc, Func); // C++14 [except.spec]p17: // An exception-specification is considered to be needed when: // - the function is odr-used or, if it appears in an unevaluated operand, // would be odr-used if the expression were potentially-evaluated; // // Note, we do this even if MightBeOdrUse is false. That indicates that the // function is a pure virtual function we're calling, and in that case the // function was selected by overload resolution and we need to resolve its // exception specification for a different reason. const FunctionProtoType *FPT = Func->getType()->getAs<FunctionProtoType>(); if (FPT && isUnresolvedExceptionSpec(FPT->getExceptionSpecType())) ResolveExceptionSpec(Loc, FPT); // If we don't need to mark the function as used, and we don't need to // try to provide a definition, there's nothing more to do. if ((Func->isUsed(/*CheckUsedAttr=*/false) || !OdrUse) && (!NeedDefinition || Func->getBody())) return; // Note that this declaration has been used.
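// Illustrative example (editor's sketch, not from the original source):
//   struct S { S() = default; };
//   S s;  // marking S::S() referenced here leads to
//         // DefineImplicitDefaultConstructor below, unless the
//         // constructor is trivial and can be skipped entirely.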
if (CXXConstructorDecl *Constructor = dyn_cast<CXXConstructorDecl>(Func)) { Constructor = cast<CXXConstructorDecl>(Constructor->getFirstDecl()); if (Constructor->isDefaulted() && !Constructor->isDeleted()) { if (Constructor->isDefaultConstructor()) { if (Constructor->isTrivial() && !Constructor->hasAttr<DLLExportAttr>()) return; DefineImplicitDefaultConstructor(Loc, Constructor); } else if (Constructor->isCopyConstructor()) { DefineImplicitCopyConstructor(Loc, Constructor); } else if (Constructor->isMoveConstructor()) { DefineImplicitMoveConstructor(Loc, Constructor); } } else if (Constructor->getInheritedConstructor()) { DefineInheritingConstructor(Loc, Constructor); } } else if (CXXDestructorDecl *Destructor = dyn_cast<CXXDestructorDecl>(Func)) { Destructor = cast<CXXDestructorDecl>(Destructor->getFirstDecl()); if (Destructor->isDefaulted() && !Destructor->isDeleted()) { if (Destructor->isTrivial() && !Destructor->hasAttr<DLLExportAttr>()) return; DefineImplicitDestructor(Loc, Destructor); } if (Destructor->isVirtual() && getLangOpts().AppleKext) MarkVTableUsed(Loc, Destructor->getParent()); } else if (CXXMethodDecl *MethodDecl = dyn_cast<CXXMethodDecl>(Func)) { if (MethodDecl->isOverloadedOperator() && MethodDecl->getOverloadedOperator() == OO_Equal) { MethodDecl = cast<CXXMethodDecl>(MethodDecl->getFirstDecl()); if (MethodDecl->isDefaulted() && !MethodDecl->isDeleted()) { if (MethodDecl->isCopyAssignmentOperator()) DefineImplicitCopyAssignment(Loc, MethodDecl); else if (MethodDecl->isMoveAssignmentOperator()) DefineImplicitMoveAssignment(Loc, MethodDecl); } } else if (isa<CXXConversionDecl>(MethodDecl) && MethodDecl->getParent()->isLambda()) { CXXConversionDecl *Conversion = cast<CXXConversionDecl>(MethodDecl->getFirstDecl()); if (Conversion->isLambdaToBlockPointerConversion()) DefineImplicitLambdaToBlockPointerConversion(Loc, Conversion); else DefineImplicitLambdaToFunctionPointerConversion(Loc, Conversion); } else if (MethodDecl->isVirtual() && getLangOpts().AppleKext) MarkVTableUsed(Loc, MethodDecl->getParent()); } // Recursive functions should be marked when used from another function. // FIXME: Is this really right? if (CurContext == Func) return; // Implicit instantiation of function templates and member functions of // class templates. if (Func->isImplicitlyInstantiable()) { TemplateSpecializationKind TSK = Func->getTemplateSpecializationKind(); SourceLocation PointOfInstantiation = Func->getPointOfInstantiation(); bool FirstInstantiation = PointOfInstantiation.isInvalid(); if (FirstInstantiation) { PointOfInstantiation = Loc; Func->setTemplateSpecializationKind(TSK, PointOfInstantiation); } else if (TSK != TSK_ImplicitInstantiation) { // Use the point of use as the point of instantiation, instead of the // point of explicit instantiation (which we track as the actual point of // instantiation). This gives better backtraces in diagnostics. PointOfInstantiation = Loc; } if (FirstInstantiation || TSK != TSK_ImplicitInstantiation || Func->isConstexpr()) { if (isa<CXXRecordDecl>(Func->getDeclContext()) && cast<CXXRecordDecl>(Func->getDeclContext())->isLocalClass() && CodeSynthesisContexts.size()) PendingLocalImplicitInstantiations.push_back( std::make_pair(Func, PointOfInstantiation)); else if (Func->isConstexpr()) // Do not defer instantiations of constexpr functions, to avoid the // expression evaluator needing to call back into Sema if it sees a // call to such a function. InstantiateFunctionDefinition(PointOfInstantiation, Func); else { Func->setInstantiationIsPending(true); PendingInstantiations.push_back(std::make_pair(Func, PointOfInstantiation)); // Notify the consumer that a function was implicitly instantiated.
Consumer.HandleCXXImplicitFunctionInstantiation(Func); } } } else { // Walk redefinitions, as some of them may be instantiable. for (auto i : Func->redecls()) { if (!i->isUsed(false) && i->isImplicitlyInstantiable()) MarkFunctionReferenced(Loc, i, OdrUse); } } if (!OdrUse) return; // Keep track of used but undefined functions. if (!Func->isDefined()) { if (mightHaveNonExternalLinkage(Func)) UndefinedButUsed.insert(std::make_pair(Func->getCanonicalDecl(), Loc)); else if (Func->getMostRecentDecl()->isInlined() && !LangOpts.GNUInline && !Func->getMostRecentDecl()->hasAttr<GNUInlineAttr>()) UndefinedButUsed.insert(std::make_pair(Func->getCanonicalDecl(), Loc)); else if (isExternalWithNoLinkageType(Func)) UndefinedButUsed.insert(std::make_pair(Func->getCanonicalDecl(), Loc)); } Func->markUsed(Context); } static void diagnoseUncapturableValueReference(Sema &S, SourceLocation loc, ValueDecl *var, DeclContext *DC) { DeclContext *VarDC = var->getDeclContext(); // If the parameter still belongs to the translation unit, then // we're actually just using one parameter in the declaration of // the next. if (isa<ParmVarDecl>(var) && isa<TranslationUnitDecl>(VarDC)) return; // For C code, don't diagnose about capture if we're not actually in code // right now; it's impossible to write a non-constant expression outside of // function context, so we'll get other (more useful) diagnostics later. // // For C++, things get a bit more nasty... it would be nice to suppress this // diagnostic for certain cases like using a local variable in an array bound // for a member of a local class, but the correct predicate is not obvious. if (!S.getLangOpts().CPlusPlus && !S.CurContext->isFunctionOrMethod()) return; unsigned ValueKind = isa<ParmVarDecl>(var) ? 1 : 0; unsigned ContextKind = 3; // unknown if (isa<CXXMethodDecl>(VarDC) && cast<CXXRecordDecl>(VarDC->getParent())->isLambda()) { ContextKind = 2; } else if (isa<FunctionDecl>(VarDC)) { ContextKind = 0; } else if (isa<BlockDecl>(VarDC)) { ContextKind = 1; } S.Diag(loc, diag::err_reference_to_local_in_enclosing_context) << var << ValueKind << ContextKind << VarDC; S.Diag(var->getLocation(), diag::note_entity_declared_at) << var; // FIXME: Add additional diagnostic info about class etc. which prevents // capture. } static bool isVariableAlreadyCapturedInScopeInfo(CapturingScopeInfo *CSI, VarDecl *Var, bool &SubCapturesAreNested, QualType &CaptureType, QualType &DeclRefType) { // Check whether we've already captured it. if (CSI->CaptureMap.count(Var)) { // If we found a capture, any subcaptures are nested. SubCapturesAreNested = true; // Retrieve the capture type for this variable. CaptureType = CSI->getCapture(Var).getCaptureType(); // Compute the type of an expression that refers to this variable. DeclRefType = CaptureType.getNonReferenceType(); // Similarly to mutable captures in lambda, all the OpenMP captures by copy // are mutable in the sense that user can change their value - they are // private instances of the captured declarations. const CapturingScopeInfo::Capture &Cap = CSI->getCapture(Var); if (Cap.isCopyCapture() && !(isa<LambdaScopeInfo>(CSI) && cast<LambdaScopeInfo>(CSI)->Mutable) && !(isa<CapturedRegionScopeInfo>(CSI) && cast<CapturedRegionScopeInfo>(CSI)->CapRegionKind == CR_OpenMP)) DeclRefType.addConst(); return true; } return false; } // Only block literals, captured statements, and lambda expressions can // capture; other scopes don't work.
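// For example (editor's sketch, not from the original source), a local
// class is not one of these capturing contexts, so the reference below is
// diagnosed via diagnoseUncapturableValueReference above:
//   void f() {
//     int n = 0;
//     struct Local { int g() { return n; } };  // error: reference to local
//   }                                          // variable in enclosing scope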
static DeclContext *getParentOfCapturingContextOrNull(DeclContext *DC, VarDecl *Var, SourceLocation Loc, const bool Diagnose, Sema &S) { if (isa<BlockDecl>(DC) || isa<CapturedDecl>(DC) || isLambdaCallOperator(DC)) return getLambdaAwareParentOfDeclContext(DC); else if (Var->hasLocalStorage()) { if (Diagnose) diagnoseUncapturableValueReference(S, Loc, Var, DC); } return nullptr; } // Certain capturing entities (lambdas, blocks etc.) are not allowed to capture // certain types of variables (unnamed, variably modified types etc.) // so check for eligibility. static bool isVariableCapturable(CapturingScopeInfo *CSI, VarDecl *Var, SourceLocation Loc, const bool Diagnose, Sema &S) { bool IsBlock = isa<BlockScopeInfo>(CSI); bool IsLambda = isa<LambdaScopeInfo>(CSI); // Lambdas are not allowed to capture unnamed variables // (e.g. anonymous unions). // FIXME: The C++11 rules don't actually state this explicitly, but I'm // assuming that's the intent. if (IsLambda && !Var->getDeclName()) { if (Diagnose) { S.Diag(Loc, diag::err_lambda_capture_anonymous_var); S.Diag(Var->getLocation(), diag::note_declared_at); } return false; } // Prohibit variably-modified types in blocks; they're difficult to deal with. if (Var->getType()->isVariablyModifiedType() && IsBlock) { if (Diagnose) { S.Diag(Loc, diag::err_ref_vm_type); S.Diag(Var->getLocation(), diag::note_previous_decl) << Var->getDeclName(); } return false; } // Prohibit structs with flexible array members too. // We cannot capture what is in the tail end of the struct. if (const RecordType *VTTy = Var->getType()->getAs<RecordType>()) { if (VTTy->getDecl()->hasFlexibleArrayMember()) { if (Diagnose) { if (IsBlock) S.Diag(Loc, diag::err_ref_flexarray_type); else S.Diag(Loc, diag::err_lambda_capture_flexarray_type) << Var->getDeclName(); S.Diag(Var->getLocation(), diag::note_previous_decl) << Var->getDeclName(); } return false; } } const bool HasBlocksAttr = Var->hasAttr<BlocksAttr>(); // Lambdas and captured statements are not allowed to capture __block // variables; they don't support the expected semantics. if (HasBlocksAttr && (IsLambda || isa<CapturedRegionScopeInfo>(CSI))) { if (Diagnose) { S.Diag(Loc, diag::err_capture_block_variable) << Var->getDeclName() << !IsLambda; S.Diag(Var->getLocation(), diag::note_previous_decl) << Var->getDeclName(); } return false; } // OpenCL v2.0 s6.12.5: Blocks cannot reference/capture other blocks if (S.getLangOpts().OpenCL && IsBlock && Var->getType()->isBlockPointerType()) { if (Diagnose) S.Diag(Loc, diag::err_opencl_block_ref_block); return false; } return true; } // Returns true if the capture by block was successful. static bool captureInBlock(BlockScopeInfo *BSI, VarDecl *Var, SourceLocation Loc, const bool BuildAndDiagnose, QualType &CaptureType, QualType &DeclRefType, const bool Nested, Sema &S) { Expr *CopyExpr = nullptr; bool ByRef = false; // Blocks are not allowed to capture arrays. if (CaptureType->isArrayType()) { if (BuildAndDiagnose) { S.Diag(Loc, diag::err_ref_array_type); S.Diag(Var->getLocation(), diag::note_previous_decl) << Var->getDeclName(); } return false; } // Forbid the block-capture of autoreleasing variables. if (CaptureType.getObjCLifetime() == Qualifiers::OCL_Autoreleasing) { if (BuildAndDiagnose) { S.Diag(Loc, diag::err_arc_autoreleasing_capture) << /*block*/ 0; S.Diag(Var->getLocation(), diag::note_previous_decl) << Var->getDeclName(); } return false; } // Warn about implicitly autoreleasing indirect parameters captured by blocks. if (const auto *PT = CaptureType->getAs<PointerType>()) { // This function finds out whether there is an AttributedType of kind // attr_objc_ownership in Ty.
The existence of AttributedType of kind // attr_objc_ownership implies __autoreleasing was explicitly specified // rather than being added implicitly by the compiler. auto IsObjCOwnershipAttributedType = [](QualType Ty) { while (const auto *AttrTy = Ty->getAs<AttributedType>()) { if (AttrTy->getAttrKind() == AttributedType::attr_objc_ownership) return true; // Peel off AttributedTypes that are not of kind objc_ownership. Ty = AttrTy->getModifiedType(); } return false; }; QualType PointeeTy = PT->getPointeeType(); if (PointeeTy->getAs<ObjCObjectPointerType>() && PointeeTy.getObjCLifetime() == Qualifiers::OCL_Autoreleasing && !IsObjCOwnershipAttributedType(PointeeTy)) { if (BuildAndDiagnose) { SourceLocation VarLoc = Var->getLocation(); S.Diag(Loc, diag::warn_block_capture_autoreleasing); { auto AddAutoreleaseNote = S.Diag(VarLoc, diag::note_declare_parameter_autoreleasing); // Provide a fix-it for the '__autoreleasing' keyword at the // appropriate location in the variable's type. if (const auto *TSI = Var->getTypeSourceInfo()) { PointerTypeLoc PTL = TSI->getTypeLoc().getAsAdjusted<PointerTypeLoc>(); if (PTL) { SourceLocation Loc = PTL.getPointeeLoc().getEndLoc(); Loc = Lexer::getLocForEndOfToken(Loc, 0, S.getSourceManager(), S.getLangOpts()); if (Loc.isValid()) { StringRef CharAtLoc = Lexer::getSourceText( CharSourceRange::getCharRange(Loc, Loc.getLocWithOffset(1)), S.getSourceManager(), S.getLangOpts()); AddAutoreleaseNote << FixItHint::CreateInsertion( Loc, CharAtLoc.empty() || !isWhitespace(CharAtLoc[0]) ? " __autoreleasing " : " __autoreleasing"); } } } } S.Diag(VarLoc, diag::note_declare_parameter_strong); } } } const bool HasBlocksAttr = Var->hasAttr<BlocksAttr>(); if (HasBlocksAttr || CaptureType->isReferenceType() || (S.getLangOpts().OpenMP && S.IsOpenMPCapturedDecl(Var))) { // Block capture by reference does not change the capture or // declaration reference types. ByRef = true; } else { // Block capture by copy introduces 'const'. CaptureType = CaptureType.getNonReferenceType().withConst(); DeclRefType = CaptureType; if (S.getLangOpts().CPlusPlus && BuildAndDiagnose) { if (const RecordType *Record = DeclRefType->getAs<RecordType>()) { // The capture logic needs the destructor, so make sure we mark it. // Usually this is unnecessary because most local variables have // their destructors marked at declaration time, but parameters are // an exception because it's technically only the call site that // actually requires the destructor. if (isa<ParmVarDecl>(Var)) S.FinalizeVarWithDestructor(Var, Record); // Enter a new evaluation context to insulate the copy // full-expression. EnterExpressionEvaluationContext scope( S, Sema::ExpressionEvaluationContext::PotentiallyEvaluated); // According to the blocks spec, the capture of a variable from // the stack requires a const copy constructor. This is not true // of the copy/move done to move a __block variable to the heap. Expr *DeclRef = new (S.Context) DeclRefExpr(Var, Nested, DeclRefType.withConst(), VK_LValue, Loc); ExprResult Result = S.PerformCopyInitialization( InitializedEntity::InitializeBlock(Var->getLocation(), CaptureType, false), Loc, DeclRef); // Build a full-expression copy expression if initialization // succeeded and used a non-trivial constructor. Recover from // errors by pretending that the copy isn't necessary. if (!Result.isInvalid() && !cast<CXXConstructExpr>(Result.get())->getConstructor() ->isTrivial()) { Result = S.MaybeCreateExprWithCleanups(Result); CopyExpr = Result.get(); } } } } // Actually capture the variable.
if (BuildAndDiagnose) BSI->addCapture(Var, HasBlocksAttr, ByRef, Nested, Loc, SourceLocation(), CaptureType, CopyExpr); return true; } /// \brief Capture the given variable in the captured region. static bool captureInCapturedRegion(CapturedRegionScopeInfo *RSI, VarDecl *Var, SourceLocation Loc, const bool BuildAndDiagnose, QualType &CaptureType, QualType &DeclRefType, const bool RefersToCapturedVariable, Sema &S) { // By default, capture variables by reference. bool ByRef = true; // Using an LValue reference type is consistent with Lambdas (see below). if (S.getLangOpts().OpenMP && RSI->CapRegionKind == CR_OpenMP) { if (S.IsOpenMPCapturedDecl(Var)) { bool HasConst = DeclRefType.isConstQualified(); DeclRefType = DeclRefType.getUnqualifiedType(); // Don't lose diagnostics about assignments to const. if (HasConst) DeclRefType.addConst(); } ByRef = S.IsOpenMPCapturedByRef(Var, RSI->OpenMPLevel); } if (ByRef) CaptureType = S.Context.getLValueReferenceType(DeclRefType); else CaptureType = DeclRefType; Expr *CopyExpr = nullptr; if (BuildAndDiagnose) { // The current implementation assumes that all variables are captured // by references. Since there is no capture by copy, no expression // evaluation will be needed. RecordDecl *RD = RSI->TheRecordDecl; FieldDecl *Field = FieldDecl::Create(S.Context, RD, Loc, Loc, nullptr, CaptureType, S.Context.getTrivialTypeSourceInfo(CaptureType, Loc), nullptr, false, ICIS_NoInit); Field->setImplicit(true); Field->setAccess(AS_private); RD->addDecl(Field); if (S.getLangOpts().OpenMP && RSI->CapRegionKind == CR_OpenMP) S.setOpenMPCaptureKind(Field, Var, RSI->OpenMPLevel); CopyExpr = new (S.Context) DeclRefExpr(Var, RefersToCapturedVariable, DeclRefType, VK_LValue, Loc); Var->setReferenced(true); Var->markUsed(S.Context); } // Actually capture the variable. if (BuildAndDiagnose) RSI->addCapture(Var, /*isBlock*/false, ByRef, RefersToCapturedVariable, Loc, SourceLocation(), CaptureType, CopyExpr); return true; } /// \brief Create a field within the lambda class for the variable /// being captured. static void addAsFieldToClosureType(Sema &S, LambdaScopeInfo *LSI, QualType FieldType, QualType DeclRefType, SourceLocation Loc, bool RefersToCapturedVariable) { CXXRecordDecl *Lambda = LSI->Lambda; // Build the non-static data member. FieldDecl *Field = FieldDecl::Create(S.Context, Lambda, Loc, Loc, nullptr, FieldType, S.Context.getTrivialTypeSourceInfo(FieldType, Loc), nullptr, false, ICIS_NoInit); Field->setImplicit(true); Field->setAccess(AS_private); Lambda->addDecl(Field); } /// \brief Capture the given variable in the lambda. static bool captureInLambda(LambdaScopeInfo *LSI, VarDecl *Var, SourceLocation Loc, const bool BuildAndDiagnose, QualType &CaptureType, QualType &DeclRefType, const bool RefersToCapturedVariable, const Sema::TryCaptureKind Kind, SourceLocation EllipsisLoc, const bool IsTopScope, Sema &S) { // Determine whether we are capturing by reference or by value. bool ByRef = false; if (IsTopScope && Kind != Sema::TryCapture_Implicit) { ByRef = (Kind == Sema::TryCapture_ExplicitByRef); } else { ByRef = (LSI->ImpCaptureStyle == LambdaScopeInfo::ImpCap_LambdaByref); } // Compute the type of the field that will capture this variable. if (ByRef) { // C++11 [expr.prim.lambda]p15: // An entity is captured by reference if it is implicitly or // explicitly captured but not captured by copy. It is // unspecified whether additional unnamed non-static data // members are declared in the closure type for entities // captured by reference. 
// // FIXME: It is not clear whether we want to build an lvalue reference // to the DeclRefType or to CaptureType.getNonReferenceType(). GCC appears // to do the former, while EDG does the latter. Core issue 1249 will // clarify, but for now we follow GCC because it's a more permissive and // easily defensible position. CaptureType = S.Context.getLValueReferenceType(DeclRefType); } else { // C++11 [expr.prim.lambda]p14: // For each entity captured by copy, an unnamed non-static // data member is declared in the closure type. The // declaration order of these members is unspecified. The type // of such a data member is the type of the corresponding // captured entity if the entity is not a reference to an // object, or the referenced type otherwise. [Note: If the // captured entity is a reference to a function, the // corresponding data member is also a reference to a // function. - end note ] if (const ReferenceType *RefType = CaptureType->getAs<ReferenceType>()) { if (!RefType->getPointeeType()->isFunctionType()) CaptureType = RefType->getPointeeType(); } // Forbid the lambda copy-capture of autoreleasing variables. if (CaptureType.getObjCLifetime() == Qualifiers::OCL_Autoreleasing) { if (BuildAndDiagnose) { S.Diag(Loc, diag::err_arc_autoreleasing_capture) << /*lambda*/ 1; S.Diag(Var->getLocation(), diag::note_previous_decl) << Var->getDeclName(); } return false; } // Make sure that by-copy captures are of a complete and non-abstract type. if (BuildAndDiagnose) { if (!CaptureType->isDependentType() && S.RequireCompleteType(Loc, CaptureType, diag::err_capture_of_incomplete_type, Var->getDeclName())) return false; if (S.RequireNonAbstractType(Loc, CaptureType, diag::err_capture_of_abstract_type)) return false; } } // Capture this variable in the lambda. if (BuildAndDiagnose) addAsFieldToClosureType(S, LSI, CaptureType, DeclRefType, Loc, RefersToCapturedVariable); // Compute the type of a reference to this captured variable. if (ByRef) DeclRefType = CaptureType.getNonReferenceType(); else { // C++ [expr.prim.lambda]p5: // The closure type for a lambda-expression has a public inline // function call operator [...]. This function call operator is // declared const (9.3.1) if and only if the lambda-expression's // parameter-declaration-clause is not followed by mutable. DeclRefType = CaptureType.getNonReferenceType(); if (!LSI->Mutable && !CaptureType->isReferenceType()) DeclRefType.addConst(); } // Add the capture. if (BuildAndDiagnose) LSI->addCapture(Var, /*IsBlock=*/false, ByRef, RefersToCapturedVariable, Loc, EllipsisLoc, CaptureType, /*CopyExpr=*/nullptr); return true; } bool Sema::tryCaptureVariable( VarDecl *Var, SourceLocation ExprLoc, TryCaptureKind Kind, SourceLocation EllipsisLoc, bool BuildAndDiagnose, QualType &CaptureType, QualType &DeclRefType, const unsigned *const FunctionScopeIndexToStopAt) { // An init-capture is notionally from the context surrounding its // declaration, but its parent DC is the lambda class. DeclContext *VarDC = Var->getDeclContext(); if (Var->isInitCapture()) VarDC = VarDC->getParent(); DeclContext *DC = CurContext; const unsigned MaxFunctionScopesIndex = FunctionScopeIndexToStopAt ?
*FunctionScopeIndexToStopAt : FunctionScopes.size() - 1; // We need to sync up the Declaration Context with the // FunctionScopeIndexToStopAt if (FunctionScopeIndexToStopAt) { unsigned FSIndex = FunctionScopes.size() - 1; while (FSIndex != MaxFunctionScopesIndex) { DC = getLambdaAwareParentOfDeclContext(DC); --FSIndex; } } // If the variable is declared in the current context, there is no need to // capture it. if (VarDC == DC) return true; // Capture global variables if it is required to use private copy of this // variable. bool IsGlobal = !Var->hasLocalStorage(); if (IsGlobal && !(LangOpts.OpenMP && IsOpenMPCapturedDecl(Var))) return true; Var = Var->getCanonicalDecl(); // Walk up the stack to determine whether we can capture the variable, // performing the "simple" checks that don't depend on type. We stop when // we've either hit the declared scope of the variable or find an existing // capture of that variable. We start from the innermost capturing-entity // (the DC) and ensure that all intervening capturing-entities // (blocks/lambdas etc.) between the innermost capturer and the variable's // DeclContext can either capture the variable or have already captured // the variable. CaptureType = Var->getType(); DeclRefType = CaptureType.getNonReferenceType(); bool Nested = false; bool Explicit = (Kind != TryCapture_Implicit); unsigned FunctionScopesIndex = MaxFunctionScopesIndex; do { // Only block literals, captured statements, and lambda expressions can // capture; other scopes don't work. DeclContext *ParentDC = getParentOfCapturingContextOrNull(DC, Var, ExprLoc, BuildAndDiagnose, *this); // We need to check for the parent *first* because, if we *have* // private-captured a global variable, we need to recursively capture it in // intermediate blocks, lambdas, etc. if (!ParentDC) { if (IsGlobal) { FunctionScopesIndex = MaxFunctionScopesIndex - 1; break; } return true; } FunctionScopeInfo *FSI = FunctionScopes[FunctionScopesIndex]; CapturingScopeInfo *CSI = cast<CapturingScopeInfo>(FSI); // Check whether we've already captured it. if (isVariableAlreadyCapturedInScopeInfo(CSI, Var, Nested, CaptureType, DeclRefType)) { CSI->getCapture(Var).markUsed(BuildAndDiagnose); break; } // If we are instantiating a generic lambda call operator body, // we do not want to capture new variables. What was captured // during either a lambda's transformation or initial parsing // should be used. if (isGenericLambdaCallOperatorSpecialization(DC)) { if (BuildAndDiagnose) { LambdaScopeInfo *LSI = cast<LambdaScopeInfo>(CSI); if (LSI->ImpCaptureStyle == CapturingScopeInfo::ImpCap_None) { Diag(ExprLoc, diag::err_lambda_impcap) << Var->getDeclName(); Diag(Var->getLocation(), diag::note_previous_decl) << Var->getDeclName(); Diag(LSI->Lambda->getLocStart(), diag::note_lambda_decl); } else diagnoseUncapturableValueReference(*this, ExprLoc, Var, DC); } return true; } // Certain capturing entities (lambdas, blocks etc.) are not allowed to capture // certain types of variables (unnamed, variably modified types etc.) // so check for eligibility. if (!isVariableCapturable(CSI, Var, ExprLoc, BuildAndDiagnose, *this)) return true; // Try to capture variable-length arrays types. if (Var->getType()->isVariablyModifiedType()) { // We're going to walk down into the type and look for VLA // expressions.
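// Illustrative case (editor's sketch, not from the original source):
//   void g(int n) {
//     int vla[n];
//     /* a capture of 'vla' here also needs the bound 'n' */
//   }
// the size expression 'n' is buried inside the variably-modified type,
// and captureVariablyModifiedType below walks the type to find it.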
QualType QTy = Var->getType(); if (ParmVarDecl *PVD = dyn_cast_or_null<ParmVarDecl>(Var)) QTy = PVD->getOriginalType(); captureVariablyModifiedType(Context, QTy, CSI); } if (getLangOpts().OpenMP) { if (auto *RSI = dyn_cast<CapturedRegionScopeInfo>(CSI)) { // OpenMP private variables should not be captured in outer scope, so // just break here. Similarly, global variables that are captured in a // target region should not be captured outside the scope of the region. if (RSI->CapRegionKind == CR_OpenMP) { bool IsOpenMPPrivateDecl = isOpenMPPrivateDecl(Var, RSI->OpenMPLevel); auto IsTargetCap = !IsOpenMPPrivateDecl && isOpenMPTargetCapturedDecl(Var, RSI->OpenMPLevel); // When we detect target captures we are looking from inside the // target region, therefore we need to propagate the capture from the // enclosing region. Therefore, the capture is not initially nested. if (IsTargetCap) adjustOpenMPTargetScopeIndex(FunctionScopesIndex, RSI->OpenMPLevel); if (IsTargetCap || IsOpenMPPrivateDecl) { Nested = !IsTargetCap; DeclRefType = DeclRefType.getUnqualifiedType(); CaptureType = Context.getLValueReferenceType(DeclRefType); break; } } } } if (CSI->ImpCaptureStyle == CapturingScopeInfo::ImpCap_None && !Explicit) { // No capture-default, and this is not an explicit capture, // so cannot capture this variable. if (BuildAndDiagnose) { Diag(ExprLoc, diag::err_lambda_impcap) << Var->getDeclName(); Diag(Var->getLocation(), diag::note_previous_decl) << Var->getDeclName(); if (cast<LambdaScopeInfo>(CSI)->Lambda) Diag(cast<LambdaScopeInfo>(CSI)->Lambda->getLocStart(), diag::note_lambda_decl); // FIXME: If we error out because an outer lambda cannot implicitly // capture a variable that an inner lambda explicitly captures, we // should have the inner lambda do the explicit capture - because // it makes for cleaner diagnostics later. This would purely be done // so that the diagnostic does not misleadingly claim that a variable // cannot be captured by a lambda implicitly even though it is captured // explicitly. Suggestion: // - create const bool VariableCaptureWasInitiallyExplicit = Explicit // at the function head // - cache the StartingDeclContext - this must be a lambda // - capture the variable via captureInLambda in the innermost lambda. } return true; } FunctionScopesIndex--; DC = ParentDC; Explicit = false; } while (!VarDC->Equals(DC)); // Walk back down the scope stack, (e.g. from outer lambda to inner lambda) // computing the type of the capture at each step, checking type-specific // requirements, and adding captures if requested. // If the variable had already been captured previously, we start capturing // at the lambda nested within that one.
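// For instance (editor's sketch, not from the original source):
//   void f() { int x; [&] { [&] { return x; }; }; }
// the loop below captures 'x' first in the outer lambda and then, with
// Nested set, in the inner one.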
for (unsigned I = ++FunctionScopesIndex, N = MaxFunctionScopesIndex + 1; I != N; ++I) { CapturingScopeInfo *CSI = cast<CapturingScopeInfo>(FunctionScopes[I]); if (BlockScopeInfo *BSI = dyn_cast<BlockScopeInfo>(CSI)) { if (!captureInBlock(BSI, Var, ExprLoc, BuildAndDiagnose, CaptureType, DeclRefType, Nested, *this)) return true; Nested = true; } else if (CapturedRegionScopeInfo *RSI = dyn_cast<CapturedRegionScopeInfo>(CSI)) { if (!captureInCapturedRegion(RSI, Var, ExprLoc, BuildAndDiagnose, CaptureType, DeclRefType, Nested, *this)) return true; Nested = true; } else { LambdaScopeInfo *LSI = cast<LambdaScopeInfo>(CSI); if (!captureInLambda(LSI, Var, ExprLoc, BuildAndDiagnose, CaptureType, DeclRefType, Nested, Kind, EllipsisLoc, /*IsTopScope*/I == N - 1, *this)) return true; Nested = true; } } return false; } bool Sema::tryCaptureVariable(VarDecl *Var, SourceLocation Loc, TryCaptureKind Kind, SourceLocation EllipsisLoc) { QualType CaptureType; QualType DeclRefType; return tryCaptureVariable(Var, Loc, Kind, EllipsisLoc, /*BuildAndDiagnose=*/true, CaptureType, DeclRefType, nullptr); } bool Sema::NeedToCaptureVariable(VarDecl *Var, SourceLocation Loc) { QualType CaptureType; QualType DeclRefType; return !tryCaptureVariable(Var, Loc, TryCapture_Implicit, SourceLocation(), /*BuildAndDiagnose=*/false, CaptureType, DeclRefType, nullptr); } QualType Sema::getCapturedDeclRefType(VarDecl *Var, SourceLocation Loc) { QualType CaptureType; QualType DeclRefType; // Determine whether we can capture this variable. if (tryCaptureVariable(Var, Loc, TryCapture_Implicit, SourceLocation(), /*BuildAndDiagnose=*/false, CaptureType, DeclRefType, nullptr)) return QualType(); return DeclRefType; } // If either the type of the variable or the initializer is dependent, // return false. Otherwise, determine whether the variable is a constant // expression. Use this if you need to know if a variable that might or // might not be dependent is truly a constant expression. static inline bool IsVariableNonDependentAndAConstantExpression(VarDecl *Var, ASTContext &Context) { if (Var->getType()->isDependentType()) return false; const VarDecl *DefVD = nullptr; Var->getAnyInitializer(DefVD); if (!DefVD) return false; EvaluatedStmt *Eval = DefVD->ensureEvaluatedStmt(); Expr *Init = cast<Expr>(Eval->Value); if (Init->isValueDependent()) return false; return IsVariableAConstantExpression(Var, Context); } void Sema::UpdateMarkingForLValueToRValue(Expr *E) { // Per C++11 [basic.def.odr], a variable is odr-used "unless it is // an object that satisfies the requirements for appearing in a // constant expression (5.19) and the lvalue-to-rvalue conversion (4.1) // is immediately applied." This function handles the lvalue-to-rvalue // conversion part. MaybeODRUseExprs.erase(E->IgnoreParens()); // If we are in a lambda, check if this DeclRefExpr or MemberExpr refers // to a variable that is a constant expression, and if so, identify it as // a reference to a variable that does not involve an odr-use of that // variable.
if (LambdaScopeInfo *LSI = getCurLambda()) { Expr *SansParensExpr = E->IgnoreParens(); VarDecl *Var = nullptr; if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(SansParensExpr)) Var = dyn_cast<VarDecl>(DRE->getFoundDecl()); else if (MemberExpr *ME = dyn_cast<MemberExpr>(SansParensExpr)) Var = dyn_cast<VarDecl>(ME->getMemberDecl()); if (Var && IsVariableNonDependentAndAConstantExpression(Var, Context)) LSI->markVariableExprAsNonODRUsed(SansParensExpr); } } ExprResult Sema::ActOnConstantExpression(ExprResult Res) { Res = CorrectDelayedTyposInExpr(Res); if (!Res.isUsable()) return Res; // If a constant-expression is a reference to a variable where we delay // deciding whether it is an odr-use, just assume we will apply the // lvalue-to-rvalue conversion. In the one case where this doesn't happen // (a non-type template argument), we have special handling anyway. UpdateMarkingForLValueToRValue(Res.get()); return Res; } void Sema::CleanupVarDeclMarking() { for (Expr *E : MaybeODRUseExprs) { VarDecl *Var; SourceLocation Loc; if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E)) { Var = cast<VarDecl>(DRE->getDecl()); Loc = DRE->getLocation(); } else if (MemberExpr *ME = dyn_cast<MemberExpr>(E)) { Var = cast<VarDecl>(ME->getMemberDecl()); Loc = ME->getMemberLoc(); } else { llvm_unreachable("Unexpected expression"); } MarkVarDeclODRUsed(Var, Loc, *this, /*MaxFunctionScopeIndex Pointer*/ nullptr); } MaybeODRUseExprs.clear(); } static void DoMarkVarDeclReferenced(Sema &SemaRef, SourceLocation Loc, VarDecl *Var, Expr *E) { assert((!E || isa<DeclRefExpr>(E) || isa<MemberExpr>(E)) && "Invalid Expr argument to DoMarkVarDeclReferenced"); Var->setReferenced(); TemplateSpecializationKind TSK = Var->getTemplateSpecializationKind(); bool OdrUseContext = isOdrUseContext(SemaRef); bool UsableInConstantExpr = Var->isUsableInConstantExpressions(SemaRef.Context); bool NeedDefinition = OdrUseContext || (isEvaluatableContext(SemaRef) && UsableInConstantExpr); VarTemplateSpecializationDecl *VarSpec = dyn_cast<VarTemplateSpecializationDecl>(Var); assert(!isa<VarTemplatePartialSpecializationDecl>(Var) && "Can't instantiate a partial template specialization."); // If this might be a member specialization of a static data member, check // the specialization is visible. We already did the checks for variable // template specializations when we created them. if (NeedDefinition && TSK != TSK_Undeclared && !isa<VarTemplateSpecializationDecl>(Var)) SemaRef.checkSpecializationVisibility(Loc, Var); // Perform implicit instantiation of static data members, static data member // templates of class templates, and variable template specializations. Delay // instantiations of variable templates, except for those that could be used // in a constant expression. if (NeedDefinition && isTemplateInstantiation(TSK)) { // Per C++17 [temp.explicit]p10, we may instantiate despite an explicit // instantiation declaration if a variable is usable in a constant // expression (among other cases). bool TryInstantiating = TSK == TSK_ImplicitInstantiation || (TSK == TSK_ExplicitInstantiationDeclaration && UsableInConstantExpr); if (TryInstantiating) { SourceLocation PointOfInstantiation = Var->getPointOfInstantiation(); bool FirstInstantiation = PointOfInstantiation.isInvalid(); if (FirstInstantiation) { PointOfInstantiation = Loc; Var->setTemplateSpecializationKind(TSK, PointOfInstantiation); } bool InstantiationDependent = false; bool IsNonDependent = VarSpec ? !TemplateSpecializationType::anyDependentTemplateArguments( VarSpec->getTemplateArgsInfo(), InstantiationDependent) : true; // Do not instantiate specializations that are still type-dependent.
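// Editor's sketch (not from the original source) of the distinction below:
//   template<typename T> constexpr T pi = T(3.141592653589793);
//   double d = pi<double>;  // usable in a constant expression, so the
//                           // definition is instantiated immediately
//                           // rather than deferred.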
if (IsNonDependent) { if (UsableInConstantExpr) { // Do not defer instantiations of variables that could be used in a // constant expression. SemaRef.InstantiateVariableDefinition(PointOfInstantiation, Var); } else if (FirstInstantiation || isa<VarTemplateSpecializationDecl>(Var)) { // FIXME: For a specialization of a variable template, we don't // distinguish between "declaration and type implicitly instantiated" // and "implicit instantiation of definition requested", so we have // no direct way to avoid enqueueing the pending instantiation // multiple times. SemaRef.PendingInstantiations .push_back(std::make_pair(Var, PointOfInstantiation)); } } } } // Per C++11 [basic.def.odr], a variable is odr-used "unless it satisfies // the requirements for appearing in a constant expression (5.19) and, if // it is an object, the lvalue-to-rvalue conversion (4.1) // is immediately applied." We check the first part here, and // Sema::UpdateMarkingForLValueToRValue deals with the second part. // Note that we use the C++11 definition everywhere because nothing in // C++03 depends on whether we get the C++03 version correct. The second // part does not apply to references, since they are not objects. if (OdrUseContext && E && IsVariableAConstantExpression(Var, SemaRef.Context)) { // A reference initialized by a constant expression can never be // odr-used, so simply ignore it. if (!Var->getType()->isReferenceType() || (SemaRef.LangOpts.OpenMP && SemaRef.IsOpenMPCapturedDecl(Var))) SemaRef.MaybeODRUseExprs.insert(E); } else if (OdrUseContext) { MarkVarDeclODRUsed(Var, Loc, SemaRef, /*MaxFunctionScopeIndex ptr*/ nullptr); } else if (isOdrUseContext(SemaRef, /*SkipDependentUses*/false)) { // If this is a dependent context, we don't need to mark variables as // odr-used, but we may still need to track them for lambda capture. // FIXME: Do we also need to do this inside dependent typeid expressions // (which are modeled as unevaluated at this point)? const bool RefersToEnclosingScope = (SemaRef.CurContext != Var->getDeclContext() && Var->getDeclContext()->isFunctionOrMethod() && Var->hasLocalStorage()); if (RefersToEnclosingScope) { LambdaScopeInfo *const LSI = SemaRef.getCurLambda(/*IgnoreNonLambdaCapturingScope=*/true); - if (LSI && !LSI->CallOperator->Encloses(Var->getDeclContext())) { + if (LSI && (!LSI->CallOperator || + !LSI->CallOperator->Encloses(Var->getDeclContext()))) { // If a variable could potentially be odr-used, defer marking it so // until we finish analyzing the full expression for any // lvalue-to-rvalue // or discarded value conversions that would obviate odr-use. // Add it to the list of potential captures that will be analyzed // later (ActOnFinishFullExpr) for eventual capture and odr-use marking // unless the variable is a reference that was initialized by a constant // expression (this will never need to be captured or odr-used). assert(E && "Capture variable should be used in an expression."); if (!Var->getType()->isReferenceType() || !IsVariableNonDependentAndAConstantExpression(Var, SemaRef.Context)) LSI->addPotentialCapture(E->IgnoreParens()); } } } } /// \brief Mark a variable referenced, and check whether it is odr-used /// (C++ [basic.def.odr]p2, C99 6.9p3). Note that this should not be /// used directly for normal expressions referring to VarDecl.
void Sema::MarkVariableReferenced(SourceLocation Loc, VarDecl *Var) { DoMarkVarDeclReferenced(*this, Loc, Var, nullptr); } static void MarkExprReferenced(Sema &SemaRef, SourceLocation Loc, Decl *D, Expr *E, bool MightBeOdrUse) { if (SemaRef.isInOpenMPDeclareTargetContext()) SemaRef.checkDeclIsAllowedInOpenMPTarget(E, D); if (VarDecl *Var = dyn_cast<VarDecl>(D)) { DoMarkVarDeclReferenced(SemaRef, Loc, Var, E); return; } SemaRef.MarkAnyDeclReferenced(Loc, D, MightBeOdrUse); // If this is a call to a method via a cast, also mark the method in the // derived class used in case codegen can devirtualize the call. const MemberExpr *ME = dyn_cast<MemberExpr>(E); if (!ME) return; CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(ME->getMemberDecl()); if (!MD) return; // Only attempt to devirtualize if this is truly a virtual call. bool IsVirtualCall = MD->isVirtual() && ME->performsVirtualDispatch(SemaRef.getLangOpts()); if (!IsVirtualCall) return; // If it's possible to devirtualize the call, mark the called function // referenced. CXXMethodDecl *DM = MD->getDevirtualizedMethod( ME->getBase(), SemaRef.getLangOpts().AppleKext); if (DM) SemaRef.MarkAnyDeclReferenced(Loc, DM, MightBeOdrUse); } /// \brief Perform reference-marking and odr-use handling for a DeclRefExpr. void Sema::MarkDeclRefReferenced(DeclRefExpr *E, const Expr *Base) { // TODO: update this with DR# once a defect report is filed. // C++11 defect. The address of a pure member should not be an ODR use, even // if it's a qualified reference. bool OdrUse = true; if (const CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(E->getDecl())) if (Method->isVirtual() && !Method->getDevirtualizedMethod(Base, getLangOpts().AppleKext)) OdrUse = false; MarkExprReferenced(*this, E->getLocation(), E->getDecl(), E, OdrUse); } /// \brief Perform reference-marking and odr-use handling for a MemberExpr. void Sema::MarkMemberReferenced(MemberExpr *E) { // C++11 [basic.def.odr]p2: // A non-overloaded function whose name appears as a potentially-evaluated // expression or a member of a set of candidate functions, if selected by // overload resolution when referred to from a potentially-evaluated // expression, is odr-used, unless it is a pure virtual function and its // name is not explicitly qualified. bool MightBeOdrUse = true; if (E->performsVirtualDispatch(getLangOpts())) { if (CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(E->getMemberDecl())) if (Method->isPure()) MightBeOdrUse = false; } SourceLocation Loc = E->getMemberLoc().isValid() ? E->getMemberLoc() : E->getLocStart(); MarkExprReferenced(*this, Loc, E->getMemberDecl(), E, MightBeOdrUse); } /// \brief Perform marking for a reference to an arbitrary declaration. It /// marks the declaration referenced, and performs odr-use checking for /// functions and variables. This method should not be used when building a /// normal expression which refers to a variable. void Sema::MarkAnyDeclReferenced(SourceLocation Loc, Decl *D, bool MightBeOdrUse) { if (MightBeOdrUse) { if (auto *VD = dyn_cast<VarDecl>(D)) { MarkVariableReferenced(Loc, VD); return; } } if (auto *FD = dyn_cast<FunctionDecl>(D)) { MarkFunctionReferenced(Loc, FD, MightBeOdrUse); return; } D->setReferenced(); } namespace { // Mark all of the declarations used by a type as referenced. // FIXME: Not fully implemented yet! We need to have a better understanding // of when we're entering a context we should not recurse into. // FIXME: This and EvaluatedExprMarker are more-or-less equivalent to // TreeTransforms rebuilding the type in a new context. Rather than // duplicating the TreeTransform logic, we should consider reusing it here.
// Currently that causes problems when rebuilding LambdaExprs. class MarkReferencedDecls : public RecursiveASTVisitor<MarkReferencedDecls> { Sema &S; SourceLocation Loc; public: typedef RecursiveASTVisitor<MarkReferencedDecls> Inherited; MarkReferencedDecls(Sema &S, SourceLocation Loc) : S(S), Loc(Loc) { } bool TraverseTemplateArgument(const TemplateArgument &Arg); }; } bool MarkReferencedDecls::TraverseTemplateArgument( const TemplateArgument &Arg) { { // A non-type template argument is a constant-evaluated context. EnterExpressionEvaluationContext Evaluated( S, Sema::ExpressionEvaluationContext::ConstantEvaluated); if (Arg.getKind() == TemplateArgument::Declaration) { if (Decl *D = Arg.getAsDecl()) S.MarkAnyDeclReferenced(Loc, D, true); } else if (Arg.getKind() == TemplateArgument::Expression) { S.MarkDeclarationsReferencedInExpr(Arg.getAsExpr(), false); } } return Inherited::TraverseTemplateArgument(Arg); } void Sema::MarkDeclarationsReferencedInType(SourceLocation Loc, QualType T) { MarkReferencedDecls Marker(*this, Loc); Marker.TraverseType(T); } namespace { /// \brief Helper class that marks all of the declarations referenced by /// potentially-evaluated subexpressions as "referenced". class EvaluatedExprMarker : public EvaluatedExprVisitor<EvaluatedExprMarker> { Sema &S; bool SkipLocalVariables; public: typedef EvaluatedExprVisitor<EvaluatedExprMarker> Inherited; EvaluatedExprMarker(Sema &S, bool SkipLocalVariables) : Inherited(S.Context), S(S), SkipLocalVariables(SkipLocalVariables) { } void VisitDeclRefExpr(DeclRefExpr *E) { // If we were asked not to visit local variables, don't. if (SkipLocalVariables) { if (VarDecl *VD = dyn_cast<VarDecl>(E->getDecl())) if (VD->hasLocalStorage()) return; } S.MarkDeclRefReferenced(E); } void VisitMemberExpr(MemberExpr *E) { S.MarkMemberReferenced(E); Inherited::VisitMemberExpr(E); } void VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *E) { S.MarkFunctionReferenced(E->getLocStart(), const_cast<CXXDestructorDecl *>(E->getTemporary()->getDestructor())); Visit(E->getSubExpr()); } void VisitCXXNewExpr(CXXNewExpr *E) { if (E->getOperatorNew()) S.MarkFunctionReferenced(E->getLocStart(), E->getOperatorNew()); if (E->getOperatorDelete()) S.MarkFunctionReferenced(E->getLocStart(), E->getOperatorDelete()); Inherited::VisitCXXNewExpr(E); } void VisitCXXDeleteExpr(CXXDeleteExpr *E) { if (E->getOperatorDelete()) S.MarkFunctionReferenced(E->getLocStart(), E->getOperatorDelete()); QualType Destroyed = S.Context.getBaseElementType(E->getDestroyedType()); if (const RecordType *DestroyedRec = Destroyed->getAs<RecordType>()) { CXXRecordDecl *Record = cast<CXXRecordDecl>(DestroyedRec->getDecl()); S.MarkFunctionReferenced(E->getLocStart(), S.LookupDestructor(Record)); } Inherited::VisitCXXDeleteExpr(E); } void VisitCXXConstructExpr(CXXConstructExpr *E) { S.MarkFunctionReferenced(E->getLocStart(), E->getConstructor()); Inherited::VisitCXXConstructExpr(E); } void VisitCXXDefaultArgExpr(CXXDefaultArgExpr *E) { Visit(E->getExpr()); } void VisitImplicitCastExpr(ImplicitCastExpr *E) { Inherited::VisitImplicitCastExpr(E); if (E->getCastKind() == CK_LValueToRValue) S.UpdateMarkingForLValueToRValue(E->getSubExpr()); } }; } /// \brief Mark any declarations that appear within this expression or any /// potentially-evaluated subexpressions as "referenced". /// /// \param SkipLocalVariables If true, don't mark local variables as /// 'referenced'. void Sema::MarkDeclarationsReferencedInExpr(Expr *E, bool SkipLocalVariables) { EvaluatedExprMarker(*this, SkipLocalVariables).Visit(E); } /// \brief Emit a diagnostic that describes an effect on the run-time behavior /// of the program being compiled.
/// /// This routine emits the given diagnostic when the code currently being /// type-checked is "potentially evaluated", meaning that there is a /// possibility that the code will actually be executable. Code in sizeof() /// expressions, code used only during overload resolution, etc., are not /// potentially evaluated. This routine will suppress such diagnostics or, /// in the absolutely nutty case of potentially potentially evaluated /// expressions (C++ typeid), queue the diagnostic to potentially emit it /// later. /// /// This routine should be used for all diagnostics that describe the run-time /// behavior of a program, such as passing a non-POD value through an ellipsis. /// Failure to do so will likely result in spurious diagnostics or failures /// during overload resolution or within sizeof/alignof/typeof/typeid. bool Sema::DiagRuntimeBehavior(SourceLocation Loc, const Stmt *Statement, const PartialDiagnostic &PD) { switch (ExprEvalContexts.back().Context) { case ExpressionEvaluationContext::Unevaluated: case ExpressionEvaluationContext::UnevaluatedList: case ExpressionEvaluationContext::UnevaluatedAbstract: case ExpressionEvaluationContext::DiscardedStatement: // The argument will never be evaluated, so don't complain. break; case ExpressionEvaluationContext::ConstantEvaluated: // Relevant diagnostics should be produced by constant evaluation. break; case ExpressionEvaluationContext::PotentiallyEvaluated: case ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed: if (Statement && getCurFunctionOrMethodDecl()) { FunctionScopes.back()->PossiblyUnreachableDiags. push_back(sema::PossiblyUnreachableDiag(PD, Loc, Statement)); return true; } // The initializer of a constexpr variable or of the first declaration of // a static data member is not syntactically a constant-evaluated context, // but is nonetheless always required to be a constant expression, so we // can skip diagnosing. // FIXME: Using the mangling context here is a hack. if (auto *VD = dyn_cast_or_null<VarDecl>( ExprEvalContexts.back().ManglingContextDecl)) { if (VD->isConstexpr() || (VD->isStaticDataMember() && VD->isFirstDecl() && !VD->isInline())) break; // FIXME: For any other kind of variable, we should build a CFG for its // initializer and check whether the context in question is reachable. } Diag(Loc, PD); return true; } return false; } bool Sema::CheckCallReturnType(QualType ReturnType, SourceLocation Loc, CallExpr *CE, FunctionDecl *FD) { if (ReturnType->isVoidType() || !ReturnType->isIncompleteType()) return false; // If we're inside a decltype's expression, don't check for a valid return // type or construct temporaries until we know whether this is the last call. if (ExprEvalContexts.back().IsDecltype) { ExprEvalContexts.back().DelayedDecltypeCalls.push_back(CE); return false; } class CallReturnIncompleteDiagnoser : public TypeDiagnoser { FunctionDecl *FD; CallExpr *CE; public: CallReturnIncompleteDiagnoser(FunctionDecl *FD, CallExpr *CE) : FD(FD), CE(CE) { } void diagnose(Sema &S, SourceLocation Loc, QualType T) override { if (!FD) { S.Diag(Loc, diag::err_call_incomplete_return) << T << CE->getSourceRange(); return; } S.Diag(Loc, diag::err_call_function_incomplete_return) << CE->getSourceRange() << FD->getDeclName() << T; S.Diag(FD->getLocation(), diag::note_entity_declared_at) << FD->getDeclName(); } } Diagnoser(FD, CE); if (RequireCompleteType(Loc, ReturnType, Diagnoser)) return true; return false; } // Diagnose the s/=/==/ and s/\|=/!=/ typos.
Note that adding parentheses // will prevent this condition from triggering, which is what we want. void Sema::DiagnoseAssignmentAsCondition(Expr *E) { SourceLocation Loc; unsigned diagnostic = diag::warn_condition_is_assignment; bool IsOrAssign = false; if (BinaryOperator *Op = dyn_cast<BinaryOperator>(E)) { if (Op->getOpcode() != BO_Assign && Op->getOpcode() != BO_OrAssign) return; IsOrAssign = Op->getOpcode() == BO_OrAssign; // Greylist some idioms by putting them into a warning subcategory. if (ObjCMessageExpr *ME = dyn_cast<ObjCMessageExpr>(Op->getRHS()->IgnoreParenCasts())) { Selector Sel = ME->getSelector(); // self = [<foo> init...] if (isSelfExpr(Op->getLHS()) && ME->getMethodFamily() == OMF_init) diagnostic = diag::warn_condition_is_idiomatic_assignment; // <foo> = [<bar> nextObject] else if (Sel.isUnarySelector() && Sel.getNameForSlot(0) == "nextObject") diagnostic = diag::warn_condition_is_idiomatic_assignment; } Loc = Op->getOperatorLoc(); } else if (CXXOperatorCallExpr *Op = dyn_cast<CXXOperatorCallExpr>(E)) { if (Op->getOperator() != OO_Equal && Op->getOperator() != OO_PipeEqual) return; IsOrAssign = Op->getOperator() == OO_PipeEqual; Loc = Op->getOperatorLoc(); } else if (PseudoObjectExpr *POE = dyn_cast<PseudoObjectExpr>(E)) return DiagnoseAssignmentAsCondition(POE->getSyntacticForm()); else { // Not an assignment. return; } Diag(Loc, diagnostic) << E->getSourceRange(); SourceLocation Open = E->getLocStart(); SourceLocation Close = getLocForEndOfToken(E->getSourceRange().getEnd()); Diag(Loc, diag::note_condition_assign_silence) << FixItHint::CreateInsertion(Open, "(") << FixItHint::CreateInsertion(Close, ")"); if (IsOrAssign) Diag(Loc, diag::note_condition_or_assign_to_comparison) << FixItHint::CreateReplacement(Loc, "!="); else Diag(Loc, diag::note_condition_assign_to_comparison) << FixItHint::CreateReplacement(Loc, "=="); } /// \brief Redundant parentheses over an equality comparison can indicate /// that the user intended an assignment used as condition. void Sema::DiagnoseEqualityWithExtraParens(ParenExpr *ParenE) { // Don't warn if the parens came from a macro. SourceLocation parenLoc = ParenE->getLocStart(); if (parenLoc.isInvalid() || parenLoc.isMacroID()) return; // Don't warn for dependent expressions.
  if (ParenE->isTypeDependent())
    return;

  Expr *E = ParenE->IgnoreParens();

  if (BinaryOperator *opE = dyn_cast<BinaryOperator>(E))
    if (opE->getOpcode() == BO_EQ &&
        opE->getLHS()->IgnoreParenImpCasts()->isModifiableLvalue(Context)
                                                           == Expr::MLV_Valid) {
      SourceLocation Loc = opE->getOperatorLoc();

      Diag(Loc, diag::warn_equality_with_extra_parens) << E->getSourceRange();
      SourceRange ParenERange = ParenE->getSourceRange();
      Diag(Loc, diag::note_equality_comparison_silence)
        << FixItHint::CreateRemoval(ParenERange.getBegin())
        << FixItHint::CreateRemoval(ParenERange.getEnd());
      Diag(Loc, diag::note_equality_comparison_to_assign)
        << FixItHint::CreateReplacement(Loc, "=");
    }
}

ExprResult Sema::CheckBooleanCondition(SourceLocation Loc, Expr *E,
                                       bool IsConstexpr) {
  DiagnoseAssignmentAsCondition(E);
  if (ParenExpr *parenE = dyn_cast<ParenExpr>(E))
    DiagnoseEqualityWithExtraParens(parenE);

  ExprResult result = CheckPlaceholderExpr(E);
  if (result.isInvalid()) return ExprError();
  E = result.get();

  if (!E->isTypeDependent()) {
    if (getLangOpts().CPlusPlus)
      return CheckCXXBooleanCondition(E, IsConstexpr); // C++ 6.4p4

    ExprResult ERes = DefaultFunctionArrayLvalueConversion(E);
    if (ERes.isInvalid())
      return ExprError();
    E = ERes.get();

    QualType T = E->getType();
    if (!T->isScalarType()) { // C99 6.8.4.1p1
      Diag(Loc, diag::err_typecheck_statement_requires_scalar)
        << T << E->getSourceRange();
      return ExprError();
    }
    CheckBoolLikeConversion(E, Loc);
  }

  return E;
}

Sema::ConditionResult Sema::ActOnCondition(Scope *S, SourceLocation Loc,
                                           Expr *SubExpr, ConditionKind CK) {
  // Empty conditions are valid in for-statements.
  if (!SubExpr)
    return ConditionResult();

  ExprResult Cond;
  switch (CK) {
  case ConditionKind::Boolean:
    Cond = CheckBooleanCondition(Loc, SubExpr);
    break;

  case ConditionKind::ConstexprIf:
    Cond = CheckBooleanCondition(Loc, SubExpr, true);
    break;

  case ConditionKind::Switch:
    Cond = CheckSwitchCondition(Loc, SubExpr);
    break;
  }
  if (Cond.isInvalid())
    return ConditionError();

  // FIXME: FullExprArg doesn't have an invalid bit, so check nullness instead.
  FullExprArg FullExpr = MakeFullExpr(Cond.get(), Loc);
  if (!FullExpr.get())
    return ConditionError();

  return ConditionResult(*this, nullptr, FullExpr,
                         CK == ConditionKind::ConstexprIf);
}

namespace {
  /// A visitor for rebuilding a call to an __unknown_any expression
  /// to have an appropriate type.
  struct RebuildUnknownAnyFunction
    : StmtVisitor<RebuildUnknownAnyFunction, ExprResult> {

    Sema &S;

    RebuildUnknownAnyFunction(Sema &S) : S(S) {}

    ExprResult VisitStmt(Stmt *S) {
      llvm_unreachable("unexpected statement!");
    }

    ExprResult VisitExpr(Expr *E) {
      S.Diag(E->getExprLoc(), diag::err_unsupported_unknown_any_call)
        << E->getSourceRange();
      return ExprError();
    }

    /// Rebuild an expression which simply semantically wraps another
    /// expression which it shares the type and value kind of.
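    /// (Added note, not from the original source: the "sugar" nodes handled
    /// this way are ParenExpr, i.e. "(e)", and GNU "__extension__ e", both
    /// of which forward the type and value kind of their sub-expression
    /// unchanged.)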
    template <class T> ExprResult rebuildSugarExpr(T *E) {
      ExprResult SubResult = Visit(E->getSubExpr());
      if (SubResult.isInvalid()) return ExprError();

      Expr *SubExpr = SubResult.get();
      E->setSubExpr(SubExpr);
      E->setType(SubExpr->getType());
      E->setValueKind(SubExpr->getValueKind());
      assert(E->getObjectKind() == OK_Ordinary);
      return E;
    }

    ExprResult VisitParenExpr(ParenExpr *E) {
      return rebuildSugarExpr(E);
    }

    ExprResult VisitUnaryExtension(UnaryOperator *E) {
      return rebuildSugarExpr(E);
    }

    ExprResult VisitUnaryAddrOf(UnaryOperator *E) {
      ExprResult SubResult = Visit(E->getSubExpr());
      if (SubResult.isInvalid()) return ExprError();

      Expr *SubExpr = SubResult.get();
      E->setSubExpr(SubExpr);
      E->setType(S.Context.getPointerType(SubExpr->getType()));
      assert(E->getValueKind() == VK_RValue);
      assert(E->getObjectKind() == OK_Ordinary);
      return E;
    }

    ExprResult resolveDecl(Expr *E, ValueDecl *VD) {
      if (!isa<FunctionDecl>(VD)) return VisitExpr(E);

      E->setType(VD->getType());

      assert(E->getValueKind() == VK_RValue);
      if (S.getLangOpts().CPlusPlus &&
          !(isa<CXXMethodDecl>(VD) &&
            cast<CXXMethodDecl>(VD)->isInstance()))
        E->setValueKind(VK_LValue);

      return E;
    }

    ExprResult VisitMemberExpr(MemberExpr *E) {
      return resolveDecl(E, E->getMemberDecl());
    }

    ExprResult VisitDeclRefExpr(DeclRefExpr *E) {
      return resolveDecl(E, E->getDecl());
    }
  };
}

/// Given a function expression of unknown-any type, try to rebuild it
/// to have a function type.
static ExprResult rebuildUnknownAnyFunction(Sema &S, Expr *FunctionExpr) {
  ExprResult Result = RebuildUnknownAnyFunction(S).Visit(FunctionExpr);
  if (Result.isInvalid()) return ExprError();
  return S.DefaultFunctionArrayConversion(Result.get());
}

namespace {
  /// A visitor for rebuilding an expression of type __unknown_anytype
  /// into one which resolves the type directly on the referring
  /// expression. Strict preservation of the original source
  /// structure is not a goal.
  struct RebuildUnknownAnyExpr
    : StmtVisitor<RebuildUnknownAnyExpr, ExprResult> {

    Sema &S;

    /// The current destination type.
    QualType DestType;

    RebuildUnknownAnyExpr(Sema &S, QualType CastType)
      : S(S), DestType(CastType) {}

    ExprResult VisitStmt(Stmt *S) {
      llvm_unreachable("unexpected statement!");
    }

    ExprResult VisitExpr(Expr *E) {
      S.Diag(E->getExprLoc(), diag::err_unsupported_unknown_any_expr)
        << E->getSourceRange();
      return ExprError();
    }

    ExprResult VisitCallExpr(CallExpr *E);
    ExprResult VisitObjCMessageExpr(ObjCMessageExpr *E);

    /// Rebuild an expression which simply semantically wraps another
    /// expression which it shares the type and value kind of.
    template <class T> ExprResult rebuildSugarExpr(T *E) {
      ExprResult SubResult = Visit(E->getSubExpr());
      if (SubResult.isInvalid()) return ExprError();
      Expr *SubExpr = SubResult.get();
      E->setSubExpr(SubExpr);
      E->setType(SubExpr->getType());
      E->setValueKind(SubExpr->getValueKind());
      assert(E->getObjectKind() == OK_Ordinary);
      return E;
    }

    ExprResult VisitParenExpr(ParenExpr *E) {
      return rebuildSugarExpr(E);
    }

    ExprResult VisitUnaryExtension(UnaryOperator *E) {
      return rebuildSugarExpr(E);
    }

    ExprResult VisitUnaryAddrOf(UnaryOperator *E) {
      const PointerType *Ptr = DestType->getAs<PointerType>();
      if (!Ptr) {
        S.Diag(E->getOperatorLoc(), diag::err_unknown_any_addrof)
          << E->getSourceRange();
        return ExprError();
      }

      if (isa<CallExpr>(E->getSubExpr())) {
        S.Diag(E->getOperatorLoc(), diag::err_unknown_any_addrof_call)
          << E->getSourceRange();
        return ExprError();
      }

      assert(E->getValueKind() == VK_RValue);
      assert(E->getObjectKind() == OK_Ordinary);
      E->setType(DestType);

      // Build the sub-expression as if it were an object of the pointee type.
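      // Illustrative example (added commentary; "unknown_fn" is a
      // hypothetical name): for a debugger-style cast such as
      // "(void (*)(int))&unknown_fn", DestType is "void (*)(int)" at this
      // point, so "unknown_fn" is rebuilt below against the pointee type
      // "void (int)".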
      DestType = Ptr->getPointeeType();
      ExprResult SubResult = Visit(E->getSubExpr());
      if (SubResult.isInvalid()) return ExprError();
      E->setSubExpr(SubResult.get());
      return E;
    }

    ExprResult VisitImplicitCastExpr(ImplicitCastExpr *E);

    ExprResult resolveDecl(Expr *E, ValueDecl *VD);

    ExprResult VisitMemberExpr(MemberExpr *E) {
      return resolveDecl(E, E->getMemberDecl());
    }

    ExprResult VisitDeclRefExpr(DeclRefExpr *E) {
      return resolveDecl(E, E->getDecl());
    }
  };
}

/// Rebuilds a call expression which yielded __unknown_anytype.
ExprResult RebuildUnknownAnyExpr::VisitCallExpr(CallExpr *E) {
  Expr *CalleeExpr = E->getCallee();

  enum FnKind {
    FK_MemberFunction,
    FK_FunctionPointer,
    FK_BlockPointer
  };

  FnKind Kind;
  QualType CalleeType = CalleeExpr->getType();
  if (CalleeType == S.Context.BoundMemberTy) {
    assert(isa<CXXMemberCallExpr>(E) || isa<CXXOperatorCallExpr>(E));
    Kind = FK_MemberFunction;
    CalleeType = Expr::findBoundMemberType(CalleeExpr);
  } else if (const PointerType *Ptr = CalleeType->getAs<PointerType>()) {
    CalleeType = Ptr->getPointeeType();
    Kind = FK_FunctionPointer;
  } else {
    CalleeType = CalleeType->castAs<BlockPointerType>()->getPointeeType();
    Kind = FK_BlockPointer;
  }
  const FunctionType *FnType = CalleeType->castAs<FunctionType>();

  // Verify that this is a legal result type of a function.
  if (DestType->isArrayType() || DestType->isFunctionType()) {
    unsigned diagID = diag::err_func_returning_array_function;
    if (Kind == FK_BlockPointer)
      diagID = diag::err_block_returning_array_function;

    S.Diag(E->getExprLoc(), diagID)
      << DestType->isFunctionType() << DestType;
    return ExprError();
  }

  // Otherwise, go ahead and set DestType as the call's result.
  E->setType(DestType.getNonLValueExprType(S.Context));
  E->setValueKind(Expr::getValueKindForType(DestType));
  assert(E->getObjectKind() == OK_Ordinary);

  // Rebuild the function type, replacing the result type with DestType.
  const FunctionProtoType *Proto = dyn_cast<FunctionProtoType>(FnType);
  if (Proto) {
    // __unknown_anytype(...) is a special case used by the debugger when
    // it has no idea what a function's signature is.
    //
    // We want to build this call essentially under the K&R
    // unprototyped rules, but making a FunctionNoProtoType in C++
    // would foul up all sorts of assumptions. However, we cannot
    // simply pass all arguments as variadic arguments, nor can we
    // portably just call the function under a non-variadic type; see
    // the comment on IR-gen's TargetInfo::isNoProtoCallVariadic.
    // However, it turns out that in practice it is generally safe to
    // call a function declared as "A foo(B,C,D);" under the prototype
    // "A foo(B,C,D,...);". The only known exception is with the
    // Windows ABI, where any variadic function is implicitly cdecl
    // regardless of its normal CC. Therefore we change the parameter
    // types to match the types of the arguments.
    //
    // This is a hack, but it is far superior to moving the
    // corresponding target-specific code from IR-gen to Sema/AST.
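    // Illustrative sketch (added, not part of the original comment; "foo"
    // is hypothetical): if the debugger declares
    //   __unknown_anytype foo(...);
    // and the user evaluates "(double)foo(1, 2.0f)", the code below rebuilds
    // the callee type as "double (int, float, ...)", taking the parameter
    // types from the actual arguments.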
    ArrayRef<QualType> ParamTypes = Proto->getParamTypes();
    SmallVector<QualType, 8> ArgTypes;
    if (ParamTypes.empty() && Proto->isVariadic()) { // the special case
      ArgTypes.reserve(E->getNumArgs());
      for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
        Expr *Arg = E->getArg(i);
        QualType ArgType = Arg->getType();
        if (E->isLValue()) {
          ArgType = S.Context.getLValueReferenceType(ArgType);
        } else if (E->isXValue()) {
          ArgType = S.Context.getRValueReferenceType(ArgType);
        }
        ArgTypes.push_back(ArgType);
      }
      ParamTypes = ArgTypes;
    }
    DestType = S.Context.getFunctionType(DestType, ParamTypes,
                                         Proto->getExtProtoInfo());
  } else {
    DestType = S.Context.getFunctionNoProtoType(DestType,
                                                FnType->getExtInfo());
  }

  // Rebuild the appropriate pointer-to-function type.
  switch (Kind) {
  case FK_MemberFunction:
    // Nothing to do.
    break;

  case FK_FunctionPointer:
    DestType = S.Context.getPointerType(DestType);
    break;

  case FK_BlockPointer:
    DestType = S.Context.getBlockPointerType(DestType);
    break;
  }

  // Finally, we can recurse.
  ExprResult CalleeResult = Visit(CalleeExpr);
  if (!CalleeResult.isUsable()) return ExprError();
  E->setCallee(CalleeResult.get());

  // Bind a temporary if necessary.
  return S.MaybeBindToTemporary(E);
}

ExprResult RebuildUnknownAnyExpr::VisitObjCMessageExpr(ObjCMessageExpr *E) {
  // Verify that this is a legal result type of a call.
  if (DestType->isArrayType() || DestType->isFunctionType()) {
    S.Diag(E->getExprLoc(), diag::err_func_returning_array_function)
      << DestType->isFunctionType() << DestType;
    return ExprError();
  }

  // Rewrite the method result type if available.
  if (ObjCMethodDecl *Method = E->getMethodDecl()) {
    assert(Method->getReturnType() == S.Context.UnknownAnyTy);
    Method->setReturnType(DestType);
  }

  // Change the type of the message.
  E->setType(DestType.getNonReferenceType());
  E->setValueKind(Expr::getValueKindForType(DestType));

  return S.MaybeBindToTemporary(E);
}

ExprResult RebuildUnknownAnyExpr::VisitImplicitCastExpr(ImplicitCastExpr *E) {
  // The only case we should ever see here is a function-to-pointer decay.
  if (E->getCastKind() == CK_FunctionToPointerDecay) {
    assert(E->getValueKind() == VK_RValue);
    assert(E->getObjectKind() == OK_Ordinary);

    E->setType(DestType);

    // Rebuild the sub-expression as the pointee (function) type.
    DestType = DestType->castAs<PointerType>()->getPointeeType();

    ExprResult Result = Visit(E->getSubExpr());
    if (!Result.isUsable()) return ExprError();

    E->setSubExpr(Result.get());
    return E;
  } else if (E->getCastKind() == CK_LValueToRValue) {
    assert(E->getValueKind() == VK_RValue);
    assert(E->getObjectKind() == OK_Ordinary);

    assert(isa<FunctionType>(E->getType()));

    E->setType(DestType);

    // The sub-expression has to be an lvalue reference, so rebuild it as such.
    DestType = S.Context.getLValueReferenceType(DestType);

    ExprResult Result = Visit(E->getSubExpr());
    if (!Result.isUsable()) return ExprError();

    E->setSubExpr(Result.get());
    return E;
  } else {
    llvm_unreachable("Unhandled cast type!");
  }
}

ExprResult RebuildUnknownAnyExpr::resolveDecl(Expr *E, ValueDecl *VD) {
  ExprValueKind ValueKind = VK_LValue;
  QualType Type = DestType;

  // We know how to make this work for certain kinds of decls:

  //  - functions
  if (FunctionDecl *FD = dyn_cast<FunctionDecl>(VD)) {
    if (const PointerType *Ptr = Type->getAs<PointerType>()) {
      DestType = Ptr->getPointeeType();
      ExprResult Result = resolveDecl(E, VD);
      if (Result.isInvalid()) return ExprError();
      return S.ImpCastExprToType(Result.get(), Type,
                                 CK_FunctionToPointerDecay, VK_RValue);
    }

    if (!Type->isFunctionType()) {
      S.Diag(E->getExprLoc(), diag::err_unknown_any_function)
        << VD << E->getSourceRange();
      return ExprError();
    }
    if (const FunctionProtoType *FT = Type->getAs<FunctionProtoType>()) {
      // We must match the FunctionDecl's type to the hack introduced in
      // RebuildUnknownAnyExpr::VisitCallExpr to vararg functions of unknown
      // type. See the lengthy commentary in that routine.
      QualType FDT = FD->getType();
      const FunctionType *FnType = FDT->castAs<FunctionType>();
      const FunctionProtoType *Proto =
          dyn_cast_or_null<FunctionProtoType>(FnType);
      DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E);
      if (DRE && Proto && Proto->getParamTypes().empty() &&
          Proto->isVariadic()) {
        SourceLocation Loc = FD->getLocation();
        FunctionDecl *NewFD = FunctionDecl::Create(FD->getASTContext(),
                                                   FD->getDeclContext(),
                                                   Loc, Loc,
                                                   FD->getNameInfo().getName(),
                                                   DestType,
                                                   FD->getTypeSourceInfo(),
                                                   SC_None,
                                                   false/*isInlineSpecified*/,
                                                   FD->hasPrototype(),
                                                   false/*isConstexprSpecified*/);

        if (FD->getQualifier())
          NewFD->setQualifierInfo(FD->getQualifierLoc());

        SmallVector<ParmVarDecl*, 16> Params;
        for (const auto &AI : FT->param_types()) {
          ParmVarDecl *Param =
            S.BuildParmVarDeclForTypedef(FD, Loc, AI);
          Param->setScopeInfo(0, Params.size());
          Params.push_back(Param);
        }
        NewFD->setParams(Params);
        DRE->setDecl(NewFD);
        VD = DRE->getDecl();
      }
    }

    if (CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(FD))
      if (MD->isInstance()) {
        ValueKind = VK_RValue;
        Type = S.Context.BoundMemberTy;
      }

    // Function references aren't l-values in C.
    if (!S.getLangOpts().CPlusPlus)
      ValueKind = VK_RValue;

  //  - variables
  } else if (isa<VarDecl>(VD)) {
    if (const ReferenceType *RefTy = Type->getAs<ReferenceType>()) {
      Type = RefTy->getPointeeType();
    } else if (Type->isFunctionType()) {
      S.Diag(E->getExprLoc(), diag::err_unknown_any_var_function_type)
        << VD << E->getSourceRange();
      return ExprError();
    }

  //  - nothing else
  } else {
    S.Diag(E->getExprLoc(), diag::err_unsupported_unknown_any_decl)
      << VD << E->getSourceRange();
    return ExprError();
  }

  // Modifying the declaration like this is friendly to IR-gen but
  // also really dangerous.
  VD->setType(DestType);
  E->setType(Type);
  E->setValueKind(ValueKind);
  return E;
}

/// Check a cast of an unknown-any type. We intentionally only
/// trigger this for C-style casts.
ExprResult Sema::checkUnknownAnyCast(SourceRange TypeRange, QualType CastType,
                                     Expr *CastExpr, CastKind &CastKind,
                                     ExprValueKind &VK, CXXCastPath &Path) {
  // The type we're casting to must be either void or complete.
  if (!CastType->isVoidType() &&
      RequireCompleteType(TypeRange.getBegin(), CastType,
                          diag::err_typecheck_cast_to_incomplete))
    return ExprError();

  // Rewrite the casted expression from scratch.
  ExprResult result = RebuildUnknownAnyExpr(*this, CastType).Visit(CastExpr);
  if (!result.isUsable()) return ExprError();

  CastExpr = result.get();
  VK = CastExpr->getValueKind();
  CastKind = CK_NoOp;

  return CastExpr;
}

ExprResult Sema::forceUnknownAnyToType(Expr *E, QualType ToType) {
  return RebuildUnknownAnyExpr(*this, ToType).Visit(E);
}

ExprResult Sema::checkUnknownAnyArg(SourceLocation callLoc,
                                    Expr *arg, QualType &paramType) {
  // If the syntactic form of the argument is not an explicit cast of
  // any sort, just do default argument promotion.
  ExplicitCastExpr *castArg = dyn_cast<ExplicitCastExpr>(arg->IgnoreParens());
  if (!castArg) {
    ExprResult result = DefaultArgumentPromotion(arg);
    if (result.isInvalid()) return ExprError();
    paramType = result.get()->getType();
    return result;
  }

  // Otherwise, use the type that was written in the explicit cast.
  assert(!arg->hasPlaceholderType());
  paramType = castArg->getTypeAsWritten();

  // Copy-initialize a parameter of that type.
  InitializedEntity entity =
    InitializedEntity::InitializeParameter(Context, paramType,
                                           /*consumed*/ false);
  return PerformCopyInitialization(entity, callLoc, arg);
}

static ExprResult diagnoseUnknownAnyExpr(Sema &S, Expr *E) {
  Expr *orig = E;
  unsigned diagID = diag::err_uncasted_use_of_unknown_any;
  while (true) {
    E = E->IgnoreParenImpCasts();
    if (CallExpr *call = dyn_cast<CallExpr>(E)) {
      E = call->getCallee();
      diagID = diag::err_uncasted_call_of_unknown_any;
    } else {
      break;
    }
  }

  SourceLocation loc;
  NamedDecl *d;
  if (DeclRefExpr *ref = dyn_cast<DeclRefExpr>(E)) {
    loc = ref->getLocation();
    d = ref->getDecl();
  } else if (MemberExpr *mem = dyn_cast<MemberExpr>(E)) {
    loc = mem->getMemberLoc();
    d = mem->getMemberDecl();
  } else if (ObjCMessageExpr *msg = dyn_cast<ObjCMessageExpr>(E)) {
    diagID = diag::err_uncasted_call_of_unknown_any;
    loc = msg->getSelectorStartLoc();
    d = msg->getMethodDecl();
    if (!d) {
      S.Diag(loc, diag::err_uncasted_send_to_unknown_any_method)
        << static_cast<unsigned>(msg->isClassMessage()) << msg->getSelector()
        << orig->getSourceRange();
      return ExprError();
    }
  } else {
    S.Diag(E->getExprLoc(), diag::err_unsupported_unknown_any_expr)
      << E->getSourceRange();
    return ExprError();
  }

  S.Diag(loc, diagID) << d << orig->getSourceRange();

  // Never recoverable.
  return ExprError();
}

/// Check for operands with placeholder types and complain if found.
/// Returns ExprError() if there was an error and no recovery was possible.
ExprResult Sema::CheckPlaceholderExpr(Expr *E) {
  if (!getLangOpts().CPlusPlus) {
    // C cannot handle TypoExpr nodes on either side of a binop because it
    // doesn't handle dependent types properly, so make sure any TypoExprs have
    // been dealt with before checking the operands.
    ExprResult Result = CorrectDelayedTyposInExpr(E);
    if (!Result.isUsable()) return ExprError();
    E = Result.get();
  }

  const BuiltinType *placeholderType = E->getType()->getAsPlaceholderType();
  if (!placeholderType) return E;

  switch (placeholderType->getKind()) {

  // Overloaded expressions.
  case BuiltinType::Overload: {
    // Try to resolve a single function template specialization.
    // This is obligatory.
    ExprResult Result = E;
    if (ResolveAndFixSingleFunctionTemplateSpecialization(Result, false))
      return Result;

    // No guarantees that ResolveAndFixSingleFunctionTemplateSpecialization
    // leaves Result unchanged on failure.
    Result = E;
    if (resolveAndFixAddressOfOnlyViableOverloadCandidate(Result))
      return Result;

    // If that failed, try to recover with a call.
    tryToRecoverWithCall(Result, PDiag(diag::err_ovl_unresolvable),
                         /*complain*/ true);
    return Result;
  }

  // Bound member functions.
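  // Illustrative example (added commentary): a member function named
  // without a call, e.g. "obj.size" or "p->~T" with no "()", reaches this
  // case with BoundMember type; (pseudo-)destructors get a more specific
  // diagnostic below before call-based recovery is attempted.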
  case BuiltinType::BoundMember: {
    ExprResult result = E;
    const Expr *BME = E->IgnoreParens();
    PartialDiagnostic PD = PDiag(diag::err_bound_member_function);
    // Try to give a nicer diagnostic if it is a bound member that we recognize.
    if (isa<CXXPseudoDestructorExpr>(BME)) {
      PD = PDiag(diag::err_dtor_expr_without_call) << /*pseudo-destructor*/ 1;
    } else if (const auto *ME = dyn_cast<MemberExpr>(BME)) {
      if (ME->getMemberNameInfo().getName().getNameKind() ==
          DeclarationName::CXXDestructorName)
        PD = PDiag(diag::err_dtor_expr_without_call) << /*destructor*/ 0;
    }
    tryToRecoverWithCall(result, PD,
                         /*complain*/ true);
    return result;
  }

  // ARC unbridged casts.
  case BuiltinType::ARCUnbridgedCast: {
    Expr *realCast = stripARCUnbridgedCast(E);
    diagnoseARCUnbridgedCast(realCast);
    return realCast;
  }

  // Expressions of unknown type.
  case BuiltinType::UnknownAny:
    return diagnoseUnknownAnyExpr(*this, E);

  // Pseudo-objects.
  case BuiltinType::PseudoObject:
    return checkPseudoObjectRValue(E);

  case BuiltinType::BuiltinFn: {
    // Accept __noop without parens by implicitly converting it to a call expr.
    auto *DRE = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts());
    if (DRE) {
      auto *FD = cast<FunctionDecl>(DRE->getDecl());
      if (FD->getBuiltinID() == Builtin::BI__noop) {
        E = ImpCastExprToType(E, Context.getPointerType(FD->getType()),
                              CK_BuiltinFnToFnPtr).get();
        return new (Context) CallExpr(Context, E, None, Context.IntTy,
                                      VK_RValue, SourceLocation());
      }
    }
    Diag(E->getLocStart(), diag::err_builtin_fn_use);
    return ExprError();
  }

  // Expressions of unknown type.
  case BuiltinType::OMPArraySection:
    Diag(E->getLocStart(), diag::err_omp_array_section_use);
    return ExprError();

  // Everything else should be impossible.
#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
  case BuiltinType::Id:
#include "clang/Basic/OpenCLImageTypes.def"
#define BUILTIN_TYPE(Id, SingletonId) case BuiltinType::Id:
#define PLACEHOLDER_TYPE(Id, SingletonId)
#include "clang/AST/BuiltinTypes.def"
    break;
  }

  llvm_unreachable("invalid placeholder type!");
}

bool Sema::CheckCaseExpression(Expr *E) {
  if (E->isTypeDependent())
    return true;
  if (E->isValueDependent() || E->isIntegerConstantExpr(Context))
    return E->getType()->isIntegralOrEnumerationType();
  return false;
}

/// ActOnObjCBoolLiteral - Parse {__objc_yes,__objc_no} literals.
ExprResult
Sema::ActOnObjCBoolLiteral(SourceLocation OpLoc, tok::TokenKind Kind) {
  assert((Kind == tok::kw___objc_yes || Kind == tok::kw___objc_no) &&
         "Unknown Objective-C Boolean value!");
  QualType BoolT = Context.ObjCBuiltinBoolTy;
  if (!Context.getBOOLDecl()) {
    LookupResult Result(*this, &Context.Idents.get("BOOL"), OpLoc,
                        Sema::LookupOrdinaryName);
    if (LookupName(Result, getCurScope()) && Result.isSingleResult()) {
      NamedDecl *ND = Result.getFoundDecl();
      if (TypedefDecl *TD = dyn_cast<TypedefDecl>(ND))
        Context.setBOOLDecl(TD);
    }
  }
  if (Context.getBOOLDecl())
    BoolT = Context.getBOOLType();
  return new (Context) ObjCBoolLiteralExpr(Kind == tok::kw___objc_yes, BoolT,
                                           OpLoc);
}

ExprResult Sema::ActOnObjCAvailabilityCheckExpr(
    llvm::ArrayRef<AvailabilitySpec> AvailSpecs, SourceLocation AtLoc,
    SourceLocation RParen) {

  StringRef Platform = getASTContext().getTargetInfo().getPlatformName();

  auto Spec = std::find_if(AvailSpecs.begin(), AvailSpecs.end(),
                           [&](const AvailabilitySpec &Spec) {
                             return Spec.getPlatform() == Platform;
                           });

  VersionTuple Version;
  if (Spec != AvailSpecs.end())
    Version = Spec->getVersion();

  // The use of `@available` in the enclosing function should be analyzed to
  // warn when it's used inappropriately (i.e. not if(@available)).
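  // Illustrative example (added commentary): the supported pattern is
  //   if (@available(macOS 10.12, *)) { /* guarded code */ }
  // whereas other uses, e.g. "BOOL b = @available(iOS 11, *);", are what
  // that analysis is meant to flag.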
  if (getCurFunctionOrMethodDecl())
    getEnclosingFunction()->HasPotentialAvailabilityViolations = true;
  else if (getCurBlock() || getCurLambda())
    getCurFunction()->HasPotentialAvailabilityViolations = true;

  return new (Context)
      ObjCAvailabilityCheckExpr(Version, AtLoc, RParen, Context.BoolTy);
}
Index: head/contrib/llvm/tools/clang
===================================================================
--- head/contrib/llvm/tools/clang	(revision 329982)
+++ head/contrib/llvm/tools/clang	(revision 329983)

Property changes on: head/contrib/llvm/tools/clang
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
Merged /vendor/clang/dist-release_60:r329406-329940
Index: head/contrib/llvm/tools/lld/ELF/Driver.cpp
===================================================================
--- head/contrib/llvm/tools/lld/ELF/Driver.cpp	(revision 329982)
+++ head/contrib/llvm/tools/lld/ELF/Driver.cpp	(revision 329983)
@@ -1,1128 +1,1133 @@
//===- Driver.cpp ---------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// The driver drives the entire linking process. It is responsible for
// parsing command line options and doing whatever it is instructed to do.
//
// One notable thing in the LLD's driver when compared to other linkers is
// that the LLD's driver is agnostic on the host operating system.
// Other linkers usually have implicit default values (such as a dynamic
// linker path or library paths) for each host OS.
//
// I don't think implicit default values are useful because they are
// usually explicitly specified by the compiler driver. They can even
// be harmful when you are doing cross-linking. Therefore, in LLD, we
// simply trust the compiler driver to pass all required options and
// don't try to make effort on our side.
//
//===----------------------------------------------------------------------===//

#include "Driver.h"
#include "Config.h"
#include "Filesystem.h"
#include "ICF.h"
#include "InputFiles.h"
#include "InputSection.h"
#include "LinkerScript.h"
#include "OutputSections.h"
#include "ScriptParser.h"
#include "Strings.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "Writer.h"
#include "lld/Common/Args.h"
#include "lld/Common/Driver.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Memory.h"
#include "lld/Common/Threads.h"
#include "lld/Common/Version.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/TarWriter.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdlib>
#include <utility>

using namespace llvm;
using namespace llvm::ELF;
using namespace llvm::object;
using namespace llvm::sys;

using namespace lld;
using namespace lld::elf;

Configuration *elf::Config;
LinkerDriver *elf::Driver;

static void setConfigs();

bool elf::link(ArrayRef<const char *> Args, bool CanExitEarly,
               raw_ostream &Error) {
  errorHandler().LogName = Args[0];
  errorHandler().ErrorLimitExceededMsg =
      "too many errors emitted, stopping now (use "
      "-error-limit=0 to see all errors)";
  errorHandler().ErrorOS = &Error;
  errorHandler().ColorDiagnostics = Error.has_colors();
  InputSections.clear();
  OutputSections.clear();
  Tar = nullptr;
  BinaryFiles.clear();
  BitcodeFiles.clear();
  ObjectFiles.clear();
  SharedFiles.clear();

  Config = make<Configuration>();
  Driver = make<LinkerDriver>();
  Script = make<LinkerScript>();
  Symtab = make<SymbolTable>();
  Config->Argv = {Args.begin(), Args.end()};

  Driver->main(Args, CanExitEarly);

  // Exit immediately if we don't need to return to the caller.
  // This saves time because the overhead of calling destructors
  // for all globally-allocated objects is not negligible.
  if (Config->ExitEarly)
    exitLld(errorCount() ? 1 : 0);

  freeArena();
  return !errorCount();
}

// Parses a linker -m option.
static std::tuple<ELFKind, uint16_t, uint8_t> parseEmulation(StringRef Emul) {
  uint8_t OSABI = 0;
  StringRef S = Emul;
  if (S.endswith("_fbsd")) {
    S = S.drop_back(5);
    OSABI = ELFOSABI_FREEBSD;
  }

  std::pair<ELFKind, uint16_t> Ret =
      StringSwitch<std::pair<ELFKind, uint16_t>>(S)
          .Cases("aarch64elf", "aarch64linux", {ELF64LEKind, EM_AARCH64})
          .Cases("armelf", "armelf_linux_eabi", {ELF32LEKind, EM_ARM})
          .Case("elf32_x86_64", {ELF32LEKind, EM_X86_64})
          .Cases("elf32btsmip", "elf32btsmipn32", {ELF32BEKind, EM_MIPS})
          .Cases("elf32ltsmip", "elf32ltsmipn32", {ELF32LEKind, EM_MIPS})
          .Case("elf32ppc", {ELF32BEKind, EM_PPC})
          .Case("elf64btsmip", {ELF64BEKind, EM_MIPS})
          .Case("elf64ltsmip", {ELF64LEKind, EM_MIPS})
          .Case("elf64ppc", {ELF64BEKind, EM_PPC64})
          .Cases("elf_amd64", "elf_x86_64", {ELF64LEKind, EM_X86_64})
          .Case("elf_i386", {ELF32LEKind, EM_386})
          .Case("elf_iamcu", {ELF32LEKind, EM_IAMCU})
          .Default({ELFNoneKind, EM_NONE});

  if (Ret.first == ELFNoneKind)
    error("unknown emulation: " + Emul);
  return std::make_tuple(Ret.first, Ret.second, OSABI);
}

// Returns slices of MB by parsing MB as an archive file.
// Each slice consists of a member file in the archive.
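// (Added note, not from the original source; the file names are
// hypothetical: for an archive created with, say, "ar rcs libfoo.a a.o b.o",
// the returned vector holds one (MemoryBufferRef, member-offset) pair for
// a.o and one for b.o.)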
std::vector<std::pair<MemoryBufferRef, uint64_t>> static getArchiveMembers(
    MemoryBufferRef MB) {
  std::unique_ptr<Archive> File =
      CHECK(Archive::create(MB),
            MB.getBufferIdentifier() + ": failed to parse archive");

  std::vector<std::pair<MemoryBufferRef, uint64_t>> V;
  Error Err = Error::success();
  bool AddToTar = File->isThin() && Tar;
  for (const ErrorOr<Archive::Child> &COrErr : File->children(Err)) {
    Archive::Child C =
        CHECK(COrErr, MB.getBufferIdentifier() +
                          ": could not get the child of the archive");
    MemoryBufferRef MBRef =
        CHECK(C.getMemoryBufferRef(),
              MB.getBufferIdentifier() +
                  ": could not get the buffer for a child of the archive");
    if (AddToTar)
      Tar->append(relativeToRoot(check(C.getFullName())), MBRef.getBuffer());
    V.push_back(std::make_pair(MBRef, C.getChildOffset()));
  }
  if (Err)
    fatal(MB.getBufferIdentifier() + ": Archive::children failed: " +
          toString(std::move(Err)));

  // Take ownership of memory buffers created for members of thin archives.
  for (std::unique_ptr<MemoryBuffer> &MB : File->takeThinBuffers())
    make<std::unique_ptr<MemoryBuffer>>(std::move(MB));

  return V;
}

// Opens a file and creates a file object. Path has to be resolved already.
void LinkerDriver::addFile(StringRef Path, bool WithLOption) {
  using namespace sys::fs;

  Optional<MemoryBufferRef> Buffer = readFile(Path);
  if (!Buffer.hasValue())
    return;
  MemoryBufferRef MBRef = *Buffer;

  if (InBinary) {
    Files.push_back(make<BinaryFile>(MBRef));
    return;
  }

  switch (identify_magic(MBRef.getBuffer())) {
  case file_magic::unknown:
    readLinkerScript(MBRef);
    return;
  case file_magic::archive: {
    // Handle -whole-archive.
    if (InWholeArchive) {
      for (const auto &P : getArchiveMembers(MBRef))
        Files.push_back(createObjectFile(P.first, Path, P.second));
      return;
    }

    std::unique_ptr<Archive> File =
        CHECK(Archive::create(MBRef), Path + ": failed to parse archive");

    // If an archive file has no symbol table, it is likely that a user
    // is attempting LTO and using a default ar command that doesn't
    // understand the LLVM bitcode file. It is a pretty common error, so
    // we'll handle it as if it had a symbol table.
    if (!File->isEmpty() && !File->hasSymbolTable()) {
      for (const auto &P : getArchiveMembers(MBRef))
        Files.push_back(make<LazyObjFile>(P.first, Path, P.second));
      return;
    }

    // Handle the regular case.
    Files.push_back(make<ArchiveFile>(std::move(File)));
    return;
  }
  case file_magic::elf_shared_object:
    if (Config->Relocatable) {
      error("attempted static link of dynamic object " + Path);
      return;
    }

    // DSOs usually have DT_SONAME tags in their ELF headers, and the
    // sonames are used to identify DSOs. But if they are missing,
    // they are identified by filenames. We don't know whether the new
    // file has a DT_SONAME or not because we haven't parsed it yet.
    // Here, we set the default soname for the file because we might
    // need it later.
    //
    // If a file was specified by -lfoo, the directory part is not
    // significant, as a user did not specify it. This behavior is
    // compatible with GNU.
    Files.push_back(
        createSharedFile(MBRef, WithLOption ? path::filename(Path) : Path));
    return;
  default:
    if (InLib)
      Files.push_back(make<LazyObjFile>(MBRef, "", 0));
    else
      Files.push_back(createObjectFile(MBRef));
  }
}

// Add a given library by searching it from input search paths.
void LinkerDriver::addLibrary(StringRef Name) {
  if (Optional<std::string> Path = searchLibrary(Name))
    addFile(*Path, /*WithLOption=*/true);
  else
    error("unable to find library -l" + Name);
}

// This function is called on startup. We need this for LTO since
// LTO calls LLVM functions to compile bitcode files to native code.
// Technically this can be delayed until we read bitcode files, but
// we don't bother to do it lazily because the initialization is fast.
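// (Added note; the command lines are illustrative assumptions, not from the
// original source: a link such as "ld.lld main.o libbitcode.a -o out" may
// reach the LTO code generator, and extra backend flags can be forwarded
// via -mllvm, e.g. "-mllvm -time-passes".)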
static void initLLVM(opt::InputArgList &Args) {
  InitializeAllTargets();
  InitializeAllTargetMCs();
  InitializeAllAsmPrinters();
  InitializeAllAsmParsers();

  // Parse and evaluate -mllvm options.
  std::vector<const char *> V;
  V.push_back("lld (LLVM option parsing)");
  for (auto *Arg : Args.filtered(OPT_mllvm))
    V.push_back(Arg->getValue());
  cl::ParseCommandLineOptions(V.size(), V.data());
}

// Some command line options or some combinations of them are not allowed.
// This function checks for such errors.
static void checkOptions(opt::InputArgList &Args) {
  // The MIPS ABI as of 2016 does not support the GNU-style symbol lookup
  // table, which is a relatively new feature.
  if (Config->EMachine == EM_MIPS && Config->GnuHash)
    error("the .gnu.hash section is not compatible with the MIPS target.");

  if (Config->FixCortexA53Errata843419 && Config->EMachine != EM_AARCH64)
    error("--fix-cortex-a53-843419 is only supported on AArch64 targets.");

  if (Config->Pie && Config->Shared)
    error("-shared and -pie may not be used together");

  if (!Config->Shared && !Config->FilterList.empty())
    error("-F may not be used without -shared");

  if (!Config->Shared && !Config->AuxiliaryList.empty())
    error("-f may not be used without -shared");

  if (!Config->Relocatable && !Config->DefineCommon)
    error("-no-define-common not supported in non relocatable output");

  if (Config->Relocatable) {
    if (Config->Shared)
      error("-r and -shared may not be used together");
    if (Config->GcSections)
      error("-r and --gc-sections may not be used together");
    if (Config->ICF)
      error("-r and --icf may not be used together");
    if (Config->Pie)
      error("-r and -pie may not be used together");
  }
}

static const char *getReproduceOption(opt::InputArgList &Args) {
  if (auto *Arg = Args.getLastArg(OPT_reproduce))
    return Arg->getValue();
  return getenv("LLD_REPRODUCE");
}

static bool hasZOption(opt::InputArgList &Args, StringRef Key) {
  for (auto *Arg : Args.filtered(OPT_z))
    if (Key == Arg->getValue())
      return true;
  return false;
}

void LinkerDriver::main(ArrayRef<const char *> ArgsArr, bool CanExitEarly) {
  ELFOptTable Parser;
  opt::InputArgList Args = Parser.parse(ArgsArr.slice(1));

  // Interpret this flag early because error() depends on it.
  errorHandler().ErrorLimit = args::getInteger(Args, OPT_error_limit, 20);

  // Handle -help
  if (Args.hasArg(OPT_help)) {
    printHelp(ArgsArr[0]);
    return;
  }

  // Handle -v or -version.
  //
  // A note about the "compatible with GNU linkers" message: this is a hack
  // for scripts generated by GNU Libtool 2.4.6 (released in February 2014
  // and still the newest version in March 2017) or earlier to recognize LLD
  // as a GNU-compatible linker. As long as the output for the -v option
  // contains "GNU" or "with BFD", they recognize us as GNU-compatible.
  //
  // This is a somewhat ugly hack, but in reality, we had no choice other
  // than doing this. Considering the very long release cycle of Libtool,
  // it is not easy to improve it to recognize LLD as a GNU-compatible
  // linker in a timely manner. Even if we could, there are still a lot of
  // "configure" scripts out there that were generated by old versions of
  // Libtool. We cannot convince every software developer to migrate to
  // the latest version and re-generate scripts. So we have this hack.
  if (Args.hasArg(OPT_v) || Args.hasArg(OPT_version))
    message(getLLDVersion() + " (compatible with GNU linkers)");

  // The behavior of -v or --version is a bit strange, but this is
  // needed for compatibility with GNU linkers.
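  // Illustrative behavior (added commentary): "ld.lld -v" with no input
  // files prints the version string and exits via the early returns below,
  // while "ld.lld -v foo.o" prints it and then performs the link.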
  if (Args.hasArg(OPT_v) && !Args.hasArg(OPT_INPUT))
    return;
  if (Args.hasArg(OPT_version))
    return;

  Config->ExitEarly = CanExitEarly && !Args.hasArg(OPT_full_shutdown);
  errorHandler().ExitEarly = Config->ExitEarly;

  if (const char *Path = getReproduceOption(Args)) {
    // Note that --reproduce is a debug option so you can ignore it
    // if you are trying to understand the whole picture of the code.
    Expected<std::unique_ptr<TarWriter>> ErrOrWriter =
        TarWriter::create(Path, path::stem(Path));
    if (ErrOrWriter) {
      Tar = ErrOrWriter->get();
      Tar->append("response.txt", createResponseFile(Args));
      Tar->append("version.txt", getLLDVersion() + "\n");
      make<std::unique_ptr<TarWriter>>(std::move(*ErrOrWriter));
    } else {
      error(Twine("--reproduce: failed to open ") + Path + ": " +
            toString(ErrOrWriter.takeError()));
    }
  }

  readConfigs(Args);
  initLLVM(Args);
  createFiles(Args);
  inferMachineType();
  setConfigs();
  checkOptions(Args);
  if (errorCount())
    return;

  switch (Config->EKind) {
  case ELF32LEKind:
    link<ELF32LE>(Args);
    return;
  case ELF32BEKind:
    link<ELF32BE>(Args);
    return;
  case ELF64LEKind:
    link<ELF64LE>(Args);
    return;
  case ELF64BEKind:
    link<ELF64BE>(Args);
    return;
  default:
    llvm_unreachable("unknown Config->EKind");
  }
}

static std::string getRpath(opt::InputArgList &Args) {
  std::vector<StringRef> V = args::getStrings(Args, OPT_rpath);
  return llvm::join(V.begin(), V.end(), ":");
}

// Determines what we should do if there are remaining unresolved
// symbols after the name resolution.
static UnresolvedPolicy getUnresolvedSymbolPolicy(opt::InputArgList &Args) {
  if (Args.hasArg(OPT_relocatable))
    return UnresolvedPolicy::IgnoreAll;

  UnresolvedPolicy ErrorOrWarn = Args.hasFlag(OPT_error_unresolved_symbols,
                                              OPT_warn_unresolved_symbols, true)
                                     ? UnresolvedPolicy::ReportError
                                     : UnresolvedPolicy::Warn;

  // Process the last of -unresolved-symbols, -no-undefined or -z defs.
  for (auto *Arg : llvm::reverse(Args)) {
    switch (Arg->getOption().getID()) {
    case OPT_unresolved_symbols: {
      StringRef S = Arg->getValue();
      if (S == "ignore-all" || S == "ignore-in-object-files")
        return UnresolvedPolicy::Ignore;
      if (S == "ignore-in-shared-libs" || S == "report-all")
        return ErrorOrWarn;
      error("unknown --unresolved-symbols value: " + S);
      continue;
    }
    case OPT_no_undefined:
      return ErrorOrWarn;
    case OPT_z:
      if (StringRef(Arg->getValue()) == "defs")
        return ErrorOrWarn;
      continue;
    }
  }

  // -shared implies -unresolved-symbols=ignore-all because missing
  // symbols are likely to be resolved at runtime using other DSOs.
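  // Illustrative example (added commentary; file names are hypothetical):
  // "ld.lld -shared plugin.o -o plugin.so" does not complain about symbols
  // the plugin expects to find in its host program at runtime, matching the
  // ignore-all policy applied here.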
  if (Config->Shared)
    return UnresolvedPolicy::Ignore;
  return ErrorOrWarn;
}

static Target2Policy getTarget2(opt::InputArgList &Args) {
  StringRef S = Args.getLastArgValue(OPT_target2, "got-rel");
  if (S == "rel")
    return Target2Policy::Rel;
  if (S == "abs")
    return Target2Policy::Abs;
  if (S == "got-rel")
    return Target2Policy::GotRel;
  error("unknown --target2 option: " + S);
  return Target2Policy::GotRel;
}

static bool isOutputFormatBinary(opt::InputArgList &Args) {
  if (auto *Arg = Args.getLastArg(OPT_oformat)) {
    StringRef S = Arg->getValue();
    if (S == "binary")
      return true;
    error("unknown --oformat value: " + S);
  }
  return false;
}

static DiscardPolicy getDiscard(opt::InputArgList &Args) {
  if (Args.hasArg(OPT_relocatable))
    return DiscardPolicy::None;

  auto *Arg =
      Args.getLastArg(OPT_discard_all, OPT_discard_locals, OPT_discard_none);
  if (!Arg)
    return DiscardPolicy::Default;
  if (Arg->getOption().getID() == OPT_discard_all)
    return DiscardPolicy::All;
  if (Arg->getOption().getID() == OPT_discard_locals)
    return DiscardPolicy::Locals;
  return DiscardPolicy::None;
}

static StringRef getDynamicLinker(opt::InputArgList &Args) {
  auto *Arg = Args.getLastArg(OPT_dynamic_linker, OPT_no_dynamic_linker);
  if (!Arg || Arg->getOption().getID() == OPT_no_dynamic_linker)
    return "";
  return Arg->getValue();
}

static StripPolicy getStrip(opt::InputArgList &Args) {
  if (Args.hasArg(OPT_relocatable))
    return StripPolicy::None;

  auto *Arg = Args.getLastArg(OPT_strip_all, OPT_strip_debug);
  if (!Arg)
    return StripPolicy::None;
  if (Arg->getOption().getID() == OPT_strip_all)
    return StripPolicy::All;
  return StripPolicy::Debug;
}

static uint64_t parseSectionAddress(StringRef S, const opt::Arg &Arg) {
  uint64_t VA = 0;
  if (S.startswith("0x"))
    S = S.drop_front(2);
  if (!to_integer(S, VA, 16))
    error("invalid argument: " + toString(Arg));
  return VA;
}

static StringMap<uint64_t> getSectionStartMap(opt::InputArgList &Args) {
  StringMap<uint64_t> Ret;
  for (auto *Arg : Args.filtered(OPT_section_start)) {
    StringRef Name;
    StringRef Addr;
    std::tie(Name, Addr) = StringRef(Arg->getValue()).split('=');
    Ret[Name] = parseSectionAddress(Addr, *Arg);
  }

  if (auto *Arg = Args.getLastArg(OPT_Ttext))
    Ret[".text"] = parseSectionAddress(Arg->getValue(), *Arg);
  if (auto *Arg = Args.getLastArg(OPT_Tdata))
    Ret[".data"] = parseSectionAddress(Arg->getValue(), *Arg);
  if (auto *Arg = Args.getLastArg(OPT_Tbss))
    Ret[".bss"] = parseSectionAddress(Arg->getValue(), *Arg);
  return Ret;
}

static SortSectionPolicy getSortSection(opt::InputArgList &Args) {
  StringRef S = Args.getLastArgValue(OPT_sort_section);
  if (S == "alignment")
    return SortSectionPolicy::Alignment;
  if (S == "name")
    return SortSectionPolicy::Name;
  if (!S.empty())
    error("unknown --sort-section rule: " + S);
  return SortSectionPolicy::Default;
}

static OrphanHandlingPolicy getOrphanHandling(opt::InputArgList &Args) {
  StringRef S = Args.getLastArgValue(OPT_orphan_handling, "place");
  if (S == "warn")
    return OrphanHandlingPolicy::Warn;
  if (S == "error")
    return OrphanHandlingPolicy::Error;
  if (S != "place")
    error("unknown --orphan-handling mode: " + S);
  return OrphanHandlingPolicy::Place;
}

// Parse --build-id or --build-id=<style>