Index: projects/clang390-import/contrib/llvm/include/llvm/CodeGen/ISDOpcodes.h =================================================================== --- projects/clang390-import/contrib/llvm/include/llvm/CodeGen/ISDOpcodes.h (revision 305682) +++ projects/clang390-import/contrib/llvm/include/llvm/CodeGen/ISDOpcodes.h (revision 305683) @@ -1,944 +1,949 @@ //===-- llvm/CodeGen/ISDOpcodes.h - CodeGen opcodes -------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file declares codegen opcodes and related utilities. // //===----------------------------------------------------------------------===// #ifndef LLVM_CODEGEN_ISDOPCODES_H #define LLVM_CODEGEN_ISDOPCODES_H namespace llvm { /// ISD namespace - This namespace contains an enum which represents all of the /// SelectionDAG node types and value types. /// namespace ISD { //===--------------------------------------------------------------------===// /// ISD::NodeType enum - This enum defines the target-independent operators /// for a SelectionDAG. /// /// Targets may also define target-dependent operator codes for SDNodes. For /// example, on x86, these are the enum values in the X86ISD namespace. /// Targets should aim to use target-independent operators to model their /// instruction sets as much as possible, and only use target-dependent /// operators when they have special requirements. /// /// Finally, during and after selection proper, SNodes may use special /// operator codes that correspond directly with MachineInstr opcodes. These /// are used to represent selected instructions. See the isMachineOpcode() /// and getMachineOpcode() member functions of SDNode. /// enum NodeType { /// DELETED_NODE - This is an illegal value that is used to catch /// errors. This opcode is not a legal opcode for any node. DELETED_NODE, /// EntryToken - This is the marker used to indicate the start of a region. EntryToken, /// TokenFactor - This node takes multiple tokens as input and produces a /// single token result. This is used to represent the fact that the operand /// operators are independent of each other. TokenFactor, /// AssertSext, AssertZext - These nodes record if a register contains a /// value that has already been zero or sign extended from a narrower type. /// These nodes take two operands. The first is the node that has already /// been extended, and the second is a value type node indicating the width /// of the extension AssertSext, AssertZext, /// Various leaf nodes. BasicBlock, VALUETYPE, CONDCODE, Register, RegisterMask, Constant, ConstantFP, GlobalAddress, GlobalTLSAddress, FrameIndex, JumpTable, ConstantPool, ExternalSymbol, BlockAddress, /// The address of the GOT GLOBAL_OFFSET_TABLE, /// FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and /// llvm.returnaddress on the DAG. These nodes take one operand, the index /// of the frame or return address to return. An index of zero corresponds /// to the current function's frame or return address, an index of one to /// the parent's frame or return address, and so on. FRAMEADDR, RETURNADDR, /// LOCAL_RECOVER - Represents the llvm.localrecover intrinsic. /// Materializes the offset from the local object pointer of another /// function to a particular local object passed to llvm.localescape. The /// operand is the MCSymbol label used to represent this offset, since /// typically the offset is not known until after code generation of the /// parent. LOCAL_RECOVER, /// READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on /// the DAG, which implements the named register global variables extension. READ_REGISTER, WRITE_REGISTER, /// FRAME_TO_ARGS_OFFSET - This node represents offset from frame pointer to /// first (possible) on-stack argument. This is needed for correct stack /// adjustment during unwind. FRAME_TO_ARGS_OFFSET, + /// EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical + /// Frame Address (CFA), generally the value of the stack pointer at the + /// call site in the previous frame. + EH_DWARF_CFA, + /// OUTCHAIN = EH_RETURN(INCHAIN, OFFSET, HANDLER) - This node represents /// 'eh_return' gcc dwarf builtin, which is used to return from /// exception. The general meaning is: adjust stack by OFFSET and pass /// execution to HANDLER. Many platform-related details also :) EH_RETURN, /// RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) /// This corresponds to the eh.sjlj.setjmp intrinsic. /// It takes an input chain and a pointer to the jump buffer as inputs /// and returns an outchain. EH_SJLJ_SETJMP, /// OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) /// This corresponds to the eh.sjlj.longjmp intrinsic. /// It takes an input chain and a pointer to the jump buffer as inputs /// and returns an outchain. EH_SJLJ_LONGJMP, /// OUTCHAIN = EH_SJLJ_SETUP_DISPATCH(INCHAIN) /// The target initializes the dispatch table here. EH_SJLJ_SETUP_DISPATCH, /// TargetConstant* - Like Constant*, but the DAG does not do any folding, /// simplification, or lowering of the constant. They are used for constants /// which are known to fit in the immediate fields of their users, or for /// carrying magic numbers which are not values which need to be /// materialized in registers. TargetConstant, TargetConstantFP, /// TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or /// anything else with this node, and this is valid in the target-specific /// dag, turning into a GlobalAddress operand. TargetGlobalAddress, TargetGlobalTLSAddress, TargetFrameIndex, TargetJumpTable, TargetConstantPool, TargetExternalSymbol, TargetBlockAddress, MCSymbol, /// TargetIndex - Like a constant pool entry, but with completely /// target-dependent semantics. Holds target flags, a 32-bit index, and a /// 64-bit index. Targets can use this however they like. TargetIndex, /// RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) /// This node represents a target intrinsic function with no side effects. /// The first operand is the ID number of the intrinsic from the /// llvm::Intrinsic namespace. The operands to the intrinsic follow. The /// node returns the result of the intrinsic. INTRINSIC_WO_CHAIN, /// RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) /// This node represents a target intrinsic function with side effects that /// returns a result. The first operand is a chain pointer. The second is /// the ID number of the intrinsic from the llvm::Intrinsic namespace. The /// operands to the intrinsic follow. The node has two results, the result /// of the intrinsic and an output chain. INTRINSIC_W_CHAIN, /// OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) /// This node represents a target intrinsic function with side effects that /// does not return a result. The first operand is a chain pointer. The /// second is the ID number of the intrinsic from the llvm::Intrinsic /// namespace. The operands to the intrinsic follow. INTRINSIC_VOID, /// CopyToReg - This node has three operands: a chain, a register number to /// set to this value, and a value. CopyToReg, /// CopyFromReg - This node indicates that the input value is a virtual or /// physical register that is defined outside of the scope of this /// SelectionDAG. The register is available from the RegisterSDNode object. CopyFromReg, /// UNDEF - An undefined node. UNDEF, /// EXTRACT_ELEMENT - This is used to get the lower or upper (determined by /// a Constant, which is required to be operand #1) half of the integer or /// float value specified as operand #0. This is only for use before /// legalization, for values that will be broken into multiple registers. EXTRACT_ELEMENT, /// BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways. /// Given two values of the same integer value type, this produces a value /// twice as big. Like EXTRACT_ELEMENT, this can only be used before /// legalization. BUILD_PAIR, /// MERGE_VALUES - This node takes multiple discrete operands and returns /// them all as its individual results. This nodes has exactly the same /// number of inputs and outputs. This node is useful for some pieces of the /// code generator that want to think about a single node with multiple /// results, not multiple nodes. MERGE_VALUES, /// Simple integer binary arithmetic operators. ADD, SUB, MUL, SDIV, UDIV, SREM, UREM, /// SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing /// a signed/unsigned value of type i[2*N], and return the full value as /// two results, each of type iN. SMUL_LOHI, UMUL_LOHI, /// SDIVREM/UDIVREM - Divide two integers and produce both a quotient and /// remainder result. SDIVREM, UDIVREM, /// CARRY_FALSE - This node is used when folding other nodes, /// like ADDC/SUBC, which indicate the carry result is always false. CARRY_FALSE, /// Carry-setting nodes for multiple precision addition and subtraction. /// These nodes take two operands of the same value type, and produce two /// results. The first result is the normal add or sub result, the second /// result is the carry flag result. ADDC, SUBC, /// Carry-using nodes for multiple precision addition and subtraction. These /// nodes take three operands: The first two are the normal lhs and rhs to /// the add or sub, and the third is the input carry flag. These nodes /// produce two results; the normal result of the add or sub, and the output /// carry flag. These nodes both read and write a carry flag to allow them /// to them to be chained together for add and sub of arbitrarily large /// values. ADDE, SUBE, /// RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition. /// These nodes take two operands: the normal LHS and RHS to the add. They /// produce two results: the normal result of the add, and a boolean that /// indicates if an overflow occurred (*not* a flag, because it may be store /// to memory, etc.). If the type of the boolean is not i1 then the high /// bits conform to getBooleanContents. /// These nodes are generated from llvm.[su]add.with.overflow intrinsics. SADDO, UADDO, /// Same for subtraction. SSUBO, USUBO, /// Same for multiplication. SMULO, UMULO, /// Simple binary floating point operators. FADD, FSUB, FMUL, FDIV, FREM, /// FMA - Perform a * b + c with no intermediate rounding step. FMA, /// FMAD - Perform a * b + c, while getting the same result as the /// separately rounded operations. FMAD, /// FCOPYSIGN(X, Y) - Return the value of X with the sign of Y. NOTE: This /// DAG node does not require that X and Y have the same type, just that /// they are both floating point. X and the result must have the same type. /// FCOPYSIGN(f32, f64) is allowed. FCOPYSIGN, /// INT = FGETSIGN(FP) - Return the sign bit of the specified floating point /// value as an integer 0/1 value. FGETSIGN, /// Returns platform specific canonical encoding of a floating point number. FCANONICALIZE, /// BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a vector with the /// specified, possibly variable, elements. The number of elements is /// required to be a power of two. The types of the operands must all be /// the same and must match the vector element type, except that integer /// types are allowed to be larger than the element type, in which case /// the operands are implicitly truncated. BUILD_VECTOR, /// INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element /// at IDX replaced with VAL. If the type of VAL is larger than the vector /// element type then VAL is truncated before replacement. INSERT_VECTOR_ELT, /// EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR /// identified by the (potentially variable) element number IDX. If the /// return type is an integer type larger than the element type of the /// vector, the result is extended to the width of the return type. EXTRACT_VECTOR_ELT, /// CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of /// vector type with the same length and element type, this produces a /// concatenated vector result value, with length equal to the sum of the /// lengths of the input vectors. CONCAT_VECTORS, /// INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector /// with VECTOR2 inserted into VECTOR1 at the (potentially /// variable) element number IDX, which must be a multiple of the /// VECTOR2 vector length. The elements of VECTOR1 starting at /// IDX are overwritten with VECTOR2. Elements IDX through /// vector_length(VECTOR2) must be valid VECTOR1 indices. INSERT_SUBVECTOR, /// EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR (an /// vector value) starting with the element number IDX, which must be a /// constant multiple of the result vector length. EXTRACT_SUBVECTOR, /// VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as /// VEC1/VEC2. A VECTOR_SHUFFLE node also contains an array of constant int /// values that indicate which value (or undef) each result element will /// get. These constant ints are accessible through the /// ShuffleVectorSDNode class. This is quite similar to the Altivec /// 'vperm' instruction, except that the indices must be constants and are /// in terms of the element size of VEC1/VEC2, not in terms of bytes. VECTOR_SHUFFLE, /// SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a /// scalar value into element 0 of the resultant vector type. The top /// elements 1 to N-1 of the N-element vector are undefined. The type /// of the operand must match the vector element type, except when they /// are integer types. In this case the operand is allowed to be wider /// than the vector element type, and is implicitly truncated to it. SCALAR_TO_VECTOR, /// MULHU/MULHS - Multiply high - Multiply two integers of type iN, /// producing an unsigned/signed value of type i[2*N], then return the top /// part. MULHU, MULHS, /// [US]{MIN/MAX} - Binary minimum or maximum or signed or unsigned /// integers. SMIN, SMAX, UMIN, UMAX, /// Bitwise operators - logical and, logical or, logical xor. AND, OR, XOR, /// Shift and rotation operations. After legalization, the type of the /// shift amount is known to be TLI.getShiftAmountTy(). Before legalization /// the shift amount can be any type, but care must be taken to ensure it is /// large enough. TLI.getShiftAmountTy() is i8 on some targets, but before /// legalization, types like i1024 can occur and i8 doesn't have enough bits /// to represent the shift amount. /// When the 1st operand is a vector, the shift amount must be in the same /// type. (TLI.getShiftAmountTy() will return the same type when the input /// type is a vector.) SHL, SRA, SRL, ROTL, ROTR, /// Byte Swap and Counting operators. BSWAP, CTTZ, CTLZ, CTPOP, BITREVERSE, /// Bit counting operators with an undefined result for zero inputs. CTTZ_ZERO_UNDEF, CTLZ_ZERO_UNDEF, /// Select(COND, TRUEVAL, FALSEVAL). If the type of the boolean COND is not /// i1 then the high bits must conform to getBooleanContents. SELECT, /// Select with a vector condition (op #0) and two vector operands (ops #1 /// and #2), returning a vector result. All vectors have the same length. /// Much like the scalar select and setcc, each bit in the condition selects /// whether the corresponding result element is taken from op #1 or op #2. /// At first, the VSELECT condition is of vXi1 type. Later, targets may /// change the condition type in order to match the VSELECT node using a /// pattern. The condition follows the BooleanContent format of the target. VSELECT, /// Select with condition operator - This selects between a true value and /// a false value (ops #2 and #3) based on the boolean result of comparing /// the lhs and rhs (ops #0 and #1) of a conditional expression with the /// condition code in op #4, a CondCodeSDNode. SELECT_CC, /// SetCC operator - This evaluates to a true value iff the condition is /// true. If the result value type is not i1 then the high bits conform /// to getBooleanContents. The operands to this are the left and right /// operands to compare (ops #0, and #1) and the condition code to compare /// them with (op #2) as a CondCodeSDNode. If the operands are vector types /// then the result type must also be a vector type. SETCC, /// Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but /// op #2 is a *carry value*. This operator checks the result of /// "LHS - RHS - Carry", and can be used to compare two wide integers: /// (setcce lhshi rhshi (subc lhslo rhslo) cc). Only valid for integers. SETCCE, /// SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded /// integer shift operations. The operation ordering is: /// [Lo,Hi] = op [LoLHS,HiLHS], Amt SHL_PARTS, SRA_PARTS, SRL_PARTS, /// Conversion operators. These are all single input single output /// operations. For all of these, the result type must be strictly /// wider or narrower (depending on the operation) than the source /// type. /// SIGN_EXTEND - Used for integer types, replicating the sign bit /// into new bits. SIGN_EXTEND, /// ZERO_EXTEND - Used for integer types, zeroing the new bits. ZERO_EXTEND, /// ANY_EXTEND - Used for integer types. The high bits are undefined. ANY_EXTEND, /// TRUNCATE - Completely drop the high bits. TRUNCATE, /// [SU]INT_TO_FP - These operators convert integers (whose interpreted sign /// depends on the first letter) to floating point. SINT_TO_FP, UINT_TO_FP, /// SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to /// sign extend a small value in a large integer register (e.g. sign /// extending the low 8 bits of a 32-bit register to fill the top 24 bits /// with the 7th bit). The size of the smaller type is indicated by the 1th /// operand, a ValueType node. SIGN_EXTEND_INREG, /// ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an /// in-register any-extension of the low lanes of an integer vector. The /// result type must have fewer elements than the operand type, and those /// elements must be larger integer types such that the total size of the /// operand type and the result type match. Each of the low operand /// elements is any-extended into the corresponding, wider result /// elements with the high bits becoming undef. ANY_EXTEND_VECTOR_INREG, /// SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an /// in-register sign-extension of the low lanes of an integer vector. The /// result type must have fewer elements than the operand type, and those /// elements must be larger integer types such that the total size of the /// operand type and the result type match. Each of the low operand /// elements is sign-extended into the corresponding, wider result /// elements. // FIXME: The SIGN_EXTEND_INREG node isn't specifically limited to // scalars, but it also doesn't handle vectors well. Either it should be // restricted to scalars or this node (and its handling) should be merged // into it. SIGN_EXTEND_VECTOR_INREG, /// ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an /// in-register zero-extension of the low lanes of an integer vector. The /// result type must have fewer elements than the operand type, and those /// elements must be larger integer types such that the total size of the /// operand type and the result type match. Each of the low operand /// elements is zero-extended into the corresponding, wider result /// elements. ZERO_EXTEND_VECTOR_INREG, /// FP_TO_[US]INT - Convert a floating point value to a signed or unsigned /// integer. FP_TO_SINT, FP_TO_UINT, /// X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type /// down to the precision of the destination VT. TRUNC is a flag, which is /// always an integer that is zero or one. If TRUNC is 0, this is a /// normal rounding, if it is 1, this FP_ROUND is known to not change the /// value of Y. /// /// The TRUNC = 1 case is used in cases where we know that the value will /// not be modified by the node, because Y is not using any of the extra /// precision of source type. This allows certain transformations like /// FP_EXTEND(FP_ROUND(X,1)) -> X which are not safe for /// FP_EXTEND(FP_ROUND(X,0)) because the extra bits aren't removed. FP_ROUND, /// FLT_ROUNDS_ - Returns current rounding mode: /// -1 Undefined /// 0 Round to 0 /// 1 Round to nearest /// 2 Round to +inf /// 3 Round to -inf FLT_ROUNDS_, /// X = FP_ROUND_INREG(Y, VT) - This operator takes an FP register, and /// rounds it to a floating point value. It then promotes it and returns it /// in a register of the same size. This operation effectively just /// discards excess precision. The type to round down to is specified by /// the VT operand, a VTSDNode. FP_ROUND_INREG, /// X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type. FP_EXTEND, /// BITCAST - This operator converts between integer, vector and FP /// values, as if the value was stored to memory with one type and loaded /// from the same address with the other type (or equivalently for vector /// format conversions, etc). The source and result are required to have /// the same bit size (e.g. f32 <-> i32). This can also be used for /// int-to-int or fp-to-fp conversions, but that is a noop, deleted by /// getNode(). /// /// This operator is subtly different from the bitcast instruction from /// LLVM-IR since this node may change the bits in the register. For /// example, this occurs on big-endian NEON and big-endian MSA where the /// layout of the bits in the register depends on the vector type and this /// operator acts as a shuffle operation for some vector type combinations. BITCAST, /// ADDRSPACECAST - This operator converts between pointers of different /// address spaces. ADDRSPACECAST, /// CONVERT_RNDSAT - This operator is used to support various conversions /// between various types (float, signed, unsigned and vectors of those /// types) with rounding and saturation. NOTE: Avoid using this operator as /// most target don't support it and the operator might be removed in the /// future. It takes the following arguments: /// 0) value /// 1) dest type (type to convert to) /// 2) src type (type to convert from) /// 3) rounding imm /// 4) saturation imm /// 5) ISD::CvtCode indicating the type of conversion to do CONVERT_RNDSAT, /// FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions /// and truncation for half-precision (16 bit) floating numbers. These nodes /// form a semi-softened interface for dealing with f16 (as an i16), which /// is often a storage-only type but has native conversions. FP16_TO_FP, FP_TO_FP16, /// FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW, /// FLOG, FLOG2, FLOG10, FEXP, FEXP2, /// FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR - Perform various unary /// floating point operations. These are inspired by libm. FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW, FLOG, FLOG2, FLOG10, FEXP, FEXP2, FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR, /// FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two /// values. /// In the case where a single input is NaN, the non-NaN input is returned. /// /// The return value of (FMINNUM 0.0, -0.0) could be either 0.0 or -0.0. FMINNUM, FMAXNUM, /// FMINNAN/FMAXNAN - Behave identically to FMINNUM/FMAXNUM, except that /// when a single input is NaN, NaN is returned. FMINNAN, FMAXNAN, /// FSINCOS - Compute both fsin and fcos as a single operation. FSINCOS, /// LOAD and STORE have token chains as their first operand, then the same /// operands as an LLVM load/store instruction, then an offset node that /// is added / subtracted from the base pointer to form the address (for /// indexed memory ops). LOAD, STORE, /// DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned /// to a specified boundary. This node always has two return values: a new /// stack pointer value and a chain. The first operand is the token chain, /// the second is the number of bytes to allocate, and the third is the /// alignment boundary. The size is guaranteed to be a multiple of the /// stack alignment, and the alignment is guaranteed to be bigger than the /// stack alignment (if required) or 0 to get standard stack alignment. DYNAMIC_STACKALLOC, /// Control flow instructions. These all have token chains. /// BR - Unconditional branch. The first operand is the chain /// operand, the second is the MBB to branch to. BR, /// BRIND - Indirect branch. The first operand is the chain, the second /// is the value to branch to, which must be of the same type as the /// target's pointer type. BRIND, /// BR_JT - Jumptable branch. The first operand is the chain, the second /// is the jumptable index, the last one is the jumptable entry index. BR_JT, /// BRCOND - Conditional branch. The first operand is the chain, the /// second is the condition, the third is the block to branch to if the /// condition is true. If the type of the condition is not i1, then the /// high bits must conform to getBooleanContents. BRCOND, /// BR_CC - Conditional branch. The behavior is like that of SELECT_CC, in /// that the condition is represented as condition code, and two nodes to /// compare, rather than as a combined SetCC node. The operands in order /// are chain, cc, lhs, rhs, block to branch to if condition is true. BR_CC, /// INLINEASM - Represents an inline asm block. This node always has two /// return values: a chain and a flag result. The inputs are as follows: /// Operand #0 : Input chain. /// Operand #1 : a ExternalSymbolSDNode with a pointer to the asm string. /// Operand #2 : a MDNodeSDNode with the !srcloc metadata. /// Operand #3 : HasSideEffect, IsAlignStack bits. /// After this, it is followed by a list of operands with this format: /// ConstantSDNode: Flags that encode whether it is a mem or not, the /// of operands that follow, etc. See InlineAsm.h. /// ... however many operands ... /// Operand #last: Optional, an incoming flag. /// /// The variable width operands are required to represent target addressing /// modes as a single "operand", even though they may have multiple /// SDOperands. INLINEASM, /// EH_LABEL - Represents a label in mid basic block used to track /// locations needed for debug and exception handling tables. These nodes /// take a chain as input and return a chain. EH_LABEL, /// CATCHPAD - Represents a catchpad instruction. CATCHPAD, /// CATCHRET - Represents a return from a catch block funclet. Used for /// MSVC compatible exception handling. Takes a chain operand and a /// destination basic block operand. CATCHRET, /// CLEANUPRET - Represents a return from a cleanup block funclet. Used for /// MSVC compatible exception handling. Takes only a chain operand. CLEANUPRET, /// STACKSAVE - STACKSAVE has one operand, an input chain. It produces a /// value, the same type as the pointer type for the system, and an output /// chain. STACKSAVE, /// STACKRESTORE has two operands, an input chain and a pointer to restore /// to it returns an output chain. STACKRESTORE, /// CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end /// of a call sequence, and carry arbitrary information that target might /// want to know. The first operand is a chain, the rest are specified by /// the target and not touched by the DAG optimizers. /// CALLSEQ_START..CALLSEQ_END pairs may not be nested. CALLSEQ_START, // Beginning of a call sequence CALLSEQ_END, // End of a call sequence /// VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, /// and the alignment. It returns a pair of values: the vaarg value and a /// new chain. VAARG, /// VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, /// a source pointer, a SRCVALUE for the destination, and a SRCVALUE for the /// source. VACOPY, /// VAEND, VASTART - VAEND and VASTART have three operands: an input chain, /// pointer, and a SRCVALUE. VAEND, VASTART, /// SRCVALUE - This is a node type that holds a Value* that is used to /// make reference to a value in the LLVM IR. SRCVALUE, /// MDNODE_SDNODE - This is a node that holdes an MDNode*, which is used to /// reference metadata in the IR. MDNODE_SDNODE, /// PCMARKER - This corresponds to the pcmarker intrinsic. PCMARKER, /// READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic. /// It produces a chain and one i64 value. The only operand is a chain. /// If i64 is not legal, the result will be expanded into smaller values. /// Still, it returns an i64, so targets should set legality for i64. /// The result is the content of the architecture-specific cycle /// counter-like register (or other high accuracy low latency clock source). READCYCLECOUNTER, /// HANDLENODE node - Used as a handle for various purposes. HANDLENODE, /// INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic. It /// takes as input a token chain, the pointer to the trampoline, the pointer /// to the nested function, the pointer to pass for the 'nest' parameter, a /// SRCVALUE for the trampoline and another for the nested function /// (allowing targets to access the original Function*). /// It produces a token chain as output. INIT_TRAMPOLINE, /// ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic. /// It takes a pointer to the trampoline and produces a (possibly) new /// pointer to the same trampoline with platform-specific adjustments /// applied. The pointer it returns points to an executable block of code. ADJUST_TRAMPOLINE, /// TRAP - Trapping instruction TRAP, /// DEBUGTRAP - Trap intended to get the attention of a debugger. DEBUGTRAP, /// PREFETCH - This corresponds to a prefetch intrinsic. The first operand /// is the chain. The other operands are the address to prefetch, /// read / write specifier, locality specifier and instruction / data cache /// specifier. PREFETCH, /// OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) /// This corresponds to the fence instruction. It takes an input chain, and /// two integer constants: an AtomicOrdering and a SynchronizationScope. ATOMIC_FENCE, /// Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) /// This corresponds to "load atomic" instruction. ATOMIC_LOAD, /// OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) /// This corresponds to "store atomic" instruction. ATOMIC_STORE, /// Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) /// For double-word atomic operations: /// ValLo, ValHi, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmpLo, cmpHi, /// swapLo, swapHi) /// This corresponds to the cmpxchg instruction. ATOMIC_CMP_SWAP, /// Val, Success, OUTCHAIN /// = ATOMIC_CMP_SWAP_WITH_SUCCESS(INCHAIN, ptr, cmp, swap) /// N.b. this is still a strong cmpxchg operation, so /// Success == "Val == cmp". ATOMIC_CMP_SWAP_WITH_SUCCESS, /// Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) /// Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN, ptr, amt) /// For double-word atomic operations: /// ValLo, ValHi, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amtLo, amtHi) /// ValLo, ValHi, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN, ptr, amtLo, amtHi) /// These correspond to the atomicrmw instruction. ATOMIC_SWAP, ATOMIC_LOAD_ADD, ATOMIC_LOAD_SUB, ATOMIC_LOAD_AND, ATOMIC_LOAD_OR, ATOMIC_LOAD_XOR, ATOMIC_LOAD_NAND, ATOMIC_LOAD_MIN, ATOMIC_LOAD_MAX, ATOMIC_LOAD_UMIN, ATOMIC_LOAD_UMAX, // Masked load and store - consecutive vector load and store operations // with additional mask operand that prevents memory accesses to the // masked-off lanes. MLOAD, MSTORE, // Masked gather and scatter - load and store operations for a vector of // random addresses with additional mask operand that prevents memory // accesses to the masked-off lanes. MGATHER, MSCATTER, /// This corresponds to the llvm.lifetime.* intrinsics. The first operand /// is the chain and the second operand is the alloca pointer. LIFETIME_START, LIFETIME_END, /// GC_TRANSITION_START/GC_TRANSITION_END - These operators mark the /// beginning and end of GC transition sequence, and carry arbitrary /// information that target might need for lowering. The first operand is /// a chain, the rest are specified by the target and not touched by the DAG /// optimizers. GC_TRANSITION_START..GC_TRANSITION_END pairs may not be /// nested. GC_TRANSITION_START, GC_TRANSITION_END, /// GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of /// the most recent dynamic alloca. For most targets that would be 0, but /// for some others (e.g. PowerPC, PowerPC64) that would be compile-time /// known nonzero constant. The only operand here is the chain. GET_DYNAMIC_AREA_OFFSET, /// BUILTIN_OP_END - This must be the last enum value in this list. /// The target-specific pre-isel opcode values start here. BUILTIN_OP_END }; /// FIRST_TARGET_MEMORY_OPCODE - Target-specific pre-isel operations /// which do not reference a specific memory location should be less than /// this value. Those that do must not be less than this value, and can /// be used with SelectionDAG::getMemIntrinsicNode. static const int FIRST_TARGET_MEMORY_OPCODE = BUILTIN_OP_END+300; //===--------------------------------------------------------------------===// /// MemIndexedMode enum - This enum defines the load / store indexed /// addressing modes. /// /// UNINDEXED "Normal" load / store. The effective address is already /// computed and is available in the base pointer. The offset /// operand is always undefined. In addition to producing a /// chain, an unindexed load produces one value (result of the /// load); an unindexed store does not produce a value. /// /// PRE_INC Similar to the unindexed mode where the effective address is /// PRE_DEC the value of the base pointer add / subtract the offset. /// It considers the computation as being folded into the load / /// store operation (i.e. the load / store does the address /// computation as well as performing the memory transaction). /// The base operand is always undefined. In addition to /// producing a chain, pre-indexed load produces two values /// (result of the load and the result of the address /// computation); a pre-indexed store produces one value (result /// of the address computation). /// /// POST_INC The effective address is the value of the base pointer. The /// POST_DEC value of the offset operand is then added to / subtracted /// from the base after memory transaction. In addition to /// producing a chain, post-indexed load produces two values /// (the result of the load and the result of the base +/- offset /// computation); a post-indexed store produces one value (the /// the result of the base +/- offset computation). enum MemIndexedMode { UNINDEXED = 0, PRE_INC, PRE_DEC, POST_INC, POST_DEC, LAST_INDEXED_MODE }; //===--------------------------------------------------------------------===// /// LoadExtType enum - This enum defines the three variants of LOADEXT /// (load with extension). /// /// SEXTLOAD loads the integer operand and sign extends it to a larger /// integer result type. /// ZEXTLOAD loads the integer operand and zero extends it to a larger /// integer result type. /// EXTLOAD is used for two things: floating point extending loads and /// integer extending loads [the top bits are undefined]. enum LoadExtType { NON_EXTLOAD = 0, EXTLOAD, SEXTLOAD, ZEXTLOAD, LAST_LOADEXT_TYPE }; NodeType getExtForLoadExtType(bool IsFP, LoadExtType); //===--------------------------------------------------------------------===// /// ISD::CondCode enum - These are ordered carefully to make the bitfields /// below work out, when considering SETFALSE (something that never exists /// dynamically) as 0. "U" -> Unsigned (for integer operands) or Unordered /// (for floating point), "L" -> Less than, "G" -> Greater than, "E" -> Equal /// to. If the "N" column is 1, the result of the comparison is undefined if /// the input is a NAN. /// /// All of these (except for the 'always folded ops') should be handled for /// floating point. For integer, only the SETEQ,SETNE,SETLT,SETLE,SETGT, /// SETGE,SETULT,SETULE,SETUGT, and SETUGE opcodes are used. /// /// Note that these are laid out in a specific order to allow bit-twiddling /// to transform conditions. enum CondCode { // Opcode N U L G E Intuitive operation SETFALSE, // 0 0 0 0 Always false (always folded) SETOEQ, // 0 0 0 1 True if ordered and equal SETOGT, // 0 0 1 0 True if ordered and greater than SETOGE, // 0 0 1 1 True if ordered and greater than or equal SETOLT, // 0 1 0 0 True if ordered and less than SETOLE, // 0 1 0 1 True if ordered and less than or equal SETONE, // 0 1 1 0 True if ordered and operands are unequal SETO, // 0 1 1 1 True if ordered (no nans) SETUO, // 1 0 0 0 True if unordered: isnan(X) | isnan(Y) SETUEQ, // 1 0 0 1 True if unordered or equal SETUGT, // 1 0 1 0 True if unordered or greater than SETUGE, // 1 0 1 1 True if unordered, greater than, or equal SETULT, // 1 1 0 0 True if unordered or less than SETULE, // 1 1 0 1 True if unordered, less than, or equal SETUNE, // 1 1 1 0 True if unordered or not equal SETTRUE, // 1 1 1 1 Always true (always folded) // Don't care operations: undefined if the input is a nan. SETFALSE2, // 1 X 0 0 0 Always false (always folded) SETEQ, // 1 X 0 0 1 True if equal SETGT, // 1 X 0 1 0 True if greater than SETGE, // 1 X 0 1 1 True if greater than or equal SETLT, // 1 X 1 0 0 True if less than SETLE, // 1 X 1 0 1 True if less than or equal SETNE, // 1 X 1 1 0 True if not equal SETTRUE2, // 1 X 1 1 1 Always true (always folded) SETCC_INVALID // Marker value. }; /// Return true if this is a setcc instruction that performs a signed /// comparison when used with integer operands. inline bool isSignedIntSetCC(CondCode Code) { return Code == SETGT || Code == SETGE || Code == SETLT || Code == SETLE; } /// Return true if this is a setcc instruction that performs an unsigned /// comparison when used with integer operands. inline bool isUnsignedIntSetCC(CondCode Code) { return Code == SETUGT || Code == SETUGE || Code == SETULT || Code == SETULE; } /// Return true if the specified condition returns true if the two operands to /// the condition are equal. Note that if one of the two operands is a NaN, /// this value is meaningless. inline bool isTrueWhenEqual(CondCode Cond) { return ((int)Cond & 1) != 0; } /// This function returns 0 if the condition is always false if an operand is /// a NaN, 1 if the condition is always true if the operand is a NaN, and 2 if /// the condition is undefined if the operand is a NaN. inline unsigned getUnorderedFlavor(CondCode Cond) { return ((int)Cond >> 3) & 3; } /// Return the operation corresponding to !(X op Y), where 'op' is a valid /// SetCC operation. CondCode getSetCCInverse(CondCode Operation, bool isInteger); /// Return the operation corresponding to (Y op X) when given the operation /// for (X op Y). CondCode getSetCCSwappedOperands(CondCode Operation); /// Return the result of a logical OR between different comparisons of /// identical values: ((X op1 Y) | (X op2 Y)). This function returns /// SETCC_INVALID if it is not possible to represent the resultant comparison. CondCode getSetCCOrOperation(CondCode Op1, CondCode Op2, bool isInteger); /// Return the result of a logical AND between different comparisons of /// identical values: ((X op1 Y) & (X op2 Y)). This function returns /// SETCC_INVALID if it is not possible to represent the resultant comparison. CondCode getSetCCAndOperation(CondCode Op1, CondCode Op2, bool isInteger); //===--------------------------------------------------------------------===// /// This enum defines the various converts CONVERT_RNDSAT supports. enum CvtCode { CVT_FF, /// Float from Float CVT_FS, /// Float from Signed CVT_FU, /// Float from Unsigned CVT_SF, /// Signed from Float CVT_UF, /// Unsigned from Float CVT_SS, /// Signed from Signed CVT_SU, /// Signed from Unsigned CVT_US, /// Unsigned from Signed CVT_UU, /// Unsigned from Unsigned CVT_INVALID /// Marker - Invalid opcode }; } // end llvm::ISD namespace } // end llvm namespace #endif Index: projects/clang390-import/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp =================================================================== --- projects/clang390-import/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp (revision 305682) +++ projects/clang390-import/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp (revision 305683) @@ -1,4422 +1,4438 @@ //===-- LegalizeDAG.cpp - Implement SelectionDAG::Legalize ----------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements the SelectionDAG::Legalize method. // //===----------------------------------------------------------------------===// #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Triple.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "legalizedag" namespace { struct FloatSignAsInt; //===----------------------------------------------------------------------===// /// This takes an arbitrary SelectionDAG as input and /// hacks on it until the target machine can handle it. This involves /// eliminating value sizes the machine cannot handle (promoting small sizes to /// large sizes or splitting up large values into small values) as well as /// eliminating operations the machine cannot handle. /// /// This code also does a small amount of optimization and recognition of idioms /// as part of its processing. For example, if a target does not support a /// 'setcc' instruction efficiently, but does support 'brcc' instruction, this /// will attempt merge setcc and brc instructions into brcc's. /// class SelectionDAGLegalize { const TargetMachine &TM; const TargetLowering &TLI; SelectionDAG &DAG; /// \brief The set of nodes which have already been legalized. We hold a /// reference to it in order to update as necessary on node deletion. SmallPtrSetImpl &LegalizedNodes; /// \brief A set of all the nodes updated during legalization. SmallSetVector *UpdatedNodes; EVT getSetCCResultType(EVT VT) const { return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); } // Libcall insertion helpers. public: SelectionDAGLegalize(SelectionDAG &DAG, SmallPtrSetImpl &LegalizedNodes, SmallSetVector *UpdatedNodes = nullptr) : TM(DAG.getTarget()), TLI(DAG.getTargetLoweringInfo()), DAG(DAG), LegalizedNodes(LegalizedNodes), UpdatedNodes(UpdatedNodes) {} /// \brief Legalizes the given operation. void LegalizeOp(SDNode *Node); private: SDValue OptimizeFloatStore(StoreSDNode *ST); void LegalizeLoadOps(SDNode *Node); void LegalizeStoreOps(SDNode *Node); /// Some targets cannot handle a variable /// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it /// is necessary to spill the vector being inserted into to memory, perform /// the insert there, and then read the result back. SDValue PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, const SDLoc &dl); SDValue ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, const SDLoc &dl); /// Return a vector shuffle operation which /// performs the same shuffe in terms of order or result bytes, but on a type /// whose vector element type is narrower than the original shuffle type. /// e.g. <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef Mask) const; bool LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, bool &NeedInvert, const SDLoc &dl); SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); SDValue ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops, unsigned NumOps, bool isSigned, const SDLoc &dl); std::pair ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128, RTLIB::Libcall Call_PPCF128); SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8, RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128); void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl &Results); void ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl &Results); SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, const SDLoc &dl); SDValue ExpandBUILD_VECTOR(SDNode *Node); SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node); void ExpandDYNAMIC_STACKALLOC(SDNode *Node, SmallVectorImpl &Results); void getSignAsIntValue(FloatSignAsInt &State, const SDLoc &DL, SDValue Value) const; SDValue modifySignAsInt(const FloatSignAsInt &State, const SDLoc &DL, SDValue NewIntValue) const; SDValue ExpandFCOPYSIGN(SDNode *Node) const; SDValue ExpandFABS(SDNode *Node) const; SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, EVT DestVT, const SDLoc &dl); SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned, const SDLoc &dl); SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned, const SDLoc &dl); SDValue ExpandBITREVERSE(SDValue Op, const SDLoc &dl); SDValue ExpandBSWAP(SDValue Op, const SDLoc &dl); SDValue ExpandBitCount(unsigned Opc, SDValue Op, const SDLoc &dl); SDValue ExpandExtractFromVectorThroughStack(SDValue Op); SDValue ExpandInsertToVectorThroughStack(SDValue Op); SDValue ExpandVectorBuildThroughStack(SDNode* Node); SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP); SDValue ExpandConstant(ConstantSDNode *CP); // if ExpandNode returns false, LegalizeOp falls back to ConvertNodeToLibcall bool ExpandNode(SDNode *Node); void ConvertNodeToLibcall(SDNode *Node); void PromoteNode(SDNode *Node); public: // Node replacement helpers void ReplacedNode(SDNode *N) { LegalizedNodes.erase(N); if (UpdatedNodes) UpdatedNodes->insert(N); } void ReplaceNode(SDNode *Old, SDNode *New) { DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG); dbgs() << " with: "; New->dump(&DAG)); assert(Old->getNumValues() == New->getNumValues() && "Replacing one node with another that produces a different number " "of values!"); DAG.ReplaceAllUsesWith(Old, New); if (UpdatedNodes) UpdatedNodes->insert(New); ReplacedNode(Old); } void ReplaceNode(SDValue Old, SDValue New) { DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG); dbgs() << " with: "; New->dump(&DAG)); DAG.ReplaceAllUsesWith(Old, New); if (UpdatedNodes) UpdatedNodes->insert(New.getNode()); ReplacedNode(Old.getNode()); } void ReplaceNode(SDNode *Old, const SDValue *New) { DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG)); DAG.ReplaceAllUsesWith(Old, New); for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) { DEBUG(dbgs() << (i == 0 ? " with: " : " and: "); New[i]->dump(&DAG)); if (UpdatedNodes) UpdatedNodes->insert(New[i].getNode()); } ReplacedNode(Old); } }; } /// Return a vector shuffle operation which /// performs the same shuffe in terms of order or result bytes, but on a type /// whose vector element type is narrower than the original shuffle type. /// e.g. <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> SDValue SelectionDAGLegalize::ShuffleWithNarrowerEltType( EVT NVT, EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef Mask) const { unsigned NumMaskElts = VT.getVectorNumElements(); unsigned NumDestElts = NVT.getVectorNumElements(); unsigned NumEltsGrowth = NumDestElts / NumMaskElts; assert(NumEltsGrowth && "Cannot promote to vector type with fewer elts!"); if (NumEltsGrowth == 1) return DAG.getVectorShuffle(NVT, dl, N1, N2, Mask); SmallVector NewMask; for (unsigned i = 0; i != NumMaskElts; ++i) { int Idx = Mask[i]; for (unsigned j = 0; j != NumEltsGrowth; ++j) { if (Idx < 0) NewMask.push_back(-1); else NewMask.push_back(Idx * NumEltsGrowth + j); } } assert(NewMask.size() == NumDestElts && "Non-integer NumEltsGrowth?"); assert(TLI.isShuffleMaskLegal(NewMask, NVT) && "Shuffle not legal?"); return DAG.getVectorShuffle(NVT, dl, N1, N2, NewMask); } /// Expands the ConstantFP node to an integer constant or /// a load from the constant pool. SDValue SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { bool Extend = false; SDLoc dl(CFP); // If a FP immediate is precise when represented as a float and if the // target can do an extending load from float to double, we put it into // the constant pool as a float, even if it's is statically typed as a // double. This shrinks FP constants and canonicalizes them for targets where // an FP extending load is the same cost as a normal load (such as on the x87 // fp stack or PPC FP unit). EVT VT = CFP->getValueType(0); ConstantFP *LLVMC = const_cast(CFP->getConstantFPValue()); if (!UseCP) { assert((VT == MVT::f64 || VT == MVT::f32) && "Invalid type expansion"); return DAG.getConstant(LLVMC->getValueAPF().bitcastToAPInt(), dl, (VT == MVT::f64) ? MVT::i64 : MVT::i32); } EVT OrigVT = VT; EVT SVT = VT; while (SVT != MVT::f32 && SVT != MVT::f16) { SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1); if (ConstantFPSDNode::isValueValidForType(SVT, CFP->getValueAPF()) && // Only do this if the target has a native EXTLOAD instruction from // smaller type. TLI.isLoadExtLegal(ISD::EXTLOAD, OrigVT, SVT) && TLI.ShouldShrinkFPConstant(OrigVT)) { Type *SType = SVT.getTypeForEVT(*DAG.getContext()); LLVMC = cast(ConstantExpr::getFPTrunc(LLVMC, SType)); VT = SVT; Extend = true; } } SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy(DAG.getDataLayout())); unsigned Alignment = cast(CPIdx)->getAlignment(); if (Extend) { SDValue Result = DAG.getExtLoad( ISD::EXTLOAD, dl, OrigVT, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), VT, Alignment); return Result; } SDValue Result = DAG.getLoad( OrigVT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Alignment); return Result; } /// Expands the Constant node to a load from the constant pool. SDValue SelectionDAGLegalize::ExpandConstant(ConstantSDNode *CP) { SDLoc dl(CP); EVT VT = CP->getValueType(0); SDValue CPIdx = DAG.getConstantPool(CP->getConstantIntValue(), TLI.getPointerTy(DAG.getDataLayout())); unsigned Alignment = cast(CPIdx)->getAlignment(); SDValue Result = DAG.getLoad( VT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Alignment); return Result; } /// Some target cannot handle a variable insertion index for the /// INSERT_VECTOR_ELT instruction. In this case, it /// is necessary to spill the vector being inserted into to memory, perform /// the insert there, and then read the result back. SDValue SelectionDAGLegalize::PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, const SDLoc &dl) { SDValue Tmp1 = Vec; SDValue Tmp2 = Val; SDValue Tmp3 = Idx; // If the target doesn't support this, we have to spill the input vector // to a temporary stack slot, update the element, then reload it. This is // badness. We could also load the value into a vector register (either // with a "move to register" or "extload into register" instruction, then // permute it into place, if the idx is a constant and if the idx is // supported by the target. EVT VT = Tmp1.getValueType(); EVT EltVT = VT.getVectorElementType(); EVT IdxVT = Tmp3.getValueType(); EVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); SDValue StackPtr = DAG.CreateStackTemporary(VT); int SPFI = cast(StackPtr.getNode())->getIndex(); // Store the vector. SDValue Ch = DAG.getStore( DAG.getEntryNode(), dl, Tmp1, StackPtr, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI)); // Truncate or zero extend offset to target pointer type. Tmp3 = DAG.getZExtOrTrunc(Tmp3, dl, PtrVT); // Add the offset to the index. unsigned EltSize = EltVT.getSizeInBits()/8; Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3, DAG.getConstant(EltSize, dl, IdxVT)); SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr); // Store the scalar value. Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT); // Load the updated vector. return DAG.getLoad(VT, dl, Ch, StackPtr, MachinePointerInfo::getFixedStack( DAG.getMachineFunction(), SPFI)); } SDValue SelectionDAGLegalize::ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, const SDLoc &dl) { if (ConstantSDNode *InsertPos = dyn_cast(Idx)) { // SCALAR_TO_VECTOR requires that the type of the value being inserted // match the element type of the vector being created, except for // integers in which case the inserted value can be over width. EVT EltVT = Vec.getValueType().getVectorElementType(); if (Val.getValueType() == EltVT || (EltVT.isInteger() && Val.getValueType().bitsGE(EltVT))) { SDValue ScVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, Vec.getValueType(), Val); unsigned NumElts = Vec.getValueType().getVectorNumElements(); // We generate a shuffle of InVec and ScVec, so the shuffle mask // should be 0,1,2,3,4,5... with the appropriate element replaced with // elt 0 of the RHS. SmallVector ShufOps; for (unsigned i = 0; i != NumElts; ++i) ShufOps.push_back(i != InsertPos->getZExtValue() ? i : NumElts); return DAG.getVectorShuffle(Vec.getValueType(), dl, Vec, ScVec, ShufOps); } } return PerformInsertVectorEltInMemory(Vec, Val, Idx, dl); } SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' // FIXME: We shouldn't do this for TargetConstantFP's. // FIXME: move this to the DAG Combiner! Note that we can't regress due // to phase ordering between legalized code and the dag combiner. This // probably means that we need to integrate dag combiner and legalizer // together. // We generally can't do this one for long doubles. SDValue Chain = ST->getChain(); SDValue Ptr = ST->getBasePtr(); unsigned Alignment = ST->getAlignment(); MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); AAMDNodes AAInfo = ST->getAAInfo(); SDLoc dl(ST); if (ConstantFPSDNode *CFP = dyn_cast(ST->getValue())) { if (CFP->getValueType(0) == MVT::f32 && TLI.isTypeLegal(MVT::i32)) { SDValue Con = DAG.getConstant(CFP->getValueAPF(). bitcastToAPInt().zextOrTrunc(32), SDLoc(CFP), MVT::i32); return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), Alignment, MMOFlags, AAInfo); } if (CFP->getValueType(0) == MVT::f64) { // If this target supports 64-bit registers, do a single 64-bit store. if (TLI.isTypeLegal(MVT::i64)) { SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). zextOrTrunc(64), SDLoc(CFP), MVT::i64); return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), Alignment, MMOFlags, AAInfo); } if (TLI.isTypeLegal(MVT::i32) && !ST->isVolatile()) { // Otherwise, if the target supports 32-bit registers, use 2 32-bit // stores. If the target supports neither 32- nor 64-bits, this // xform is certainly not worth it. const APInt &IntVal = CFP->getValueAPF().bitcastToAPInt(); SDValue Lo = DAG.getConstant(IntVal.trunc(32), dl, MVT::i32); SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), dl, MVT::i32); if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), Alignment, MMOFlags, AAInfo); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(4, dl, Ptr.getValueType())); Hi = DAG.getStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(4), MinAlign(Alignment, 4U), MMOFlags, AAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } } } return SDValue(nullptr, 0); } void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { StoreSDNode *ST = cast(Node); SDValue Chain = ST->getChain(); SDValue Ptr = ST->getBasePtr(); SDLoc dl(Node); unsigned Alignment = ST->getAlignment(); MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); AAMDNodes AAInfo = ST->getAAInfo(); if (!ST->isTruncatingStore()) { if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { ReplaceNode(ST, OptStore); return; } { SDValue Value = ST->getValue(); MVT VT = Value.getSimpleValueType(); switch (TLI.getOperationAction(ISD::STORE, VT)) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: { // If this is an unaligned store and the target doesn't support it, // expand it. EVT MemVT = ST->getMemoryVT(); unsigned AS = ST->getAddressSpace(); unsigned Align = ST->getAlignment(); const DataLayout &DL = DAG.getDataLayout(); if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { SDValue Result = TLI.expandUnalignedStore(ST, DAG); ReplaceNode(SDValue(ST, 0), Result); } break; } case TargetLowering::Custom: { SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); if (Res && Res != SDValue(Node, 0)) ReplaceNode(SDValue(Node, 0), Res); return; } case TargetLowering::Promote: { MVT NVT = TLI.getTypeToPromoteTo(ISD::STORE, VT); assert(NVT.getSizeInBits() == VT.getSizeInBits() && "Can only promote stores to same size type"); Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value); SDValue Result = DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), Alignment, MMOFlags, AAInfo); ReplaceNode(SDValue(Node, 0), Result); break; } } return; } } else { SDValue Value = ST->getValue(); EVT StVT = ST->getMemoryVT(); unsigned StWidth = StVT.getSizeInBits(); auto &DL = DAG.getDataLayout(); if (StWidth != StVT.getStoreSizeInBits()) { // Promote to a byte-sized store with upper bits zero if not // storing an integral number of bytes. For example, promote // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1) EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StVT.getStoreSizeInBits()); Value = DAG.getZeroExtendInReg(Value, dl, StVT); SDValue Result = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), NVT, Alignment, MMOFlags, AAInfo); ReplaceNode(SDValue(Node, 0), Result); } else if (StWidth & (StWidth - 1)) { // If not storing a power-of-2 number of bits, expand as two stores. assert(!StVT.isVector() && "Unsupported truncstore!"); unsigned RoundWidth = 1 << Log2_32(StWidth); assert(RoundWidth < StWidth); unsigned ExtraWidth = StWidth - RoundWidth; assert(ExtraWidth < RoundWidth); assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && "Store size not an integral number of bytes!"); EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); SDValue Lo, Hi; unsigned IncrementSize; if (DL.isLittleEndian()) { // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16) // Store the bottom RoundWidth bits. Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), RoundVT, Alignment, MMOFlags, AAInfo); // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); Hi = DAG.getNode( ISD::SRL, dl, Value.getValueType(), Value, DAG.getConstant(RoundWidth, dl, TLI.getShiftAmountTy(Value.getValueType(), DL))); Hi = DAG.getTruncStore( Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); } else { // Big endian - avoid unaligned stores. // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X // Store the top RoundWidth bits. Hi = DAG.getNode( ISD::SRL, dl, Value.getValueType(), Value, DAG.getConstant(ExtraWidth, dl, TLI.getShiftAmountTy(Value.getValueType(), DL))); Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), RoundVT, Alignment, MMOFlags, AAInfo); // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); Lo = DAG.getTruncStore( Chain, dl, Value, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); } // The order of the stores doesn't matter. SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); ReplaceNode(SDValue(Node, 0), Result); } else { switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: { EVT MemVT = ST->getMemoryVT(); unsigned AS = ST->getAddressSpace(); unsigned Align = ST->getAlignment(); // If this is an unaligned store and the target doesn't support it, // expand it. if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { SDValue Result = TLI.expandUnalignedStore(ST, DAG); ReplaceNode(SDValue(ST, 0), Result); } break; } case TargetLowering::Custom: { SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); if (Res && Res != SDValue(Node, 0)) ReplaceNode(SDValue(Node, 0), Res); return; } case TargetLowering::Expand: assert(!StVT.isVector() && "Vector Stores are handled in LegalizeVectorOps"); // TRUNCSTORE:i16 i32 -> STORE i16 assert(TLI.isTypeLegal(StVT) && "Do not know how to expand this store!"); Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value); SDValue Result = DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), Alignment, MMOFlags, AAInfo); ReplaceNode(SDValue(Node, 0), Result); break; } } } } void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { LoadSDNode *LD = cast(Node); SDValue Chain = LD->getChain(); // The chain. SDValue Ptr = LD->getBasePtr(); // The base pointer. SDValue Value; // The value returned by the load op. SDLoc dl(Node); ISD::LoadExtType ExtType = LD->getExtensionType(); if (ExtType == ISD::NON_EXTLOAD) { MVT VT = Node->getSimpleValueType(0); SDValue RVal = SDValue(Node, 0); SDValue RChain = SDValue(Node, 1); switch (TLI.getOperationAction(Node->getOpcode(), VT)) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: { EVT MemVT = LD->getMemoryVT(); unsigned AS = LD->getAddressSpace(); unsigned Align = LD->getAlignment(); const DataLayout &DL = DAG.getDataLayout(); // If this is an unaligned load and the target doesn't support it, // expand it. if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { std::tie(RVal, RChain) = TLI.expandUnalignedLoad(LD, DAG); } break; } case TargetLowering::Custom: { if (SDValue Res = TLI.LowerOperation(RVal, DAG)) { RVal = Res; RChain = Res.getValue(1); } break; } case TargetLowering::Promote: { MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); assert(NVT.getSizeInBits() == VT.getSizeInBits() && "Can only promote loads to same size type"); SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getMemOperand()); RVal = DAG.getNode(ISD::BITCAST, dl, VT, Res); RChain = Res.getValue(1); break; } } if (RChain.getNode() != Node) { assert(RVal.getNode() != Node && "Load must be completely replaced"); DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), RVal); DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), RChain); if (UpdatedNodes) { UpdatedNodes->insert(RVal.getNode()); UpdatedNodes->insert(RChain.getNode()); } ReplacedNode(Node); } return; } EVT SrcVT = LD->getMemoryVT(); unsigned SrcWidth = SrcVT.getSizeInBits(); unsigned Alignment = LD->getAlignment(); MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); AAMDNodes AAInfo = LD->getAAInfo(); if (SrcWidth != SrcVT.getStoreSizeInBits() && // Some targets pretend to have an i1 loading operation, and actually // load an i8. This trick is correct for ZEXTLOAD because the top 7 // bits are guaranteed to be zero; it helps the optimizers understand // that these bits are zero. It is also useful for EXTLOAD, since it // tells the optimizers that those bits are undefined. It would be // nice to have an effective generic way of getting these benefits... // Until such a way is found, don't insist on promoting i1 here. (SrcVT != MVT::i1 || TLI.getLoadExtAction(ExtType, Node->getValueType(0), MVT::i1) == TargetLowering::Promote)) { // Promote to a byte-sized load if not loading an integral number of // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24. unsigned NewWidth = SrcVT.getStoreSizeInBits(); EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth); SDValue Ch; // The extra bits are guaranteed to be zero, since we stored them that // way. A zext load from NVT thus automatically gives zext from SrcVT. ISD::LoadExtType NewExtType = ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD; SDValue Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo(), NVT, Alignment, MMOFlags, AAInfo); Ch = Result.getValue(1); // The chain. if (ExtType == ISD::SEXTLOAD) // Having the top bits zero doesn't help when sign extending. Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Result.getValueType(), Result, DAG.getValueType(SrcVT)); else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType()) // All the top bits are guaranteed to be zero - inform the optimizers. Result = DAG.getNode(ISD::AssertZext, dl, Result.getValueType(), Result, DAG.getValueType(SrcVT)); Value = Result; Chain = Ch; } else if (SrcWidth & (SrcWidth - 1)) { // If not loading a power-of-2 number of bits, expand as two loads. assert(!SrcVT.isVector() && "Unsupported extload!"); unsigned RoundWidth = 1 << Log2_32(SrcWidth); assert(RoundWidth < SrcWidth); unsigned ExtraWidth = SrcWidth - RoundWidth; assert(ExtraWidth < RoundWidth); assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && "Load size not an integral number of bytes!"); EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); SDValue Lo, Hi, Ch; unsigned IncrementSize; auto &DL = DAG.getDataLayout(); if (DL.isLittleEndian()) { // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16) // Load the bottom RoundWidth bits. Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo(), RoundVT, Alignment, MMOFlags, AAInfo); // Load the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); // Build a factor node to remember that this load is independent of // the other one. Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); // Move the top bits to the right place. Hi = DAG.getNode( ISD::SHL, dl, Hi.getValueType(), Hi, DAG.getConstant(RoundWidth, dl, TLI.getShiftAmountTy(Hi.getValueType(), DL))); // Join the hi and lo parts. Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); } else { // Big endian - avoid unaligned loads. // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 // Load the top RoundWidth bits. Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo(), RoundVT, Alignment, MMOFlags, AAInfo); // Load the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); // Build a factor node to remember that this load is independent of // the other one. Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); // Move the top bits to the right place. Hi = DAG.getNode( ISD::SHL, dl, Hi.getValueType(), Hi, DAG.getConstant(ExtraWidth, dl, TLI.getShiftAmountTy(Hi.getValueType(), DL))); // Join the hi and lo parts. Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); } Chain = Ch; } else { bool isCustom = false; switch (TLI.getLoadExtAction(ExtType, Node->getValueType(0), SrcVT.getSimpleVT())) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Custom: isCustom = true; // FALLTHROUGH case TargetLowering::Legal: { Value = SDValue(Node, 0); Chain = SDValue(Node, 1); if (isCustom) { if (SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG)) { Value = Res; Chain = Res.getValue(1); } } else { // If this is an unaligned load and the target doesn't support it, // expand it. EVT MemVT = LD->getMemoryVT(); unsigned AS = LD->getAddressSpace(); unsigned Align = LD->getAlignment(); const DataLayout &DL = DAG.getDataLayout(); if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { std::tie(Value, Chain) = TLI.expandUnalignedLoad(LD, DAG); } } break; } case TargetLowering::Expand: EVT DestVT = Node->getValueType(0); if (!TLI.isLoadExtLegal(ISD::EXTLOAD, DestVT, SrcVT)) { // If the source type is not legal, see if there is a legal extload to // an intermediate type that we can then extend further. EVT LoadVT = TLI.getRegisterType(SrcVT.getSimpleVT()); if (TLI.isTypeLegal(SrcVT) || // Same as SrcVT == LoadVT? TLI.isLoadExtLegal(ExtType, LoadVT, SrcVT)) { // If we are loading a legal type, this is a non-extload followed by a // full extend. ISD::LoadExtType MidExtType = (LoadVT == SrcVT) ? ISD::NON_EXTLOAD : ExtType; SDValue Load = DAG.getExtLoad(MidExtType, dl, LoadVT, Chain, Ptr, SrcVT, LD->getMemOperand()); unsigned ExtendOp = ISD::getExtForLoadExtType(SrcVT.isFloatingPoint(), ExtType); Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); Chain = Load.getValue(1); break; } // Handle the special case of fp16 extloads. EXTLOAD doesn't have the // normal undefined upper bits behavior to allow using an in-reg extend // with the illegal FP type, so load as an integer and do the // from-integer conversion. if (SrcVT.getScalarType() == MVT::f16) { EVT ISrcVT = SrcVT.changeTypeToInteger(); EVT IDestVT = DestVT.changeTypeToInteger(); EVT LoadVT = TLI.getRegisterType(IDestVT.getSimpleVT()); SDValue Result = DAG.getExtLoad(ISD::ZEXTLOAD, dl, LoadVT, Chain, Ptr, ISrcVT, LD->getMemOperand()); Value = DAG.getNode(ISD::FP16_TO_FP, dl, DestVT, Result); Chain = Result.getValue(1); break; } } assert(!SrcVT.isVector() && "Vector Loads are handled in LegalizeVectorOps"); // FIXME: This does not work for vectors on most targets. Sign- // and zero-extend operations are currently folded into extending // loads, whether they are legal or not, and then we end up here // without any support for legalizing them. assert(ExtType != ISD::EXTLOAD && "EXTLOAD should always be supported!"); // Turn the unsupported load into an EXTLOAD followed by an // explicit zero/sign extend inreg. SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), Chain, Ptr, SrcVT, LD->getMemOperand()); SDValue ValRes; if (ExtType == ISD::SEXTLOAD) ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Result.getValueType(), Result, DAG.getValueType(SrcVT)); else ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType()); Value = ValRes; Chain = Result.getValue(1); break; } } // Since loads produce two values, make sure to remember that we legalized // both of them. if (Chain.getNode() != Node) { assert(Value.getNode() != Node && "Load must be completely replaced"); DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Value); DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain); if (UpdatedNodes) { UpdatedNodes->insert(Value.getNode()); UpdatedNodes->insert(Chain.getNode()); } ReplacedNode(Node); } } /// Return a legal replacement for the given operation, with all legal operands. void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG)); if (Node->getOpcode() == ISD::TargetConstant) // Allow illegal target nodes. return; #ifndef NDEBUG for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) assert((TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) == TargetLowering::TypeLegal || TLI.isTypeLegal(Node->getValueType(i))) && "Unexpected illegal type!"); for (const SDValue &Op : Node->op_values()) assert((TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) == TargetLowering::TypeLegal || TLI.isTypeLegal(Op.getValueType()) || Op.getOpcode() == ISD::TargetConstant) && "Unexpected illegal type!"); #endif // Figure out the correct action; the way to query this varies by opcode TargetLowering::LegalizeAction Action = TargetLowering::Legal; bool SimpleFinishLegalizing = true; switch (Node->getOpcode()) { case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_WO_CHAIN: case ISD::INTRINSIC_VOID: case ISD::STACKSAVE: Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other); break; case ISD::GET_DYNAMIC_AREA_OFFSET: Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); break; case ISD::VAARG: Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); if (Action != TargetLowering::Promote) Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other); break; case ISD::FP_TO_FP16: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::EXTRACT_VECTOR_ELT: Action = TLI.getOperationAction(Node->getOpcode(), Node->getOperand(0).getValueType()); break; case ISD::FP_ROUND_INREG: case ISD::SIGN_EXTEND_INREG: { EVT InnerType = cast(Node->getOperand(1))->getVT(); Action = TLI.getOperationAction(Node->getOpcode(), InnerType); break; } case ISD::ATOMIC_STORE: { Action = TLI.getOperationAction(Node->getOpcode(), Node->getOperand(2).getValueType()); break; } case ISD::SELECT_CC: case ISD::SETCC: case ISD::BR_CC: { unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 : Node->getOpcode() == ISD::SETCC ? 2 : Node->getOpcode() == ISD::SETCCE ? 3 : 1; unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0; MVT OpVT = Node->getOperand(CompareOperand).getSimpleValueType(); ISD::CondCode CCCode = cast(Node->getOperand(CCOperand))->get(); Action = TLI.getCondCodeAction(CCCode, OpVT); if (Action == TargetLowering::Legal) { if (Node->getOpcode() == ISD::SELECT_CC) Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); else Action = TLI.getOperationAction(Node->getOpcode(), OpVT); } break; } case ISD::LOAD: case ISD::STORE: // FIXME: Model these properly. LOAD and STORE are complicated, and // STORE expects the unlegalized operand in some cases. SimpleFinishLegalizing = false; break; case ISD::CALLSEQ_START: case ISD::CALLSEQ_END: // FIXME: This shouldn't be necessary. These nodes have special properties // dealing with the recursive nature of legalization. Removing this // special case should be done as part of making LegalizeDAG non-recursive. SimpleFinishLegalizing = false; break; case ISD::EXTRACT_ELEMENT: case ISD::FLT_ROUNDS_: case ISD::FPOWI: case ISD::MERGE_VALUES: case ISD::EH_RETURN: case ISD::FRAME_TO_ARGS_OFFSET: + case ISD::EH_DWARF_CFA: case ISD::EH_SJLJ_SETJMP: case ISD::EH_SJLJ_LONGJMP: case ISD::EH_SJLJ_SETUP_DISPATCH: // These operations lie about being legal: when they claim to be legal, // they should actually be expanded. Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); if (Action == TargetLowering::Legal) Action = TargetLowering::Expand; break; case ISD::INIT_TRAMPOLINE: case ISD::ADJUST_TRAMPOLINE: case ISD::FRAMEADDR: case ISD::RETURNADDR: // These operations lie about being legal: when they claim to be legal, // they should actually be custom-lowered. Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); if (Action == TargetLowering::Legal) Action = TargetLowering::Custom; break; case ISD::READCYCLECOUNTER: // READCYCLECOUNTER returns an i64, even if type legalization might have // expanded that to several smaller types. Action = TLI.getOperationAction(Node->getOpcode(), MVT::i64); break; case ISD::READ_REGISTER: case ISD::WRITE_REGISTER: // Named register is legal in the DAG, but blocked by register name // selection if not implemented by target (to chose the correct register) // They'll be converted to Copy(To/From)Reg. Action = TargetLowering::Legal; break; case ISD::DEBUGTRAP: Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); if (Action == TargetLowering::Expand) { // replace ISD::DEBUGTRAP with ISD::TRAP SDValue NewVal; NewVal = DAG.getNode(ISD::TRAP, SDLoc(Node), Node->getVTList(), Node->getOperand(0)); ReplaceNode(Node, NewVal.getNode()); LegalizeOp(NewVal.getNode()); return; } break; default: if (Node->getOpcode() >= ISD::BUILTIN_OP_END) { Action = TargetLowering::Legal; } else { Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); } break; } if (SimpleFinishLegalizing) { SDNode *NewNode = Node; switch (Node->getOpcode()) { default: break; case ISD::SHL: case ISD::SRL: case ISD::SRA: case ISD::ROTL: case ISD::ROTR: // Legalizing shifts/rotates requires adjusting the shift amount // to the appropriate width. if (!Node->getOperand(1).getValueType().isVector()) { SDValue SAO = DAG.getShiftAmountOperand(Node->getOperand(0).getValueType(), Node->getOperand(1)); HandleSDNode Handle(SAO); LegalizeOp(SAO.getNode()); NewNode = DAG.UpdateNodeOperands(Node, Node->getOperand(0), Handle.getValue()); } break; case ISD::SRL_PARTS: case ISD::SRA_PARTS: case ISD::SHL_PARTS: // Legalizing shifts/rotates requires adjusting the shift amount // to the appropriate width. if (!Node->getOperand(2).getValueType().isVector()) { SDValue SAO = DAG.getShiftAmountOperand(Node->getOperand(0).getValueType(), Node->getOperand(2)); HandleSDNode Handle(SAO); LegalizeOp(SAO.getNode()); NewNode = DAG.UpdateNodeOperands(Node, Node->getOperand(0), Node->getOperand(1), Handle.getValue()); } break; } if (NewNode != Node) { ReplaceNode(Node, NewNode); Node = NewNode; } switch (Action) { case TargetLowering::Legal: return; case TargetLowering::Custom: { // FIXME: The handling for custom lowering with multiple results is // a complete mess. if (SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG)) { if (!(Res.getNode() != Node || Res.getResNo() != 0)) return; if (Node->getNumValues() == 1) { // We can just directly replace this node with the lowered value. ReplaceNode(SDValue(Node, 0), Res); return; } SmallVector ResultVals; for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) ResultVals.push_back(Res.getValue(i)); ReplaceNode(Node, ResultVals.data()); return; } } // FALL THROUGH case TargetLowering::Expand: if (ExpandNode(Node)) return; // FALL THROUGH case TargetLowering::LibCall: ConvertNodeToLibcall(Node); return; case TargetLowering::Promote: PromoteNode(Node); return; } } switch (Node->getOpcode()) { default: #ifndef NDEBUG dbgs() << "NODE: "; Node->dump( &DAG); dbgs() << "\n"; #endif llvm_unreachable("Do not know how to legalize this operator!"); case ISD::CALLSEQ_START: case ISD::CALLSEQ_END: break; case ISD::LOAD: { return LegalizeLoadOps(Node); } case ISD::STORE: { return LegalizeStoreOps(Node); } } } SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { SDValue Vec = Op.getOperand(0); SDValue Idx = Op.getOperand(1); SDLoc dl(Op); // Before we generate a new store to a temporary stack slot, see if there is // already one that we can use. There often is because when we scalarize // vector operations (using SelectionDAG::UnrollVectorOp for example) a whole // series of EXTRACT_VECTOR_ELT nodes are generated, one for each element in // the vector. If all are expanded here, we don't want one store per vector // element. // Caches for hasPredecessorHelper SmallPtrSet Visited; SmallVector Worklist; Worklist.push_back(Idx.getNode()); SDValue StackPtr, Ch; for (SDNode::use_iterator UI = Vec.getNode()->use_begin(), UE = Vec.getNode()->use_end(); UI != UE; ++UI) { SDNode *User = *UI; if (StoreSDNode *ST = dyn_cast(User)) { if (ST->isIndexed() || ST->isTruncatingStore() || ST->getValue() != Vec) continue; // Make sure that nothing else could have stored into the destination of // this store. if (!ST->getChain().reachesChainWithoutSideEffects(DAG.getEntryNode())) continue; // If the index is dependent on the store we will introduce a cycle when // creating the load (the load uses the index, and by replacing the chain // we will make the index dependent on the load). if (SDNode::hasPredecessorHelper(ST, Visited, Worklist)) continue; StackPtr = ST->getBasePtr(); Ch = SDValue(ST, 0); break; } } if (!Ch.getNode()) { // Store the value to a temporary stack slot, then LOAD the returned part. StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo()); } // Add the offset to the index. unsigned EltSize = Vec.getValueType().getVectorElementType().getSizeInBits()/8; Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx, DAG.getConstant(EltSize, SDLoc(Vec), Idx.getValueType())); Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy(DAG.getDataLayout())); StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr); SDValue NewLoad; if (Op.getValueType().isVector()) NewLoad = DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, MachinePointerInfo()); else NewLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, MachinePointerInfo(), Vec.getValueType().getVectorElementType()); // Replace the chain going out of the store, by the one out of the load. DAG.ReplaceAllUsesOfValueWith(Ch, SDValue(NewLoad.getNode(), 1)); // We introduced a cycle though, so update the loads operands, making sure // to use the original store's chain as an incoming chain. SmallVector NewLoadOperands(NewLoad->op_begin(), NewLoad->op_end()); NewLoadOperands[0] = Ch; NewLoad = SDValue(DAG.UpdateNodeOperands(NewLoad.getNode(), NewLoadOperands), 0); return NewLoad; } SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { assert(Op.getValueType().isVector() && "Non-vector insert subvector!"); SDValue Vec = Op.getOperand(0); SDValue Part = Op.getOperand(1); SDValue Idx = Op.getOperand(2); SDLoc dl(Op); // Store the value to a temporary stack slot, then LOAD the returned part. SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); int FI = cast(StackPtr.getNode())->getIndex(); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); // First store the whole vector. SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo); // Then store the inserted part. // Add the offset to the index. unsigned EltSize = Vec.getValueType().getVectorElementType().getSizeInBits()/8; Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx, DAG.getConstant(EltSize, SDLoc(Vec), Idx.getValueType())); Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy(DAG.getDataLayout())); SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr); // Store the subvector. Ch = DAG.getStore(Ch, dl, Part, SubStackPtr, MachinePointerInfo()); // Finally, load the updated vector. return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo); } SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { // We can't handle this case efficiently. Allocate a sufficiently // aligned object on the stack, store each element into it, then load // the result as a vector. // Create the stack frame object. EVT VT = Node->getValueType(0); EVT EltVT = VT.getVectorElementType(); SDLoc dl(Node); SDValue FIPtr = DAG.CreateStackTemporary(VT); int FI = cast(FIPtr.getNode())->getIndex(); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); // Emit a store of each element to the stack slot. SmallVector Stores; unsigned TypeByteSize = EltVT.getSizeInBits() / 8; // Store (in the right endianness) the elements to memory. for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { // Ignore undef elements. if (Node->getOperand(i).isUndef()) continue; unsigned Offset = TypeByteSize*i; SDValue Idx = DAG.getConstant(Offset, dl, FIPtr.getValueType()); Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx); // If the destination vector element type is narrower than the source // element type, only store the bits necessary. if (EltVT.bitsLT(Node->getOperand(i).getValueType().getScalarType())) { Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(i), Idx, PtrInfo.getWithOffset(Offset), EltVT)); } else Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, Node->getOperand(i), Idx, PtrInfo.getWithOffset(Offset))); } SDValue StoreChain; if (!Stores.empty()) // Not all undef elements? StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); else StoreChain = DAG.getEntryNode(); // Result is a load from the stack slot. return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo); } namespace { /// Keeps track of state when getting the sign of a floating-point value as an /// integer. struct FloatSignAsInt { EVT FloatVT; SDValue Chain; SDValue FloatPtr; SDValue IntPtr; MachinePointerInfo IntPointerInfo; MachinePointerInfo FloatPointerInfo; SDValue IntValue; APInt SignMask; uint8_t SignBit; }; } /// Bitcast a floating-point value to an integer value. Only bitcast the part /// containing the sign bit if the target has no integer value capable of /// holding all bits of the floating-point value. void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State, const SDLoc &DL, SDValue Value) const { EVT FloatVT = Value.getValueType(); unsigned NumBits = FloatVT.getSizeInBits(); State.FloatVT = FloatVT; EVT IVT = EVT::getIntegerVT(*DAG.getContext(), NumBits); // Convert to an integer of the same size. if (TLI.isTypeLegal(IVT)) { State.IntValue = DAG.getNode(ISD::BITCAST, DL, IVT, Value); State.SignMask = APInt::getSignBit(NumBits); State.SignBit = NumBits - 1; return; } auto &DataLayout = DAG.getDataLayout(); // Store the float to memory, then load the sign part out as an integer. MVT LoadTy = TLI.getRegisterType(*DAG.getContext(), MVT::i8); // First create a temporary that is aligned for both the load and store. SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy); int FI = cast(StackPtr.getNode())->getIndex(); // Then store the float to it. State.FloatPtr = StackPtr; MachineFunction &MF = DAG.getMachineFunction(); State.FloatPointerInfo = MachinePointerInfo::getFixedStack(MF, FI); State.Chain = DAG.getStore(DAG.getEntryNode(), DL, Value, State.FloatPtr, State.FloatPointerInfo); SDValue IntPtr; if (DataLayout.isBigEndian()) { assert(FloatVT.isByteSized() && "Unsupported floating point type!"); // Load out a legal integer with the same sign bit as the float. IntPtr = StackPtr; State.IntPointerInfo = State.FloatPointerInfo; } else { // Advance the pointer so that the loaded byte will contain the sign bit. unsigned ByteOffset = (FloatVT.getSizeInBits() / 8) - 1; IntPtr = DAG.getNode(ISD::ADD, DL, StackPtr.getValueType(), StackPtr, DAG.getConstant(ByteOffset, DL, StackPtr.getValueType())); State.IntPointerInfo = MachinePointerInfo::getFixedStack(MF, FI, ByteOffset); } State.IntPtr = IntPtr; State.IntValue = DAG.getExtLoad(ISD::EXTLOAD, DL, LoadTy, State.Chain, IntPtr, State.IntPointerInfo, MVT::i8); State.SignMask = APInt::getOneBitSet(LoadTy.getSizeInBits(), 7); State.SignBit = 7; } /// Replace the integer value produced by getSignAsIntValue() with a new value /// and cast the result back to a floating-point type. SDValue SelectionDAGLegalize::modifySignAsInt(const FloatSignAsInt &State, const SDLoc &DL, SDValue NewIntValue) const { if (!State.Chain) return DAG.getNode(ISD::BITCAST, DL, State.FloatVT, NewIntValue); // Override the part containing the sign bit in the value stored on the stack. SDValue Chain = DAG.getTruncStore(State.Chain, DL, NewIntValue, State.IntPtr, State.IntPointerInfo, MVT::i8); return DAG.getLoad(State.FloatVT, DL, Chain, State.FloatPtr, State.FloatPointerInfo); } SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const { SDLoc DL(Node); SDValue Mag = Node->getOperand(0); SDValue Sign = Node->getOperand(1); // Get sign bit into an integer value. FloatSignAsInt SignAsInt; getSignAsIntValue(SignAsInt, DL, Sign); EVT IntVT = SignAsInt.IntValue.getValueType(); SDValue SignMask = DAG.getConstant(SignAsInt.SignMask, DL, IntVT); SDValue SignBit = DAG.getNode(ISD::AND, DL, IntVT, SignAsInt.IntValue, SignMask); // If FABS is legal transform FCOPYSIGN(x, y) => sign(x) ? -FABS(x) : FABS(X) EVT FloatVT = Mag.getValueType(); if (TLI.isOperationLegalOrCustom(ISD::FABS, FloatVT) && TLI.isOperationLegalOrCustom(ISD::FNEG, FloatVT)) { SDValue AbsValue = DAG.getNode(ISD::FABS, DL, FloatVT, Mag); SDValue NegValue = DAG.getNode(ISD::FNEG, DL, FloatVT, AbsValue); SDValue Cond = DAG.getSetCC(DL, getSetCCResultType(IntVT), SignBit, DAG.getConstant(0, DL, IntVT), ISD::SETNE); return DAG.getSelect(DL, FloatVT, Cond, NegValue, AbsValue); } // Transform Mag value to integer, and clear the sign bit. FloatSignAsInt MagAsInt; getSignAsIntValue(MagAsInt, DL, Mag); EVT MagVT = MagAsInt.IntValue.getValueType(); SDValue ClearSignMask = DAG.getConstant(~MagAsInt.SignMask, DL, MagVT); SDValue ClearedSign = DAG.getNode(ISD::AND, DL, MagVT, MagAsInt.IntValue, ClearSignMask); // Get the signbit at the right position for MagAsInt. int ShiftAmount = SignAsInt.SignBit - MagAsInt.SignBit; if (SignBit.getValueSizeInBits() > ClearedSign.getValueSizeInBits()) { if (ShiftAmount > 0) { SDValue ShiftCnst = DAG.getConstant(ShiftAmount, DL, IntVT); SignBit = DAG.getNode(ISD::SRL, DL, IntVT, SignBit, ShiftCnst); } else if (ShiftAmount < 0) { SDValue ShiftCnst = DAG.getConstant(-ShiftAmount, DL, IntVT); SignBit = DAG.getNode(ISD::SHL, DL, IntVT, SignBit, ShiftCnst); } SignBit = DAG.getNode(ISD::TRUNCATE, DL, MagVT, SignBit); } else if (SignBit.getValueSizeInBits() < ClearedSign.getValueSizeInBits()) { SignBit = DAG.getNode(ISD::ZERO_EXTEND, DL, MagVT, SignBit); if (ShiftAmount > 0) { SDValue ShiftCnst = DAG.getConstant(ShiftAmount, DL, MagVT); SignBit = DAG.getNode(ISD::SRL, DL, MagVT, SignBit, ShiftCnst); } else if (ShiftAmount < 0) { SDValue ShiftCnst = DAG.getConstant(-ShiftAmount, DL, MagVT); SignBit = DAG.getNode(ISD::SHL, DL, MagVT, SignBit, ShiftCnst); } } // Store the part with the modified sign and convert back to float. SDValue CopiedSign = DAG.getNode(ISD::OR, DL, MagVT, ClearedSign, SignBit); return modifySignAsInt(MagAsInt, DL, CopiedSign); } SDValue SelectionDAGLegalize::ExpandFABS(SDNode *Node) const { SDLoc DL(Node); SDValue Value = Node->getOperand(0); // Transform FABS(x) => FCOPYSIGN(x, 0.0) if FCOPYSIGN is legal. EVT FloatVT = Value.getValueType(); if (TLI.isOperationLegalOrCustom(ISD::FCOPYSIGN, FloatVT)) { SDValue Zero = DAG.getConstantFP(0.0, DL, FloatVT); return DAG.getNode(ISD::FCOPYSIGN, DL, FloatVT, Value, Zero); } // Transform value to integer, clear the sign bit and transform back. FloatSignAsInt ValueAsInt; getSignAsIntValue(ValueAsInt, DL, Value); EVT IntVT = ValueAsInt.IntValue.getValueType(); SDValue ClearSignMask = DAG.getConstant(~ValueAsInt.SignMask, DL, IntVT); SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, ValueAsInt.IntValue, ClearSignMask); return modifySignAsInt(ValueAsInt, DL, ClearedSign); } void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, SmallVectorImpl &Results) { unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore(); assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and" " not tell us which reg is the stack pointer!"); SDLoc dl(Node); EVT VT = Node->getValueType(0); SDValue Tmp1 = SDValue(Node, 0); SDValue Tmp2 = SDValue(Node, 1); SDValue Tmp3 = Node->getOperand(2); SDValue Chain = Tmp1.getOperand(0); // Chain the dynamic stack allocation so that it doesn't modify the stack // pointer when other instructions are using the stack. Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, dl, true), dl); SDValue Size = Tmp2.getOperand(1); SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); Chain = SP.getValue(1); unsigned Align = cast(Tmp3)->getZExtValue(); unsigned StackAlign = DAG.getSubtarget().getFrameLowering()->getStackAlignment(); Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value if (Align > StackAlign) Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1, DAG.getConstant(-(uint64_t)Align, dl, VT)); Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true), DAG.getIntPtrConstant(0, dl, true), SDValue(), dl); Results.push_back(Tmp1); Results.push_back(Tmp2); } /// Legalize a SETCC with given LHS and RHS and condition code CC on the current /// target. /// /// If the SETCC has been legalized using AND / OR, then the legalized node /// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert /// will be set to false. /// /// If the SETCC has been legalized by using getSetCCSwappedOperands(), /// then the values of LHS and RHS will be swapped, CC will be set to the /// new condition, and NeedInvert will be set to false. /// /// If the SETCC has been legalized using the inverse condcode, then LHS and /// RHS will be unchanged, CC will set to the inverted condcode, and NeedInvert /// will be set to true. The caller must invert the result of the SETCC with /// SelectionDAG::getLogicalNOT() or take equivalent action to swap the effect /// of a true/false result. /// /// \returns true if the SetCC has been legalized, false if it hasn't. bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, bool &NeedInvert, const SDLoc &dl) { MVT OpVT = LHS.getSimpleValueType(); ISD::CondCode CCCode = cast(CC)->get(); NeedInvert = false; switch (TLI.getCondCodeAction(CCCode, OpVT)) { default: llvm_unreachable("Unknown condition code action!"); case TargetLowering::Legal: // Nothing to do. break; case TargetLowering::Expand: { ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode); if (TLI.isCondCodeLegal(InvCC, OpVT)) { std::swap(LHS, RHS); CC = DAG.getCondCode(InvCC); return true; } ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID; unsigned Opc = 0; switch (CCCode) { default: llvm_unreachable("Don't know how to expand this condition!"); case ISD::SETO: assert(TLI.getCondCodeAction(ISD::SETOEQ, OpVT) == TargetLowering::Legal && "If SETO is expanded, SETOEQ must be legal!"); CC1 = ISD::SETOEQ; CC2 = ISD::SETOEQ; Opc = ISD::AND; break; case ISD::SETUO: assert(TLI.getCondCodeAction(ISD::SETUNE, OpVT) == TargetLowering::Legal && "If SETUO is expanded, SETUNE must be legal!"); CC1 = ISD::SETUNE; CC2 = ISD::SETUNE; Opc = ISD::OR; break; case ISD::SETOEQ: case ISD::SETOGT: case ISD::SETOGE: case ISD::SETOLT: case ISD::SETOLE: case ISD::SETONE: case ISD::SETUEQ: case ISD::SETUNE: case ISD::SETUGT: case ISD::SETUGE: case ISD::SETULT: case ISD::SETULE: // If we are floating point, assign and break, otherwise fall through. if (!OpVT.isInteger()) { // We can use the 4th bit to tell if we are the unordered // or ordered version of the opcode. CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO; Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND; CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10); break; } // Fallthrough if we are unsigned integer. case ISD::SETLE: case ISD::SETGT: case ISD::SETGE: case ISD::SETLT: // We only support using the inverted operation, which is computed above // and not a different manner of supporting expanding these cases. llvm_unreachable("Don't know how to expand this condition!"); case ISD::SETNE: case ISD::SETEQ: // Try inverting the result of the inverse condition. InvCC = CCCode == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ; if (TLI.isCondCodeLegal(InvCC, OpVT)) { CC = DAG.getCondCode(InvCC); NeedInvert = true; return true; } // If inverting the condition didn't work then we have no means to expand // the condition. llvm_unreachable("Don't know how to expand this condition!"); } SDValue SetCC1, SetCC2; if (CCCode != ISD::SETO && CCCode != ISD::SETUO) { // If we aren't the ordered or unorder operation, // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS). SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1); SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2); } else { // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS) SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1); SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2); } LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2); RHS = SDValue(); CC = SDValue(); return true; } } return false; } /// Emit a store/load combination to the stack. This stores /// SrcOp to a stack slot of type SlotVT, truncating it if needed. It then does /// a load from the stack slot to DestVT, extending it if needed. /// The resultant code need not be legal. SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, const SDLoc &dl) { // Create the stack frame object. unsigned SrcAlign = DAG.getDataLayout().getPrefTypeAlignment( SrcOp.getValueType().getTypeForEVT(*DAG.getContext())); SDValue FIPtr = DAG.CreateStackTemporary(SlotVT, SrcAlign); FrameIndexSDNode *StackPtrFI = cast(FIPtr); int SPFI = StackPtrFI->getIndex(); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI); unsigned SrcSize = SrcOp.getValueType().getSizeInBits(); unsigned SlotSize = SlotVT.getSizeInBits(); unsigned DestSize = DestVT.getSizeInBits(); Type *DestType = DestVT.getTypeForEVT(*DAG.getContext()); unsigned DestAlign = DAG.getDataLayout().getPrefTypeAlignment(DestType); // Emit a store to the stack slot. Use a truncstore if the input value is // later than DestVT. SDValue Store; if (SrcSize > SlotSize) Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, PtrInfo, SlotVT, SrcAlign); else { assert(SrcSize == SlotSize && "Invalid store"); Store = DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, PtrInfo, SrcAlign); } // Result is a load from the stack slot. if (SlotSize == DestSize) return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo, DestAlign); assert(SlotSize < DestSize && "Unknown extension!"); return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, PtrInfo, SlotVT, DestAlign); } SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { SDLoc dl(Node); // Create a vector sized/aligned stack slot, store the value to element #0, // then load the whole vector back out. SDValue StackPtr = DAG.CreateStackTemporary(Node->getValueType(0)); FrameIndexSDNode *StackPtrFI = cast(StackPtr); int SPFI = StackPtrFI->getIndex(); SDValue Ch = DAG.getTruncStore( DAG.getEntryNode(), dl, Node->getOperand(0), StackPtr, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI), Node->getValueType(0).getVectorElementType()); return DAG.getLoad( Node->getValueType(0), dl, Ch, StackPtr, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI)); } static bool ExpandBVWithShuffles(SDNode *Node, SelectionDAG &DAG, const TargetLowering &TLI, SDValue &Res) { unsigned NumElems = Node->getNumOperands(); SDLoc dl(Node); EVT VT = Node->getValueType(0); // Try to group the scalars into pairs, shuffle the pairs together, then // shuffle the pairs of pairs together, etc. until the vector has // been built. This will work only if all of the necessary shuffle masks // are legal. // We do this in two phases; first to check the legality of the shuffles, // and next, assuming that all shuffles are legal, to create the new nodes. for (int Phase = 0; Phase < 2; ++Phase) { SmallVector >, 16> IntermedVals, NewIntermedVals; for (unsigned i = 0; i < NumElems; ++i) { SDValue V = Node->getOperand(i); if (V.isUndef()) continue; SDValue Vec; if (Phase) Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, V); IntermedVals.push_back(std::make_pair(Vec, SmallVector(1, i))); } while (IntermedVals.size() > 2) { NewIntermedVals.clear(); for (unsigned i = 0, e = (IntermedVals.size() & ~1u); i < e; i += 2) { // This vector and the next vector are shuffled together (simply to // append the one to the other). SmallVector ShuffleVec(NumElems, -1); SmallVector FinalIndices; FinalIndices.reserve(IntermedVals[i].second.size() + IntermedVals[i+1].second.size()); int k = 0; for (unsigned j = 0, f = IntermedVals[i].second.size(); j != f; ++j, ++k) { ShuffleVec[k] = j; FinalIndices.push_back(IntermedVals[i].second[j]); } for (unsigned j = 0, f = IntermedVals[i+1].second.size(); j != f; ++j, ++k) { ShuffleVec[k] = NumElems + j; FinalIndices.push_back(IntermedVals[i+1].second[j]); } SDValue Shuffle; if (Phase) Shuffle = DAG.getVectorShuffle(VT, dl, IntermedVals[i].first, IntermedVals[i+1].first, ShuffleVec); else if (!TLI.isShuffleMaskLegal(ShuffleVec, VT)) return false; NewIntermedVals.push_back( std::make_pair(Shuffle, std::move(FinalIndices))); } // If we had an odd number of defined values, then append the last // element to the array of new vectors. if ((IntermedVals.size() & 1) != 0) NewIntermedVals.push_back(IntermedVals.back()); IntermedVals.swap(NewIntermedVals); } assert(IntermedVals.size() <= 2 && IntermedVals.size() > 0 && "Invalid number of intermediate vectors"); SDValue Vec1 = IntermedVals[0].first; SDValue Vec2; if (IntermedVals.size() > 1) Vec2 = IntermedVals[1].first; else if (Phase) Vec2 = DAG.getUNDEF(VT); SmallVector ShuffleVec(NumElems, -1); for (unsigned i = 0, e = IntermedVals[0].second.size(); i != e; ++i) ShuffleVec[IntermedVals[0].second[i]] = i; for (unsigned i = 0, e = IntermedVals[1].second.size(); i != e; ++i) ShuffleVec[IntermedVals[1].second[i]] = NumElems + i; if (Phase) Res = DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec); else if (!TLI.isShuffleMaskLegal(ShuffleVec, VT)) return false; } return true; } /// Expand a BUILD_VECTOR node on targets that don't /// support the operation, but do support the resultant vector type. SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { unsigned NumElems = Node->getNumOperands(); SDValue Value1, Value2; SDLoc dl(Node); EVT VT = Node->getValueType(0); EVT OpVT = Node->getOperand(0).getValueType(); EVT EltVT = VT.getVectorElementType(); // If the only non-undef value is the low element, turn this into a // SCALAR_TO_VECTOR node. If this is { X, X, X, X }, determine X. bool isOnlyLowElement = true; bool MoreThanTwoValues = false; bool isConstant = true; for (unsigned i = 0; i < NumElems; ++i) { SDValue V = Node->getOperand(i); if (V.isUndef()) continue; if (i > 0) isOnlyLowElement = false; if (!isa(V) && !isa(V)) isConstant = false; if (!Value1.getNode()) { Value1 = V; } else if (!Value2.getNode()) { if (V != Value1) Value2 = V; } else if (V != Value1 && V != Value2) { MoreThanTwoValues = true; } } if (!Value1.getNode()) return DAG.getUNDEF(VT); if (isOnlyLowElement) return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Node->getOperand(0)); // If all elements are constants, create a load from the constant pool. if (isConstant) { SmallVector CV; for (unsigned i = 0, e = NumElems; i != e; ++i) { if (ConstantFPSDNode *V = dyn_cast(Node->getOperand(i))) { CV.push_back(const_cast(V->getConstantFPValue())); } else if (ConstantSDNode *V = dyn_cast(Node->getOperand(i))) { if (OpVT==EltVT) CV.push_back(const_cast(V->getConstantIntValue())); else { // If OpVT and EltVT don't match, EltVT is not legal and the // element values have been promoted/truncated earlier. Undo this; // we don't want a v16i8 to become a v16i32 for example. const ConstantInt *CI = V->getConstantIntValue(); CV.push_back(ConstantInt::get(EltVT.getTypeForEVT(*DAG.getContext()), CI->getZExtValue())); } } else { assert(Node->getOperand(i).isUndef()); Type *OpNTy = EltVT.getTypeForEVT(*DAG.getContext()); CV.push_back(UndefValue::get(OpNTy)); } } Constant *CP = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy(DAG.getDataLayout())); unsigned Alignment = cast(CPIdx)->getAlignment(); return DAG.getLoad( VT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Alignment); } SmallSet DefinedValues; for (unsigned i = 0; i < NumElems; ++i) { if (Node->getOperand(i).isUndef()) continue; DefinedValues.insert(Node->getOperand(i)); } if (TLI.shouldExpandBuildVectorWithShuffles(VT, DefinedValues.size())) { if (!MoreThanTwoValues) { SmallVector ShuffleVec(NumElems, -1); for (unsigned i = 0; i < NumElems; ++i) { SDValue V = Node->getOperand(i); if (V.isUndef()) continue; ShuffleVec[i] = V == Value1 ? 0 : NumElems; } if (TLI.isShuffleMaskLegal(ShuffleVec, Node->getValueType(0))) { // Get the splatted value into the low element of a vector register. SDValue Vec1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value1); SDValue Vec2; if (Value2.getNode()) Vec2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value2); else Vec2 = DAG.getUNDEF(VT); // Return shuffle(LowValVec, undef, <0,0,0,0>) return DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec); } } else { SDValue Res; if (ExpandBVWithShuffles(Node, DAG, TLI, Res)) return Res; } } // Otherwise, we can't handle this case efficiently. return ExpandVectorBuildThroughStack(Node); } // Expand a node into a call to a libcall. If the result value // does not fit into a register, return the lo part and set the hi part to the // by-reg argument. If it does fit into a single register, return the result // and leave the Hi part unset. SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned) { TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; for (const SDValue &Op : Node->op_values()) { EVT ArgVT = Op.getValueType(); Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); Entry.Node = Op; Entry.Ty = ArgTy; Entry.isSExt = isSigned; Entry.isZExt = !isSigned; Args.push_back(Entry); } SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy(DAG.getDataLayout())); Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); // By default, the input chain to this libcall is the entry node of the // function. If the libcall is going to be emitted as a tail call then // TLI.isUsedByReturnOnly will change it to the right chain if the return // node which is being folded has a non-entry input chain. SDValue InChain = DAG.getEntryNode(); // isTailCall may be true since the callee does not reference caller stack // frame. Check if it's in the right position and that the return types match. SDValue TCChain = InChain; const Function *F = DAG.getMachineFunction().getFunction(); bool isTailCall = TLI.isInTailCallPosition(DAG, Node, TCChain) && (RetTy == F->getReturnType() || F->getReturnType()->isVoidTy()); if (isTailCall) InChain = TCChain; TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(SDLoc(Node)).setChain(InChain) .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) .setTailCall(isTailCall).setSExtResult(isSigned).setZExtResult(!isSigned); std::pair CallInfo = TLI.LowerCallTo(CLI); if (!CallInfo.second.getNode()) // It's a tailcall, return the chain (which is the DAG root). return DAG.getRoot(); return CallInfo.first; } /// Generate a libcall taking the given operands as arguments /// and returning a result of type RetVT. SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops, unsigned NumOps, bool isSigned, const SDLoc &dl) { TargetLowering::ArgListTy Args; Args.reserve(NumOps); TargetLowering::ArgListEntry Entry; for (unsigned i = 0; i != NumOps; ++i) { Entry.Node = Ops[i]; Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); Entry.isSExt = isSigned; Entry.isZExt = !isSigned; Args.push_back(Entry); } SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy(DAG.getDataLayout())); Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) .setSExtResult(isSigned).setZExtResult(!isSigned); std::pair CallInfo = TLI.LowerCallTo(CLI); return CallInfo.first; } // Expand a node into a call to a libcall. Similar to // ExpandLibCall except that the first operand is the in-chain. std::pair SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned) { SDValue InChain = Node->getOperand(0); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) { EVT ArgVT = Node->getOperand(i).getValueType(); Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy; Entry.isSExt = isSigned; Entry.isZExt = !isSigned; Args.push_back(Entry); } SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy(DAG.getDataLayout())); Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(SDLoc(Node)).setChain(InChain) .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) .setSExtResult(isSigned).setZExtResult(!isSigned); std::pair CallInfo = TLI.LowerCallTo(CLI); return CallInfo; } SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128, RTLIB::Libcall Call_PPCF128) { RTLIB::Libcall LC; switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::f32: LC = Call_F32; break; case MVT::f64: LC = Call_F64; break; case MVT::f80: LC = Call_F80; break; case MVT::f128: LC = Call_F128; break; case MVT::ppcf128: LC = Call_PPCF128; break; } return ExpandLibCall(LC, Node, false); } SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, RTLIB::Libcall Call_I8, RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128) { RTLIB::Libcall LC; switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC = Call_I8; break; case MVT::i16: LC = Call_I16; break; case MVT::i32: LC = Call_I32; break; case MVT::i64: LC = Call_I64; break; case MVT::i128: LC = Call_I128; break; } return ExpandLibCall(LC, Node, isSigned); } /// Issue libcalls to __{u}divmod to compute div / rem pairs. void SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl &Results) { unsigned Opcode = Node->getOpcode(); bool isSigned = Opcode == ISD::SDIVREM; RTLIB::Libcall LC; switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break; } // The input chain to this libcall is the entry node of the function. // Legalizing the call will automatically add the previous call to the // dependence. SDValue InChain = DAG.getEntryNode(); EVT RetVT = Node->getValueType(0); Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; for (const SDValue &Op : Node->op_values()) { EVT ArgVT = Op.getValueType(); Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); Entry.Node = Op; Entry.Ty = ArgTy; Entry.isSExt = isSigned; Entry.isZExt = !isSigned; Args.push_back(Entry); } // Also pass the return address of the remainder. SDValue FIPtr = DAG.CreateStackTemporary(RetVT); Entry.Node = FIPtr; Entry.Ty = RetTy->getPointerTo(); Entry.isSExt = isSigned; Entry.isZExt = !isSigned; Args.push_back(Entry); SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy(DAG.getDataLayout())); SDLoc dl(Node); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(InChain) .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) .setSExtResult(isSigned).setZExtResult(!isSigned); std::pair CallInfo = TLI.LowerCallTo(CLI); // Remainder is loaded back from the stack frame. SDValue Rem = DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr, MachinePointerInfo()); Results.push_back(CallInfo.first); Results.push_back(Rem); } /// Return true if sincos libcall is available. static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) { RTLIB::Libcall LC; switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::f32: LC = RTLIB::SINCOS_F32; break; case MVT::f64: LC = RTLIB::SINCOS_F64; break; case MVT::f80: LC = RTLIB::SINCOS_F80; break; case MVT::f128: LC = RTLIB::SINCOS_F128; break; case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break; } return TLI.getLibcallName(LC) != nullptr; } /// Return true if sincos libcall is available and can be used to combine sin /// and cos. static bool canCombineSinCosLibcall(SDNode *Node, const TargetLowering &TLI, const TargetMachine &TM) { if (!isSinCosLibcallAvailable(Node, TLI)) return false; // GNU sin/cos functions set errno while sincos does not. Therefore // combining sin and cos is only safe if unsafe-fpmath is enabled. if (TM.getTargetTriple().isGNUEnvironment() && !TM.Options.UnsafeFPMath) return false; return true; } /// Only issue sincos libcall if both sin and cos are needed. static bool useSinCos(SDNode *Node) { unsigned OtherOpcode = Node->getOpcode() == ISD::FSIN ? ISD::FCOS : ISD::FSIN; SDValue Op0 = Node->getOperand(0); for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), UE = Op0.getNode()->use_end(); UI != UE; ++UI) { SDNode *User = *UI; if (User == Node) continue; // The other user might have been turned into sincos already. if (User->getOpcode() == OtherOpcode || User->getOpcode() == ISD::FSINCOS) return true; } return false; } /// Issue libcalls to sincos to compute sin / cos pairs. void SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl &Results) { RTLIB::Libcall LC; switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::f32: LC = RTLIB::SINCOS_F32; break; case MVT::f64: LC = RTLIB::SINCOS_F64; break; case MVT::f80: LC = RTLIB::SINCOS_F80; break; case MVT::f128: LC = RTLIB::SINCOS_F128; break; case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break; } // The input chain to this libcall is the entry node of the function. // Legalizing the call will automatically add the previous call to the // dependence. SDValue InChain = DAG.getEntryNode(); EVT RetVT = Node->getValueType(0); Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; // Pass the argument. Entry.Node = Node->getOperand(0); Entry.Ty = RetTy; Entry.isSExt = false; Entry.isZExt = false; Args.push_back(Entry); // Pass the return address of sin. SDValue SinPtr = DAG.CreateStackTemporary(RetVT); Entry.Node = SinPtr; Entry.Ty = RetTy->getPointerTo(); Entry.isSExt = false; Entry.isZExt = false; Args.push_back(Entry); // Also pass the return address of the cos. SDValue CosPtr = DAG.CreateStackTemporary(RetVT); Entry.Node = CosPtr; Entry.Ty = RetTy->getPointerTo(); Entry.isSExt = false; Entry.isZExt = false; Args.push_back(Entry); SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy(DAG.getDataLayout())); SDLoc dl(Node); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(InChain) .setCallee(TLI.getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()), Callee, std::move(Args)); std::pair CallInfo = TLI.LowerCallTo(CLI); Results.push_back( DAG.getLoad(RetVT, dl, CallInfo.second, SinPtr, MachinePointerInfo())); Results.push_back( DAG.getLoad(RetVT, dl, CallInfo.second, CosPtr, MachinePointerInfo())); } /// This function is responsible for legalizing a /// INT_TO_FP operation of the specified operand when the target requests that /// we expand it. At this point, we know that the result and operand types are /// legal for the target. SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, EVT DestVT, const SDLoc &dl) { // TODO: Should any fast-math-flags be set for the created nodes? if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) { // simple 32-bit [signed|unsigned] integer to float/double expansion // Get the stack frame index of a 8 byte buffer. SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64); // word offset constant for Hi/Lo address computation SDValue WordOff = DAG.getConstant(sizeof(int), dl, StackSlot.getValueType()); // set up Hi and Lo (into buffer) address based on endian SDValue Hi = StackSlot; SDValue Lo = DAG.getNode(ISD::ADD, dl, StackSlot.getValueType(), StackSlot, WordOff); if (DAG.getDataLayout().isLittleEndian()) std::swap(Hi, Lo); // if signed map to unsigned space SDValue Op0Mapped; if (isSigned) { // constant used to invert sign bit (signed to unsigned mapping) SDValue SignBit = DAG.getConstant(0x80000000u, dl, MVT::i32); Op0Mapped = DAG.getNode(ISD::XOR, dl, MVT::i32, Op0, SignBit); } else { Op0Mapped = Op0; } // store the lo of the constructed double - based on integer input SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op0Mapped, Lo, MachinePointerInfo()); // initial hi portion of constructed double SDValue InitialHi = DAG.getConstant(0x43300000u, dl, MVT::i32); // store the hi of the constructed double - biased exponent SDValue Store2 = DAG.getStore(Store1, dl, InitialHi, Hi, MachinePointerInfo()); // load the constructed double SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, MachinePointerInfo()); // FP constant to bias correct the final result SDValue Bias = DAG.getConstantFP(isSigned ? BitsToDouble(0x4330000080000000ULL) : BitsToDouble(0x4330000000000000ULL), dl, MVT::f64); // subtract the bias SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias); // final result SDValue Result; // handle final rounding if (DestVT == MVT::f64) { // do nothing Result = Sub; } else if (DestVT.bitsLT(MVT::f64)) { Result = DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub, DAG.getIntPtrConstant(0, dl)); } else if (DestVT.bitsGT(MVT::f64)) { Result = DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub); } return Result; } assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet"); // Code below here assumes !isSigned without checking again. // Implementation of unsigned i64 to f64 following the algorithm in // __floatundidf in compiler_rt. This implementation has the advantage // of performing rounding correctly, both in the default rounding mode // and in all alternate rounding modes. // TODO: Generalize this for use with other types. if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f64) { SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, MVT::i64); SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(BitsToDouble(UINT64_C(0x4530000000100000)), dl, MVT::f64); SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, MVT::i64); SDValue Lo = DAG.getZeroExtendInReg(Op0, dl, MVT::i32); SDValue Hi = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, DAG.getConstant(32, dl, MVT::i64)); SDValue LoOr = DAG.getNode(ISD::OR, dl, MVT::i64, Lo, TwoP52); SDValue HiOr = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, TwoP84); SDValue LoFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, LoOr); SDValue HiFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, HiOr); SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt, TwoP84PlusTwoP52); return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub); } // Implementation of unsigned i64 to f32. // TODO: Generalize this for use with other types. if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) { // For unsigned conversions, convert them to signed conversions using the // algorithm from the x86_64 __floatundidf in compiler_rt. if (!isSigned) { SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Op0); SDValue ShiftConst = DAG.getConstant( 1, dl, TLI.getShiftAmountTy(Op0.getValueType(), DAG.getDataLayout())); SDValue Shr = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, ShiftConst); SDValue AndConst = DAG.getConstant(1, dl, MVT::i64); SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, AndConst); SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, Shr); SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Or); SDValue Slow = DAG.getNode(ISD::FADD, dl, MVT::f32, SignCvt, SignCvt); // TODO: This really should be implemented using a branch rather than a // select. We happen to get lucky and machinesink does the right // thing most of the time. This would be a good candidate for a //pseudo-op, or, even better, for whole-function isel. SDValue SignBitTest = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), Op0, DAG.getConstant(0, dl, MVT::i64), ISD::SETLT); return DAG.getSelect(dl, MVT::f32, SignBitTest, Slow, Fast); } // Otherwise, implement the fully general conversion. SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, DAG.getConstant(UINT64_C(0xfffffffffffff800), dl, MVT::i64)); SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, DAG.getConstant(UINT64_C(0x800), dl, MVT::i64)); SDValue And2 = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, DAG.getConstant(UINT64_C(0x7ff), dl, MVT::i64)); SDValue Ne = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), And2, DAG.getConstant(UINT64_C(0), dl, MVT::i64), ISD::SETNE); SDValue Sel = DAG.getSelect(dl, MVT::i64, Ne, Or, Op0); SDValue Ge = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), Op0, DAG.getConstant(UINT64_C(0x0020000000000000), dl, MVT::i64), ISD::SETUGE); SDValue Sel2 = DAG.getSelect(dl, MVT::i64, Ge, Sel, Op0); EVT SHVT = TLI.getShiftAmountTy(Sel2.getValueType(), DAG.getDataLayout()); SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2, DAG.getConstant(32, dl, SHVT)); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sh); SDValue Fcvt = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Trunc); SDValue TwoP32 = DAG.getConstantFP(BitsToDouble(UINT64_C(0x41f0000000000000)), dl, MVT::f64); SDValue Fmul = DAG.getNode(ISD::FMUL, dl, MVT::f64, TwoP32, Fcvt); SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sel2); SDValue Fcvt2 = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Lo); SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2); return DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Fadd, DAG.getIntPtrConstant(0, dl)); } SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0); SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(Op0.getValueType()), Op0, DAG.getConstant(0, dl, Op0.getValueType()), ISD::SETLT); SDValue Zero = DAG.getIntPtrConstant(0, dl), Four = DAG.getIntPtrConstant(4, dl); SDValue CstOffset = DAG.getSelect(dl, Zero.getValueType(), SignSet, Four, Zero); // If the sign bit of the integer is set, the large number will be treated // as a negative number. To counteract this, the dynamic code adds an // offset depending on the data type. uint64_t FF; switch (Op0.getSimpleValueType().SimpleTy) { default: llvm_unreachable("Unsupported integer type!"); case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float) case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float) case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float) case MVT::i64: FF = 0x5F800000ULL; break; // 2^64 (as a float) } if (DAG.getDataLayout().isLittleEndian()) FF <<= 32; Constant *FudgeFactor = ConstantInt::get( Type::getInt64Ty(*DAG.getContext()), FF); SDValue CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy(DAG.getDataLayout())); unsigned Alignment = cast(CPIdx)->getAlignment(); CPIdx = DAG.getNode(ISD::ADD, dl, CPIdx.getValueType(), CPIdx, CstOffset); Alignment = std::min(Alignment, 4u); SDValue FudgeInReg; if (DestVT == MVT::f32) FudgeInReg = DAG.getLoad( MVT::f32, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Alignment); else { SDValue Load = DAG.getExtLoad( ISD::EXTLOAD, dl, DestVT, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32, Alignment); HandleSDNode Handle(Load); LegalizeOp(Load.getNode()); FudgeInReg = Handle.getValue(); } return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg); } /// This function is responsible for legalizing a /// *INT_TO_FP operation of the specified operand when the target requests that /// we promote it. At this point, we know that the result and operand types are /// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP /// operation that takes a larger input. SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned, const SDLoc &dl) { // First step, figure out the appropriate *INT_TO_FP operation to use. EVT NewInTy = LegalOp.getValueType(); unsigned OpToUse = 0; // Scan for the appropriate larger type to use. while (1) { NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT().SimpleTy+1); assert(NewInTy.isInteger() && "Ran out of possibilities!"); // If the target supports SINT_TO_FP of this type, use it. if (TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, NewInTy)) { OpToUse = ISD::SINT_TO_FP; break; } if (isSigned) continue; // If the target supports UINT_TO_FP of this type, use it. if (TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, NewInTy)) { OpToUse = ISD::UINT_TO_FP; break; } // Otherwise, try a larger type. } // Okay, we found the operation and type to use. Zero extend our input to the // desired type then run the operation on it. return DAG.getNode(OpToUse, dl, DestVT, DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, NewInTy, LegalOp)); } /// This function is responsible for legalizing a /// FP_TO_*INT operation of the specified operand when the target requests that /// we promote it. At this point, we know that the result and operand types are /// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT /// operation that returns a larger result. SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned, const SDLoc &dl) { // First step, figure out the appropriate FP_TO*INT operation to use. EVT NewOutTy = DestVT; unsigned OpToUse = 0; // Scan for the appropriate larger type to use. while (1) { NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT().SimpleTy+1); assert(NewOutTy.isInteger() && "Ran out of possibilities!"); // A larger signed type can hold all unsigned values of the requested type, // so using FP_TO_SINT is valid if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewOutTy)) { OpToUse = ISD::FP_TO_SINT; break; } // However, if the value may be < 0.0, we *must* use some FP_TO_SINT. if (!isSigned && TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) { OpToUse = ISD::FP_TO_UINT; break; } // Otherwise, try a larger type. } // Okay, we found the operation and type to use. SDValue Operation = DAG.getNode(OpToUse, dl, NewOutTy, LegalOp); // Truncate the result of the extended FP_TO_*INT operation to the desired // size. return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation); } /// Open code the operations for BITREVERSE. SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, const SDLoc &dl) { EVT VT = Op.getValueType(); EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); unsigned Sz = VT.getScalarSizeInBits(); SDValue Tmp, Tmp2; Tmp = DAG.getConstant(0, dl, VT); for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) { if (I < J) Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT)); else Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT)); APInt Shift(Sz, 1); Shift = Shift.shl(J); Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT)); Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2); } return Tmp; } /// Open code the operations for BSWAP of the specified operation. SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, const SDLoc &dl) { EVT VT = Op.getValueType(); EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8; switch (VT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unhandled Expand type in BSWAP!"); case MVT::i16: Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); return DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); case MVT::i32: Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(0xFF0000, dl, VT)); Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT)); Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3); Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1); return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2); case MVT::i64: Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT)); Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, dl, SHVT)); Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT)); Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT)); Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7, DAG.getConstant(255ULL<<48, dl, VT)); Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6, DAG.getConstant(255ULL<<40, dl, VT)); Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5, DAG.getConstant(255ULL<<32, dl, VT)); Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4, DAG.getConstant(255ULL<<24, dl, VT)); Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(255ULL<<16, dl, VT)); Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(255ULL<<8 , dl, VT)); Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7); Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5); Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3); Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1); Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6); Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2); return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4); } } /// Expand the specified bitcount instruction into operations. SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, const SDLoc &dl) { switch (Opc) { default: llvm_unreachable("Cannot expand this yet!"); case ISD::CTPOP: { EVT VT = Op.getValueType(); EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); unsigned Len = VT.getSizeInBits(); assert(VT.isInteger() && Len <= 128 && Len % 8 == 0 && "CTPOP not implemented for this type."); // This is the "best" algorithm from // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel SDValue Mask55 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT); SDValue Mask33 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT); SDValue Mask0F = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT); SDValue Mask01 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT); // v = v - ((v >> 1) & 0x55555555...) Op = DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getNode(ISD::AND, dl, VT, DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(1, dl, ShVT)), Mask55)); // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...) Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33), DAG.getNode(ISD::AND, dl, VT, DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(2, dl, ShVT)), Mask33)); // v = (v + (v >> 4)) & 0x0F0F0F0F... Op = DAG.getNode(ISD::AND, dl, VT, DAG.getNode(ISD::ADD, dl, VT, Op, DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(4, dl, ShVT))), Mask0F); // v = (v * 0x01010101...) >> (Len - 8) Op = DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01), DAG.getConstant(Len - 8, dl, ShVT)); return Op; } case ISD::CTLZ_ZERO_UNDEF: // This trivially expands to CTLZ. return DAG.getNode(ISD::CTLZ, dl, Op.getValueType(), Op); case ISD::CTLZ: { EVT VT = Op.getValueType(); unsigned len = VT.getSizeInBits(); if (TLI.isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) { EVT SetCCVT = getSetCCResultType(VT); SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op); SDValue Zero = DAG.getConstant(0, dl, VT); SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ); return DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero, DAG.getConstant(len, dl, VT), CTLZ); } // for now, we do this: // x = x | (x >> 1); // x = x | (x >> 2); // ... // x = x | (x >>16); // x = x | (x >>32); // for 64-bit input // return popcount(~x); // // Ref: "Hacker's Delight" by Henry Warren EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); for (unsigned i = 0; (1U << i) <= (len / 2); ++i) { SDValue Tmp3 = DAG.getConstant(1ULL << i, dl, ShVT); Op = DAG.getNode(ISD::OR, dl, VT, Op, DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3)); } Op = DAG.getNOT(dl, Op, VT); return DAG.getNode(ISD::CTPOP, dl, VT, Op); } case ISD::CTTZ_ZERO_UNDEF: // This trivially expands to CTTZ. return DAG.getNode(ISD::CTTZ, dl, Op.getValueType(), Op); case ISD::CTTZ: { // for now, we use: { return popcount(~x & (x - 1)); } // unless the target has ctlz but not ctpop, in which case we use: // { return 32 - nlz(~x & (x-1)); } // Ref: "Hacker's Delight" by Henry Warren EVT VT = Op.getValueType(); SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT), DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT))); // If ISD::CTLZ is legal and CTPOP isn't, then do that instead. if (!TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) && TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(VT.getSizeInBits(), dl, VT), DAG.getNode(ISD::CTLZ, dl, VT, Tmp3)); return DAG.getNode(ISD::CTPOP, dl, VT, Tmp3); } } } bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { SmallVector Results; SDLoc dl(Node); SDValue Tmp1, Tmp2, Tmp3, Tmp4; bool NeedInvert; switch (Node->getOpcode()) { case ISD::CTPOP: case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl); Results.push_back(Tmp1); break; case ISD::BITREVERSE: Results.push_back(ExpandBITREVERSE(Node->getOperand(0), dl)); break; case ISD::BSWAP: Results.push_back(ExpandBSWAP(Node->getOperand(0), dl)); break; case ISD::FRAMEADDR: case ISD::RETURNADDR: case ISD::FRAME_TO_ARGS_OFFSET: Results.push_back(DAG.getConstant(0, dl, Node->getValueType(0))); break; + case ISD::EH_DWARF_CFA: { + SDValue CfaArg = DAG.getSExtOrTrunc(Node->getOperand(0), dl, + TLI.getPointerTy(DAG.getDataLayout())); + SDValue Offset = DAG.getNode(ISD::ADD, dl, + CfaArg.getValueType(), + DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, dl, + CfaArg.getValueType()), + CfaArg); + SDValue FA = DAG.getNode( + ISD::FRAMEADDR, dl, TLI.getPointerTy(DAG.getDataLayout()), + DAG.getConstant(0, dl, TLI.getPointerTy(DAG.getDataLayout()))); + Results.push_back(DAG.getNode(ISD::ADD, dl, FA.getValueType(), + FA, Offset)); + break; + } case ISD::FLT_ROUNDS_: Results.push_back(DAG.getConstant(1, dl, Node->getValueType(0))); break; case ISD::EH_RETURN: case ISD::EH_LABEL: case ISD::PREFETCH: case ISD::VAEND: case ISD::EH_SJLJ_LONGJMP: // If the target didn't expand these, there's nothing to do, so just // preserve the chain and be done. Results.push_back(Node->getOperand(0)); break; case ISD::READCYCLECOUNTER: // If the target didn't expand this, just return 'zero' and preserve the // chain. Results.append(Node->getNumValues() - 1, DAG.getConstant(0, dl, Node->getValueType(0))); Results.push_back(Node->getOperand(0)); break; case ISD::EH_SJLJ_SETJMP: // If the target didn't expand this, just return 'zero' and preserve the // chain. Results.push_back(DAG.getConstant(0, dl, MVT::i32)); Results.push_back(Node->getOperand(0)); break; case ISD::ATOMIC_LOAD: { // There is no libcall for atomic load; fake it with ATOMIC_CMP_SWAP. SDValue Zero = DAG.getConstant(0, dl, Node->getValueType(0)); SDVTList VTs = DAG.getVTList(Node->getValueType(0), MVT::Other); SDValue Swap = DAG.getAtomicCmpSwap( ISD::ATOMIC_CMP_SWAP, dl, cast(Node)->getMemoryVT(), VTs, Node->getOperand(0), Node->getOperand(1), Zero, Zero, cast(Node)->getMemOperand(), cast(Node)->getOrdering(), cast(Node)->getOrdering(), cast(Node)->getSynchScope()); Results.push_back(Swap.getValue(0)); Results.push_back(Swap.getValue(1)); break; } case ISD::ATOMIC_STORE: { // There is no libcall for atomic store; fake it with ATOMIC_SWAP. SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl, cast(Node)->getMemoryVT(), Node->getOperand(0), Node->getOperand(1), Node->getOperand(2), cast(Node)->getMemOperand(), cast(Node)->getOrdering(), cast(Node)->getSynchScope()); Results.push_back(Swap.getValue(1)); break; } case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: { // Expanding an ATOMIC_CMP_SWAP_WITH_SUCCESS produces an ATOMIC_CMP_SWAP and // splits out the success value as a comparison. Expanding the resulting // ATOMIC_CMP_SWAP will produce a libcall. SDVTList VTs = DAG.getVTList(Node->getValueType(0), MVT::Other); SDValue Res = DAG.getAtomicCmpSwap( ISD::ATOMIC_CMP_SWAP, dl, cast(Node)->getMemoryVT(), VTs, Node->getOperand(0), Node->getOperand(1), Node->getOperand(2), Node->getOperand(3), cast(Node)->getMemOperand(), cast(Node)->getSuccessOrdering(), cast(Node)->getFailureOrdering(), cast(Node)->getSynchScope()); SDValue ExtRes = Res; SDValue LHS = Res; SDValue RHS = Node->getOperand(1); EVT AtomicType = cast(Node)->getMemoryVT(); EVT OuterType = Node->getValueType(0); switch (TLI.getExtendForAtomicOps()) { case ISD::SIGN_EXTEND: LHS = DAG.getNode(ISD::AssertSext, dl, OuterType, Res, DAG.getValueType(AtomicType)); RHS = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, OuterType, Node->getOperand(2), DAG.getValueType(AtomicType)); ExtRes = LHS; break; case ISD::ZERO_EXTEND: LHS = DAG.getNode(ISD::AssertZext, dl, OuterType, Res, DAG.getValueType(AtomicType)); RHS = DAG.getNode(ISD::ZERO_EXTEND, dl, OuterType, Node->getOperand(2)); ExtRes = LHS; break; case ISD::ANY_EXTEND: LHS = DAG.getZeroExtendInReg(Res, dl, AtomicType); RHS = DAG.getNode(ISD::ZERO_EXTEND, dl, OuterType, Node->getOperand(2)); break; default: llvm_unreachable("Invalid atomic op extension"); } SDValue Success = DAG.getSetCC(dl, Node->getValueType(1), LHS, RHS, ISD::SETEQ); Results.push_back(ExtRes.getValue(0)); Results.push_back(Success); Results.push_back(Res.getValue(1)); break; } case ISD::DYNAMIC_STACKALLOC: ExpandDYNAMIC_STACKALLOC(Node, Results); break; case ISD::MERGE_VALUES: for (unsigned i = 0; i < Node->getNumValues(); i++) Results.push_back(Node->getOperand(i)); break; case ISD::UNDEF: { EVT VT = Node->getValueType(0); if (VT.isInteger()) Results.push_back(DAG.getConstant(0, dl, VT)); else { assert(VT.isFloatingPoint() && "Unknown value type!"); Results.push_back(DAG.getConstantFP(0, dl, VT)); } break; } case ISD::FP_ROUND: case ISD::BITCAST: Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0), Node->getValueType(0), dl); Results.push_back(Tmp1); break; case ISD::FP_EXTEND: Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getOperand(0).getValueType(), Node->getValueType(0), dl); Results.push_back(Tmp1); break; case ISD::SIGN_EXTEND_INREG: { // NOTE: we could fall back on load/store here too for targets without // SAR. However, it is doubtful that any exist. EVT ExtraVT = cast(Node->getOperand(1))->getVT(); EVT VT = Node->getValueType(0); EVT ShiftAmountTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); if (VT.isVector()) ShiftAmountTy = VT; unsigned BitsDiff = VT.getScalarType().getSizeInBits() - ExtraVT.getScalarType().getSizeInBits(); SDValue ShiftCst = DAG.getConstant(BitsDiff, dl, ShiftAmountTy); Tmp1 = DAG.getNode(ISD::SHL, dl, Node->getValueType(0), Node->getOperand(0), ShiftCst); Tmp1 = DAG.getNode(ISD::SRA, dl, Node->getValueType(0), Tmp1, ShiftCst); Results.push_back(Tmp1); break; } case ISD::FP_ROUND_INREG: { // The only way we can lower this is to turn it into a TRUNCSTORE, // EXTLOAD pair, targeting a temporary location (a stack slot). // NOTE: there is a choice here between constantly creating new stack // slots and always reusing the same one. We currently always create // new ones, as reuse may inhibit scheduling. EVT ExtraVT = cast(Node->getOperand(1))->getVT(); Tmp1 = EmitStackConvert(Node->getOperand(0), ExtraVT, Node->getValueType(0), dl); Results.push_back(Tmp1); break; } case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: Tmp1 = ExpandLegalINT_TO_FP(Node->getOpcode() == ISD::SINT_TO_FP, Node->getOperand(0), Node->getValueType(0), dl); Results.push_back(Tmp1); break; case ISD::FP_TO_SINT: if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG)) Results.push_back(Tmp1); break; case ISD::FP_TO_UINT: { SDValue True, False; EVT VT = Node->getOperand(0).getValueType(); EVT NVT = Node->getValueType(0); APFloat apf(DAG.EVTToAPFloatSemantics(VT), APInt::getNullValue(VT.getSizeInBits())); APInt x = APInt::getSignBit(NVT.getSizeInBits()); (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven); Tmp1 = DAG.getConstantFP(apf, dl, VT); Tmp2 = DAG.getSetCC(dl, getSetCCResultType(VT), Node->getOperand(0), Tmp1, ISD::SETLT); True = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, Node->getOperand(0)); // TODO: Should any fast-math-flags be set for the FSUB? False = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, DAG.getNode(ISD::FSUB, dl, VT, Node->getOperand(0), Tmp1)); False = DAG.getNode(ISD::XOR, dl, NVT, False, DAG.getConstant(x, dl, NVT)); Tmp1 = DAG.getSelect(dl, NVT, Tmp2, True, False); Results.push_back(Tmp1); break; } case ISD::VAARG: Results.push_back(DAG.expandVAArg(Node)); Results.push_back(Results[0].getValue(1)); break; case ISD::VACOPY: Results.push_back(DAG.expandVACopy(Node)); break; case ISD::EXTRACT_VECTOR_ELT: if (Node->getOperand(0).getValueType().getVectorNumElements() == 1) // This must be an access of the only element. Return it. Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), Node->getOperand(0)); else Tmp1 = ExpandExtractFromVectorThroughStack(SDValue(Node, 0)); Results.push_back(Tmp1); break; case ISD::EXTRACT_SUBVECTOR: Results.push_back(ExpandExtractFromVectorThroughStack(SDValue(Node, 0))); break; case ISD::INSERT_SUBVECTOR: Results.push_back(ExpandInsertToVectorThroughStack(SDValue(Node, 0))); break; case ISD::CONCAT_VECTORS: { Results.push_back(ExpandVectorBuildThroughStack(Node)); break; } case ISD::SCALAR_TO_VECTOR: Results.push_back(ExpandSCALAR_TO_VECTOR(Node)); break; case ISD::INSERT_VECTOR_ELT: Results.push_back(ExpandINSERT_VECTOR_ELT(Node->getOperand(0), Node->getOperand(1), Node->getOperand(2), dl)); break; case ISD::VECTOR_SHUFFLE: { SmallVector NewMask; ArrayRef Mask = cast(Node)->getMask(); EVT VT = Node->getValueType(0); EVT EltVT = VT.getVectorElementType(); SDValue Op0 = Node->getOperand(0); SDValue Op1 = Node->getOperand(1); if (!TLI.isTypeLegal(EltVT)) { EVT NewEltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT); // BUILD_VECTOR operands are allowed to be wider than the element type. // But if NewEltVT is smaller that EltVT the BUILD_VECTOR does not accept // it. if (NewEltVT.bitsLT(EltVT)) { // Convert shuffle node. // If original node was v4i64 and the new EltVT is i32, // cast operands to v8i32 and re-build the mask. // Calculate new VT, the size of the new VT should be equal to original. EVT NewVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, VT.getSizeInBits() / NewEltVT.getSizeInBits()); assert(NewVT.bitsEq(VT)); // cast operands to new VT Op0 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op0); Op1 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op1); // Convert the shuffle mask unsigned int factor = NewVT.getVectorNumElements()/VT.getVectorNumElements(); // EltVT gets smaller assert(factor > 0); for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) { if (Mask[i] < 0) { for (unsigned fi = 0; fi < factor; ++fi) NewMask.push_back(Mask[i]); } else { for (unsigned fi = 0; fi < factor; ++fi) NewMask.push_back(Mask[i]*factor+fi); } } Mask = NewMask; VT = NewVT; } EltVT = NewEltVT; } unsigned NumElems = VT.getVectorNumElements(); SmallVector Ops; for (unsigned i = 0; i != NumElems; ++i) { if (Mask[i] < 0) { Ops.push_back(DAG.getUNDEF(EltVT)); continue; } unsigned Idx = Mask[i]; if (Idx < NumElems) Ops.push_back(DAG.getNode( ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0, DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())))); else Ops.push_back(DAG.getNode( ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op1, DAG.getConstant(Idx - NumElems, dl, TLI.getVectorIdxTy(DAG.getDataLayout())))); } Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); // We may have changed the BUILD_VECTOR type. Cast it back to the Node type. Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), Tmp1); Results.push_back(Tmp1); break; } case ISD::EXTRACT_ELEMENT: { EVT OpTy = Node->getOperand(0).getValueType(); if (cast(Node->getOperand(1))->getZExtValue()) { // 1 -> Hi Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0), DAG.getConstant(OpTy.getSizeInBits() / 2, dl, TLI.getShiftAmountTy( Node->getOperand(0).getValueType(), DAG.getDataLayout()))); Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Tmp1); } else { // 0 -> Lo Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Node->getOperand(0)); } Results.push_back(Tmp1); break; } case ISD::STACKSAVE: // Expand to CopyFromReg if the target set // StackPointerRegisterToSaveRestore. if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) { Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, SP, Node->getValueType(0))); Results.push_back(Results[0].getValue(1)); } else { Results.push_back(DAG.getUNDEF(Node->getValueType(0))); Results.push_back(Node->getOperand(0)); } break; case ISD::STACKRESTORE: // Expand to CopyToReg if the target set // StackPointerRegisterToSaveRestore. if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) { Results.push_back(DAG.getCopyToReg(Node->getOperand(0), dl, SP, Node->getOperand(1))); } else { Results.push_back(Node->getOperand(0)); } break; case ISD::GET_DYNAMIC_AREA_OFFSET: Results.push_back(DAG.getConstant(0, dl, Node->getValueType(0))); Results.push_back(Results[0].getValue(0)); break; case ISD::FCOPYSIGN: Results.push_back(ExpandFCOPYSIGN(Node)); break; case ISD::FNEG: // Expand Y = FNEG(X) -> Y = SUB -0.0, X Tmp1 = DAG.getConstantFP(-0.0, dl, Node->getValueType(0)); // TODO: If FNEG has fast-math-flags, propagate them to the FSUB. Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1, Node->getOperand(0)); Results.push_back(Tmp1); break; case ISD::FABS: Results.push_back(ExpandFABS(Node)); break; case ISD::SMIN: case ISD::SMAX: case ISD::UMIN: case ISD::UMAX: { // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B ISD::CondCode Pred; switch (Node->getOpcode()) { default: llvm_unreachable("How did we get here?"); case ISD::SMAX: Pred = ISD::SETGT; break; case ISD::SMIN: Pred = ISD::SETLT; break; case ISD::UMAX: Pred = ISD::SETUGT; break; case ISD::UMIN: Pred = ISD::SETULT; break; } Tmp1 = Node->getOperand(0); Tmp2 = Node->getOperand(1); Tmp1 = DAG.getSelectCC(dl, Tmp1, Tmp2, Tmp1, Tmp2, Pred); Results.push_back(Tmp1); break; } case ISD::FSIN: case ISD::FCOS: { EVT VT = Node->getValueType(0); // Turn fsin / fcos into ISD::FSINCOS node if there are a pair of fsin / // fcos which share the same operand and both are used. if ((TLI.isOperationLegalOrCustom(ISD::FSINCOS, VT) || canCombineSinCosLibcall(Node, TLI, TM)) && useSinCos(Node)) { SDVTList VTs = DAG.getVTList(VT, VT); Tmp1 = DAG.getNode(ISD::FSINCOS, dl, VTs, Node->getOperand(0)); if (Node->getOpcode() == ISD::FCOS) Tmp1 = Tmp1.getValue(1); Results.push_back(Tmp1); } break; } case ISD::FMAD: llvm_unreachable("Illegal fmad should never be formed"); case ISD::FP16_TO_FP: if (Node->getValueType(0) != MVT::f32) { // We can extend to types bigger than f32 in two steps without changing // the result. Since "f16 -> f32" is much more commonly available, give // CodeGen the option of emitting that before resorting to a libcall. SDValue Res = DAG.getNode(ISD::FP16_TO_FP, dl, MVT::f32, Node->getOperand(0)); Results.push_back( DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Res)); } break; case ISD::FP_TO_FP16: if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) { SDValue Op = Node->getOperand(0); MVT SVT = Op.getSimpleValueType(); if ((SVT == MVT::f64 || SVT == MVT::f80) && TLI.isOperationLegalOrCustom(ISD::FP_TO_FP16, MVT::f32)) { // Under fastmath, we can expand this node into a fround followed by // a float-half conversion. SDValue FloatVal = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Op, DAG.getIntPtrConstant(0, dl)); Results.push_back( DAG.getNode(ISD::FP_TO_FP16, dl, Node->getValueType(0), FloatVal)); } } break; case ISD::ConstantFP: { ConstantFPSDNode *CFP = cast(Node); // Check to see if this FP immediate is already legal. // If this is a legal constant, turn it into a TargetConstantFP node. if (!TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0))) Results.push_back(ExpandConstantFP(CFP, true)); break; } case ISD::Constant: { ConstantSDNode *CP = cast(Node); Results.push_back(ExpandConstant(CP)); break; } case ISD::FSUB: { EVT VT = Node->getValueType(0); if (TLI.isOperationLegalOrCustom(ISD::FADD, VT) && TLI.isOperationLegalOrCustom(ISD::FNEG, VT)) { const SDNodeFlags *Flags = &cast(Node)->Flags; Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1)); Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1, Flags); Results.push_back(Tmp1); } break; } case ISD::SUB: { EVT VT = Node->getValueType(0); assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) && TLI.isOperationLegalOrCustom(ISD::XOR, VT) && "Don't know how to expand this subtraction!"); Tmp1 = DAG.getNode(ISD::XOR, dl, VT, Node->getOperand(1), DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl, VT)); Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, dl, VT)); Results.push_back(DAG.getNode(ISD::ADD, dl, VT, Node->getOperand(0), Tmp1)); break; } case ISD::UREM: case ISD::SREM: { EVT VT = Node->getValueType(0); bool isSigned = Node->getOpcode() == ISD::SREM; unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV; unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; Tmp2 = Node->getOperand(0); Tmp3 = Node->getOperand(1); if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) { SDVTList VTs = DAG.getVTList(VT, VT); Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1); Results.push_back(Tmp1); } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) { // X % Y -> X-X/Y*Y Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3); Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3); Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1); Results.push_back(Tmp1); } break; } case ISD::UDIV: case ISD::SDIV: { bool isSigned = Node->getOpcode() == ISD::SDIV; unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; EVT VT = Node->getValueType(0); if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) { SDVTList VTs = DAG.getVTList(VT, VT); Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0), Node->getOperand(1)); Results.push_back(Tmp1); } break; } case ISD::MULHU: case ISD::MULHS: { unsigned ExpandOpcode = Node->getOpcode() == ISD::MULHU ? ISD::UMUL_LOHI : ISD::SMUL_LOHI; EVT VT = Node->getValueType(0); SDVTList VTs = DAG.getVTList(VT, VT); assert(TLI.isOperationLegalOrCustom(ExpandOpcode, VT) && "If this wasn't legal, it shouldn't have been created!"); Tmp1 = DAG.getNode(ExpandOpcode, dl, VTs, Node->getOperand(0), Node->getOperand(1)); Results.push_back(Tmp1.getValue(1)); break; } case ISD::MUL: { EVT VT = Node->getValueType(0); SDVTList VTs = DAG.getVTList(VT, VT); // See if multiply or divide can be lowered using two-result operations. // We just need the low half of the multiply; try both the signed // and unsigned forms. If the target supports both SMUL_LOHI and // UMUL_LOHI, form a preference by checking which forms of plain // MULH it supports. bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, VT); bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, VT); bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, VT); bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, VT); unsigned OpToUse = 0; if (HasSMUL_LOHI && !HasMULHS) { OpToUse = ISD::SMUL_LOHI; } else if (HasUMUL_LOHI && !HasMULHU) { OpToUse = ISD::UMUL_LOHI; } else if (HasSMUL_LOHI) { OpToUse = ISD::SMUL_LOHI; } else if (HasUMUL_LOHI) { OpToUse = ISD::UMUL_LOHI; } if (OpToUse) { Results.push_back(DAG.getNode(OpToUse, dl, VTs, Node->getOperand(0), Node->getOperand(1))); break; } SDValue Lo, Hi; EVT HalfType = VT.getHalfSizedIntegerVT(*DAG.getContext()); if (TLI.isOperationLegalOrCustom(ISD::ZERO_EXTEND, VT) && TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND, VT) && TLI.isOperationLegalOrCustom(ISD::SHL, VT) && TLI.isOperationLegalOrCustom(ISD::OR, VT) && TLI.expandMUL(Node, Lo, Hi, HalfType, DAG)) { Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo); Hi = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Hi); SDValue Shift = DAG.getConstant(HalfType.getSizeInBits(), dl, TLI.getShiftAmountTy(HalfType, DAG.getDataLayout())); Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift); Results.push_back(DAG.getNode(ISD::OR, dl, VT, Lo, Hi)); } break; } case ISD::SADDO: case ISD::SSUBO: { SDValue LHS = Node->getOperand(0); SDValue RHS = Node->getOperand(1); SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ? ISD::ADD : ISD::SUB, dl, LHS.getValueType(), LHS, RHS); Results.push_back(Sum); EVT ResultType = Node->getValueType(1); EVT OType = getSetCCResultType(Node->getValueType(0)); SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType()); // LHSSign -> LHS >= 0 // RHSSign -> RHS >= 0 // SumSign -> Sum >= 0 // // Add: // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) // Sub: // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) // SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE); SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE); SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign, Node->getOpcode() == ISD::SADDO ? ISD::SETEQ : ISD::SETNE); SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE); SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE); SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE); Results.push_back(DAG.getBoolExtOrTrunc(Cmp, dl, ResultType, ResultType)); break; } case ISD::UADDO: case ISD::USUBO: { SDValue LHS = Node->getOperand(0); SDValue RHS = Node->getOperand(1); SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::UADDO ? ISD::ADD : ISD::SUB, dl, LHS.getValueType(), LHS, RHS); Results.push_back(Sum); EVT ResultType = Node->getValueType(1); EVT SetCCType = getSetCCResultType(Node->getValueType(0)); ISD::CondCode CC = Node->getOpcode() == ISD::UADDO ? ISD::SETULT : ISD::SETUGT; SDValue SetCC = DAG.getSetCC(dl, SetCCType, Sum, LHS, CC); Results.push_back(DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType)); break; } case ISD::UMULO: case ISD::SMULO: { EVT VT = Node->getValueType(0); EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); SDValue LHS = Node->getOperand(0); SDValue RHS = Node->getOperand(1); SDValue BottomHalf; SDValue TopHalf; static const unsigned Ops[2][3] = { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND }, { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }}; bool isSigned = Node->getOpcode() == ISD::SMULO; if (TLI.isOperationLegalOrCustom(Ops[isSigned][0], VT)) { BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS); TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS); } else if (TLI.isOperationLegalOrCustom(Ops[isSigned][1], VT)) { BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS, RHS); TopHalf = BottomHalf.getValue(1); } else if (TLI.isTypeLegal(WideVT)) { LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS); BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1, DAG.getIntPtrConstant(0, dl)); TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1, DAG.getIntPtrConstant(1, dl)); } else { // We can fall back to a libcall with an illegal type for the MUL if we // have a libcall big enough. // Also, we can fall back to a division in some cases, but that's a big // performance hit in the general case. RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (WideVT == MVT::i16) LC = RTLIB::MUL_I16; else if (WideVT == MVT::i32) LC = RTLIB::MUL_I32; else if (WideVT == MVT::i64) LC = RTLIB::MUL_I64; else if (WideVT == MVT::i128) LC = RTLIB::MUL_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!"); // The high part is obtained by SRA'ing all but one of the bits of low // part. unsigned LoSize = VT.getSizeInBits(); SDValue HiLHS = DAG.getNode(ISD::SRA, dl, VT, RHS, DAG.getConstant(LoSize - 1, dl, TLI.getPointerTy(DAG.getDataLayout()))); SDValue HiRHS = DAG.getNode(ISD::SRA, dl, VT, LHS, DAG.getConstant(LoSize - 1, dl, TLI.getPointerTy(DAG.getDataLayout()))); // Here we're passing the 2 arguments explicitly as 4 arguments that are // pre-lowered to the correct types. This all depends upon WideVT not // being a legal type for the architecture and thus has to be split to // two arguments. SDValue Args[] = { LHS, HiLHS, RHS, HiRHS }; SDValue Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl); BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, DAG.getIntPtrConstant(0, dl)); TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, DAG.getIntPtrConstant(1, dl)); // Ret is a node with an illegal type. Because such things are not // generally permitted during this phase of legalization, make sure the // node has no more uses. The above EXTRACT_ELEMENT nodes should have been // folded. assert(Ret->use_empty() && "Unexpected uses of illegally type from expanded lib call."); } if (isSigned) { Tmp1 = DAG.getConstant( VT.getSizeInBits() - 1, dl, TLI.getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout())); Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, Tmp1); TopHalf = DAG.getSetCC(dl, getSetCCResultType(VT), TopHalf, Tmp1, ISD::SETNE); } else { TopHalf = DAG.getSetCC(dl, getSetCCResultType(VT), TopHalf, DAG.getConstant(0, dl, VT), ISD::SETNE); } Results.push_back(BottomHalf); Results.push_back(TopHalf); break; } case ISD::BUILD_PAIR: { EVT PairTy = Node->getValueType(0); Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, PairTy, Node->getOperand(0)); Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, PairTy, Node->getOperand(1)); Tmp2 = DAG.getNode( ISD::SHL, dl, PairTy, Tmp2, DAG.getConstant(PairTy.getSizeInBits() / 2, dl, TLI.getShiftAmountTy(PairTy, DAG.getDataLayout()))); Results.push_back(DAG.getNode(ISD::OR, dl, PairTy, Tmp1, Tmp2)); break; } case ISD::SELECT: Tmp1 = Node->getOperand(0); Tmp2 = Node->getOperand(1); Tmp3 = Node->getOperand(2); if (Tmp1.getOpcode() == ISD::SETCC) { Tmp1 = DAG.getSelectCC(dl, Tmp1.getOperand(0), Tmp1.getOperand(1), Tmp2, Tmp3, cast(Tmp1.getOperand(2))->get()); } else { Tmp1 = DAG.getSelectCC(dl, Tmp1, DAG.getConstant(0, dl, Tmp1.getValueType()), Tmp2, Tmp3, ISD::SETNE); } Results.push_back(Tmp1); break; case ISD::BR_JT: { SDValue Chain = Node->getOperand(0); SDValue Table = Node->getOperand(1); SDValue Index = Node->getOperand(2); EVT PTy = TLI.getPointerTy(DAG.getDataLayout()); const DataLayout &TD = DAG.getDataLayout(); unsigned EntrySize = DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD); Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index, DAG.getConstant(EntrySize, dl, Index.getValueType())); SDValue Addr = DAG.getNode(ISD::ADD, dl, Index.getValueType(), Index, Table); EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8); SDValue LD = DAG.getExtLoad( ISD::SEXTLOAD, dl, PTy, Chain, Addr, MachinePointerInfo::getJumpTable(DAG.getMachineFunction()), MemVT); Addr = LD; if (TM.isPositionIndependent()) { // For PIC, the sequence is: // BRIND(load(Jumptable + index) + RelocBase) // RelocBase can be JumpTable, GOT or some sort of global base. Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, TLI.getPICJumpTableRelocBase(Table, DAG)); } Tmp1 = DAG.getNode(ISD::BRIND, dl, MVT::Other, LD.getValue(1), Addr); Results.push_back(Tmp1); break; } case ISD::BRCOND: // Expand brcond's setcc into its constituent parts and create a BR_CC // Node. Tmp1 = Node->getOperand(0); Tmp2 = Node->getOperand(1); if (Tmp2.getOpcode() == ISD::SETCC) { Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1, Tmp2.getOperand(2), Tmp2.getOperand(0), Tmp2.getOperand(1), Node->getOperand(2)); } else { // We test only the i1 bit. Skip the AND if UNDEF. Tmp3 = (Tmp2.isUndef()) ? Tmp2 : DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2, DAG.getConstant(1, dl, Tmp2.getValueType())); Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1, DAG.getCondCode(ISD::SETNE), Tmp3, DAG.getConstant(0, dl, Tmp3.getValueType()), Node->getOperand(2)); } Results.push_back(Tmp1); break; case ISD::SETCC: { Tmp1 = Node->getOperand(0); Tmp2 = Node->getOperand(1); Tmp3 = Node->getOperand(2); bool Legalized = LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, Tmp3, NeedInvert, dl); if (Legalized) { // If we expanded the SETCC by swapping LHS and RHS, or by inverting the // condition code, create a new SETCC node. if (Tmp3.getNode()) Tmp1 = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), Tmp1, Tmp2, Tmp3); // If we expanded the SETCC by inverting the condition code, then wrap // the existing SETCC in a NOT to restore the intended condition. if (NeedInvert) Tmp1 = DAG.getLogicalNOT(dl, Tmp1, Tmp1->getValueType(0)); Results.push_back(Tmp1); break; } // Otherwise, SETCC for the given comparison type must be completely // illegal; expand it into a SELECT_CC. EVT VT = Node->getValueType(0); int TrueValue; switch (TLI.getBooleanContents(Tmp1->getValueType(0))) { case TargetLowering::ZeroOrOneBooleanContent: case TargetLowering::UndefinedBooleanContent: TrueValue = 1; break; case TargetLowering::ZeroOrNegativeOneBooleanContent: TrueValue = -1; break; } Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, VT, Tmp1, Tmp2, DAG.getConstant(TrueValue, dl, VT), DAG.getConstant(0, dl, VT), Tmp3); Results.push_back(Tmp1); break; } case ISD::SELECT_CC: { Tmp1 = Node->getOperand(0); // LHS Tmp2 = Node->getOperand(1); // RHS Tmp3 = Node->getOperand(2); // True Tmp4 = Node->getOperand(3); // False EVT VT = Node->getValueType(0); SDValue CC = Node->getOperand(4); ISD::CondCode CCOp = cast(CC)->get(); if (TLI.isCondCodeLegal(CCOp, Tmp1.getSimpleValueType())) { // If the condition code is legal, then we need to expand this // node using SETCC and SELECT. EVT CmpVT = Tmp1.getValueType(); assert(!TLI.isOperationExpand(ISD::SELECT, VT) && "Cannot expand ISD::SELECT_CC when ISD::SELECT also needs to be " "expanded."); EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT); SDValue Cond = DAG.getNode(ISD::SETCC, dl, CCVT, Tmp1, Tmp2, CC); Results.push_back(DAG.getSelect(dl, VT, Cond, Tmp3, Tmp4)); break; } // SELECT_CC is legal, so the condition code must not be. bool Legalized = false; // Try to legalize by inverting the condition. This is for targets that // might support an ordered version of a condition, but not the unordered // version (or vice versa). ISD::CondCode InvCC = ISD::getSetCCInverse(CCOp, Tmp1.getValueType().isInteger()); if (TLI.isCondCodeLegal(InvCC, Tmp1.getSimpleValueType())) { // Use the new condition code and swap true and false Legalized = true; Tmp1 = DAG.getSelectCC(dl, Tmp1, Tmp2, Tmp4, Tmp3, InvCC); } else { // If The inverse is not legal, then try to swap the arguments using // the inverse condition code. ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InvCC); if (TLI.isCondCodeLegal(SwapInvCC, Tmp1.getSimpleValueType())) { // The swapped inverse condition is legal, so swap true and false, // lhs and rhs. Legalized = true; Tmp1 = DAG.getSelectCC(dl, Tmp2, Tmp1, Tmp4, Tmp3, SwapInvCC); } } if (!Legalized) { Legalized = LegalizeSetCCCondCode( getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC, NeedInvert, dl); assert(Legalized && "Can't legalize SELECT_CC with legal condition!"); // If we expanded the SETCC by inverting the condition code, then swap // the True/False operands to match. if (NeedInvert) std::swap(Tmp3, Tmp4); // If we expanded the SETCC by swapping LHS and RHS, or by inverting the // condition code, create a new SELECT_CC node. if (CC.getNode()) { Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2, Tmp3, Tmp4, CC); } else { Tmp2 = DAG.getConstant(0, dl, Tmp1.getValueType()); CC = DAG.getCondCode(ISD::SETNE); Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2, Tmp3, Tmp4, CC); } } Results.push_back(Tmp1); break; } case ISD::BR_CC: { Tmp1 = Node->getOperand(0); // Chain Tmp2 = Node->getOperand(2); // LHS Tmp3 = Node->getOperand(3); // RHS Tmp4 = Node->getOperand(1); // CC bool Legalized = LegalizeSetCCCondCode(getSetCCResultType( Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, NeedInvert, dl); (void)Legalized; assert(Legalized && "Can't legalize BR_CC with legal condition!"); // If we expanded the SETCC by inverting the condition code, then wrap // the existing SETCC in a NOT to restore the intended condition. if (NeedInvert) Tmp4 = DAG.getNOT(dl, Tmp4, Tmp4->getValueType(0)); // If we expanded the SETCC by swapping LHS and RHS, create a new BR_CC // node. if (Tmp4.getNode()) { Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2, Tmp3, Node->getOperand(4)); } else { Tmp3 = DAG.getConstant(0, dl, Tmp2.getValueType()); Tmp4 = DAG.getCondCode(ISD::SETNE); Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2, Tmp3, Node->getOperand(4)); } Results.push_back(Tmp1); break; } case ISD::BUILD_VECTOR: Results.push_back(ExpandBUILD_VECTOR(Node)); break; case ISD::SRA: case ISD::SRL: case ISD::SHL: { // Scalarize vector SRA/SRL/SHL. EVT VT = Node->getValueType(0); assert(VT.isVector() && "Unable to legalize non-vector shift"); assert(TLI.isTypeLegal(VT.getScalarType())&& "Element type must be legal"); unsigned NumElem = VT.getVectorNumElements(); SmallVector Scalars; for (unsigned Idx = 0; Idx < NumElem; Idx++) { SDValue Ex = DAG.getNode( ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), Node->getOperand(0), DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); SDValue Sh = DAG.getNode( ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), Node->getOperand(1), DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); Scalars.push_back(DAG.getNode(Node->getOpcode(), dl, VT.getScalarType(), Ex, Sh)); } SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), Scalars); ReplaceNode(SDValue(Node, 0), Result); break; } case ISD::GLOBAL_OFFSET_TABLE: case ISD::GlobalAddress: case ISD::GlobalTLSAddress: case ISD::ExternalSymbol: case ISD::ConstantPool: case ISD::JumpTable: case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_WO_CHAIN: case ISD::INTRINSIC_VOID: // FIXME: Custom lowering for these operations shouldn't return null! break; } // Replace the original node with the legalized result. if (Results.empty()) return false; ReplaceNode(Node, Results.data()); return true; } void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { SmallVector Results; SDLoc dl(Node); SDValue Tmp1, Tmp2, Tmp3, Tmp4; unsigned Opc = Node->getOpcode(); switch (Opc) { case ISD::ATOMIC_FENCE: { // If the target didn't lower this, lower it to '__sync_synchronize()' call // FIXME: handle "fence singlethread" more efficiently. TargetLowering::ArgListTy Args; TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl) .setChain(Node->getOperand(0)) .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), DAG.getExternalSymbol("__sync_synchronize", TLI.getPointerTy(DAG.getDataLayout())), std::move(Args)); std::pair CallResult = TLI.LowerCallTo(CLI); Results.push_back(CallResult.second); break; } // By default, atomic intrinsics are marked Legal and lowered. Targets // which don't support them directly, however, may want libcalls, in which // case they mark them Expand, and we get here. case ISD::ATOMIC_SWAP: case ISD::ATOMIC_LOAD_ADD: case ISD::ATOMIC_LOAD_SUB: case ISD::ATOMIC_LOAD_AND: case ISD::ATOMIC_LOAD_OR: case ISD::ATOMIC_LOAD_XOR: case ISD::ATOMIC_LOAD_NAND: case ISD::ATOMIC_LOAD_MIN: case ISD::ATOMIC_LOAD_MAX: case ISD::ATOMIC_LOAD_UMIN: case ISD::ATOMIC_LOAD_UMAX: case ISD::ATOMIC_CMP_SWAP: { MVT VT = cast(Node)->getMemoryVT().getSimpleVT(); RTLIB::Libcall LC = RTLIB::getSYNC(Opc, VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!"); std::pair Tmp = ExpandChainLibCall(LC, Node, false); Results.push_back(Tmp.first); Results.push_back(Tmp.second); break; } case ISD::TRAP: { // If this operation is not supported, lower it to 'abort()' call TargetLowering::ArgListTy Args; TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl) .setChain(Node->getOperand(0)) .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), DAG.getExternalSymbol("abort", TLI.getPointerTy(DAG.getDataLayout())), std::move(Args)); std::pair CallResult = TLI.LowerCallTo(CLI); Results.push_back(CallResult.second); break; } case ISD::FMINNUM: Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64, RTLIB::FMIN_F80, RTLIB::FMIN_F128, RTLIB::FMIN_PPCF128)); break; case ISD::FMAXNUM: Results.push_back(ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64, RTLIB::FMAX_F80, RTLIB::FMAX_F128, RTLIB::FMAX_PPCF128)); break; case ISD::FSQRT: Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64, RTLIB::SQRT_F80, RTLIB::SQRT_F128, RTLIB::SQRT_PPCF128)); break; case ISD::FSIN: Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64, RTLIB::SIN_F80, RTLIB::SIN_F128, RTLIB::SIN_PPCF128)); break; case ISD::FCOS: Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64, RTLIB::COS_F80, RTLIB::COS_F128, RTLIB::COS_PPCF128)); break; case ISD::FSINCOS: // Expand into sincos libcall. ExpandSinCosLibCall(Node, Results); break; case ISD::FLOG: Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, RTLIB::LOG_F80, RTLIB::LOG_F128, RTLIB::LOG_PPCF128)); break; case ISD::FLOG2: Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, RTLIB::LOG2_F80, RTLIB::LOG2_F128, RTLIB::LOG2_PPCF128)); break; case ISD::FLOG10: Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, RTLIB::LOG10_F80, RTLIB::LOG10_F128, RTLIB::LOG10_PPCF128)); break; case ISD::FEXP: Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, RTLIB::EXP_F80, RTLIB::EXP_F128, RTLIB::EXP_PPCF128)); break; case ISD::FEXP2: Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, RTLIB::EXP2_F80, RTLIB::EXP2_F128, RTLIB::EXP2_PPCF128)); break; case ISD::FTRUNC: Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, RTLIB::TRUNC_PPCF128)); break; case ISD::FFLOOR: Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, RTLIB::FLOOR_PPCF128)); break; case ISD::FCEIL: Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64, RTLIB::CEIL_F80, RTLIB::CEIL_F128, RTLIB::CEIL_PPCF128)); break; case ISD::FRINT: Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64, RTLIB::RINT_F80, RTLIB::RINT_F128, RTLIB::RINT_PPCF128)); break; case ISD::FNEARBYINT: Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32, RTLIB::NEARBYINT_F64, RTLIB::NEARBYINT_F80, RTLIB::NEARBYINT_F128, RTLIB::NEARBYINT_PPCF128)); break; case ISD::FROUND: Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32, RTLIB::ROUND_F64, RTLIB::ROUND_F80, RTLIB::ROUND_F128, RTLIB::ROUND_PPCF128)); break; case ISD::FPOWI: Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64, RTLIB::POWI_F80, RTLIB::POWI_F128, RTLIB::POWI_PPCF128)); break; case ISD::FPOW: Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64, RTLIB::POW_F80, RTLIB::POW_F128, RTLIB::POW_PPCF128)); break; case ISD::FDIV: Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64, RTLIB::DIV_F80, RTLIB::DIV_F128, RTLIB::DIV_PPCF128)); break; case ISD::FREM: Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64, RTLIB::REM_F80, RTLIB::REM_F128, RTLIB::REM_PPCF128)); break; case ISD::FMA: Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64, RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128)); break; case ISD::FADD: Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64, RTLIB::ADD_F80, RTLIB::ADD_F128, RTLIB::ADD_PPCF128)); break; case ISD::FMUL: Results.push_back(ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64, RTLIB::MUL_F80, RTLIB::MUL_F128, RTLIB::MUL_PPCF128)); break; case ISD::FP16_TO_FP: if (Node->getValueType(0) == MVT::f32) { Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false)); } break; case ISD::FP_TO_FP16: { RTLIB::Libcall LC = RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::f16); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_fp16"); Results.push_back(ExpandLibCall(LC, Node, false)); break; } case ISD::FSUB: Results.push_back(ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64, RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128)); break; case ISD::SREM: Results.push_back(ExpandIntLibCall(Node, true, RTLIB::SREM_I8, RTLIB::SREM_I16, RTLIB::SREM_I32, RTLIB::SREM_I64, RTLIB::SREM_I128)); break; case ISD::UREM: Results.push_back(ExpandIntLibCall(Node, false, RTLIB::UREM_I8, RTLIB::UREM_I16, RTLIB::UREM_I32, RTLIB::UREM_I64, RTLIB::UREM_I128)); break; case ISD::SDIV: Results.push_back(ExpandIntLibCall(Node, true, RTLIB::SDIV_I8, RTLIB::SDIV_I16, RTLIB::SDIV_I32, RTLIB::SDIV_I64, RTLIB::SDIV_I128)); break; case ISD::UDIV: Results.push_back(ExpandIntLibCall(Node, false, RTLIB::UDIV_I8, RTLIB::UDIV_I16, RTLIB::UDIV_I32, RTLIB::UDIV_I64, RTLIB::UDIV_I128)); break; case ISD::SDIVREM: case ISD::UDIVREM: // Expand into divrem libcall ExpandDivRemLibCall(Node, Results); break; case ISD::MUL: Results.push_back(ExpandIntLibCall(Node, false, RTLIB::MUL_I8, RTLIB::MUL_I16, RTLIB::MUL_I32, RTLIB::MUL_I64, RTLIB::MUL_I128)); break; } // Replace the original node with the legalized result. if (!Results.empty()) ReplaceNode(Node, Results.data()); } // Determine the vector type to use in place of an original scalar element when // promoting equally sized vectors. static MVT getPromotedVectorElementType(const TargetLowering &TLI, MVT EltVT, MVT NewEltVT) { unsigned OldEltsPerNewElt = EltVT.getSizeInBits() / NewEltVT.getSizeInBits(); MVT MidVT = MVT::getVectorVT(NewEltVT, OldEltsPerNewElt); assert(TLI.isTypeLegal(MidVT) && "unexpected"); return MidVT; } void SelectionDAGLegalize::PromoteNode(SDNode *Node) { SmallVector Results; MVT OVT = Node->getSimpleValueType(0); if (Node->getOpcode() == ISD::UINT_TO_FP || Node->getOpcode() == ISD::SINT_TO_FP || Node->getOpcode() == ISD::SETCC || Node->getOpcode() == ISD::EXTRACT_VECTOR_ELT || Node->getOpcode() == ISD::INSERT_VECTOR_ELT) { OVT = Node->getOperand(0).getSimpleValueType(); } if (Node->getOpcode() == ISD::BR_CC) OVT = Node->getOperand(2).getSimpleValueType(); MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT); SDLoc dl(Node); SDValue Tmp1, Tmp2, Tmp3; switch (Node->getOpcode()) { case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: case ISD::CTPOP: // Zero extend the argument. Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); if (Node->getOpcode() == ISD::CTTZ) { // The count is the same in the promoted type except if the original // value was zero. This can be handled by setting the bit just off // the top of the original type. auto TopBit = APInt::getOneBitSet(NVT.getSizeInBits(), OVT.getSizeInBits()); Tmp1 = DAG.getNode(ISD::OR, dl, NVT, Tmp1, DAG.getConstant(TopBit, dl, NVT)); } // Perform the larger operation. For CTPOP and CTTZ_ZERO_UNDEF, this is // already the correct result. Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); if (Node->getOpcode() == ISD::CTLZ || Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) { // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT)) Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1, DAG.getConstant(NVT.getSizeInBits() - OVT.getSizeInBits(), dl, NVT)); } Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1)); break; case ISD::BSWAP: { unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits(); Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); Tmp1 = DAG.getNode(ISD::BSWAP, dl, NVT, Tmp1); Tmp1 = DAG.getNode( ISD::SRL, dl, NVT, Tmp1, DAG.getConstant(DiffBits, dl, TLI.getShiftAmountTy(NVT, DAG.getDataLayout()))); Results.push_back(Tmp1); break; } case ISD::FP_TO_UINT: case ISD::FP_TO_SINT: Tmp1 = PromoteLegalFP_TO_INT(Node->getOperand(0), Node->getValueType(0), Node->getOpcode() == ISD::FP_TO_SINT, dl); Results.push_back(Tmp1); break; case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: Tmp1 = PromoteLegalINT_TO_FP(Node->getOperand(0), Node->getValueType(0), Node->getOpcode() == ISD::SINT_TO_FP, dl); Results.push_back(Tmp1); break; case ISD::VAARG: { SDValue Chain = Node->getOperand(0); // Get the chain. SDValue Ptr = Node->getOperand(1); // Get the pointer. unsigned TruncOp; if (OVT.isVector()) { TruncOp = ISD::BITCAST; } else { assert(OVT.isInteger() && "VAARG promotion is supported only for vectors or integer types"); TruncOp = ISD::TRUNCATE; } // Perform the larger operation, then convert back Tmp1 = DAG.getVAArg(NVT, dl, Chain, Ptr, Node->getOperand(2), Node->getConstantOperandVal(3)); Chain = Tmp1.getValue(1); Tmp2 = DAG.getNode(TruncOp, dl, OVT, Tmp1); // Modified the chain result - switch anything that used the old chain to // use the new one. DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp2); DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain); if (UpdatedNodes) { UpdatedNodes->insert(Tmp2.getNode()); UpdatedNodes->insert(Chain.getNode()); } ReplacedNode(Node); break; } case ISD::AND: case ISD::OR: case ISD::XOR: { unsigned ExtOp, TruncOp; if (OVT.isVector()) { ExtOp = ISD::BITCAST; TruncOp = ISD::BITCAST; } else { assert(OVT.isInteger() && "Cannot promote logic operation"); ExtOp = ISD::ANY_EXTEND; TruncOp = ISD::TRUNCATE; } // Promote each of the values to the new type. Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1)); // Perform the larger operation, then convert back Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2); Results.push_back(DAG.getNode(TruncOp, dl, OVT, Tmp1)); break; } case ISD::SELECT: { unsigned ExtOp, TruncOp; if (Node->getValueType(0).isVector() || Node->getValueType(0).getSizeInBits() == NVT.getSizeInBits()) { ExtOp = ISD::BITCAST; TruncOp = ISD::BITCAST; } else if (Node->getValueType(0).isInteger()) { ExtOp = ISD::ANY_EXTEND; TruncOp = ISD::TRUNCATE; } else { ExtOp = ISD::FP_EXTEND; TruncOp = ISD::FP_ROUND; } Tmp1 = Node->getOperand(0); // Promote each of the values to the new type. Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1)); Tmp3 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2)); // Perform the larger operation, then round down. Tmp1 = DAG.getSelect(dl, NVT, Tmp1, Tmp2, Tmp3); if (TruncOp != ISD::FP_ROUND) Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1); else Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1, DAG.getIntPtrConstant(0, dl)); Results.push_back(Tmp1); break; } case ISD::VECTOR_SHUFFLE: { ArrayRef Mask = cast(Node)->getMask(); // Cast the two input vectors. Tmp1 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(1)); // Convert the shuffle mask to the right # elements. Tmp1 = ShuffleWithNarrowerEltType(NVT, OVT, dl, Tmp1, Tmp2, Mask); Tmp1 = DAG.getNode(ISD::BITCAST, dl, OVT, Tmp1); Results.push_back(Tmp1); break; } case ISD::SETCC: { unsigned ExtOp = ISD::FP_EXTEND; if (NVT.isInteger()) { ISD::CondCode CCCode = cast(Node->getOperand(2))->get(); ExtOp = isSignedIntSetCC(CCCode) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; } Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1)); Results.push_back(DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), Tmp1, Tmp2, Node->getOperand(2))); break; } case ISD::BR_CC: { unsigned ExtOp = ISD::FP_EXTEND; if (NVT.isInteger()) { ISD::CondCode CCCode = cast(Node->getOperand(1))->get(); ExtOp = isSignedIntSetCC(CCCode) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; } Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2)); Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(3)); Results.push_back(DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Node->getOperand(0), Node->getOperand(1), Tmp1, Tmp2, Node->getOperand(4))); break; } case ISD::FADD: case ISD::FSUB: case ISD::FMUL: case ISD::FDIV: case ISD::FREM: case ISD::FMINNUM: case ISD::FMAXNUM: case ISD::FPOW: { Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, Node->getFlags()); Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp3, DAG.getIntPtrConstant(0, dl))); break; } case ISD::FMA: { Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); Tmp3 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(2)); Results.push_back( DAG.getNode(ISD::FP_ROUND, dl, OVT, DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, Tmp3), DAG.getIntPtrConstant(0, dl))); break; } case ISD::FCOPYSIGN: case ISD::FPOWI: { Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = Node->getOperand(1); Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2); // fcopysign doesn't change anything but the sign bit, so // (fp_round (fcopysign (fpext a), b)) // is as precise as // (fp_round (fpext a)) // which is a no-op. Mark it as a TRUNCating FP_ROUND. const bool isTrunc = (Node->getOpcode() == ISD::FCOPYSIGN); Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp3, DAG.getIntPtrConstant(isTrunc, dl))); break; } case ISD::FFLOOR: case ISD::FCEIL: case ISD::FRINT: case ISD::FNEARBYINT: case ISD::FROUND: case ISD::FTRUNC: case ISD::FNEG: case ISD::FSQRT: case ISD::FSIN: case ISD::FCOS: case ISD::FLOG: case ISD::FLOG2: case ISD::FLOG10: case ISD::FABS: case ISD::FEXP: case ISD::FEXP2: { Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp2, DAG.getIntPtrConstant(0, dl))); break; } case ISD::BUILD_VECTOR: { MVT EltVT = OVT.getVectorElementType(); MVT NewEltVT = NVT.getVectorElementType(); // Handle bitcasts to a different vector type with the same total bit size // // e.g. v2i64 = build_vector i64:x, i64:y => v4i32 // => // v4i32 = concat_vectors (v2i32 (bitcast i64:x)), (v2i32 (bitcast i64:y)) assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() && "Invalid promote type for build_vector"); assert(NewEltVT.bitsLT(EltVT) && "not handled"); MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT); SmallVector NewOps; for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I) { SDValue Op = Node->getOperand(I); NewOps.push_back(DAG.getNode(ISD::BITCAST, SDLoc(Op), MidVT, Op)); } SDLoc SL(Node); SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SL, NVT, NewOps); SDValue CvtVec = DAG.getNode(ISD::BITCAST, SL, OVT, Concat); Results.push_back(CvtVec); break; } case ISD::EXTRACT_VECTOR_ELT: { MVT EltVT = OVT.getVectorElementType(); MVT NewEltVT = NVT.getVectorElementType(); // Handle bitcasts to a different vector type with the same total bit size. // // e.g. v2i64 = extract_vector_elt x:v2i64, y:i32 // => // v4i32:castx = bitcast x:v2i64 // // i64 = bitcast // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))), // (i32 (extract_vector_elt castx, (2 * y + 1))) // assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() && "Invalid promote type for extract_vector_elt"); assert(NewEltVT.bitsLT(EltVT) && "not handled"); MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT); unsigned NewEltsPerOldElt = MidVT.getVectorNumElements(); SDValue Idx = Node->getOperand(1); EVT IdxVT = Idx.getValueType(); SDLoc SL(Node); SDValue Factor = DAG.getConstant(NewEltsPerOldElt, SL, IdxVT); SDValue NewBaseIdx = DAG.getNode(ISD::MUL, SL, IdxVT, Idx, Factor); SDValue CastVec = DAG.getNode(ISD::BITCAST, SL, NVT, Node->getOperand(0)); SmallVector NewOps; for (unsigned I = 0; I < NewEltsPerOldElt; ++I) { SDValue IdxOffset = DAG.getConstant(I, SL, IdxVT); SDValue TmpIdx = DAG.getNode(ISD::ADD, SL, IdxVT, NewBaseIdx, IdxOffset); SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, NewEltVT, CastVec, TmpIdx); NewOps.push_back(Elt); } SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, SL, MidVT, NewOps); Results.push_back(DAG.getNode(ISD::BITCAST, SL, EltVT, NewVec)); break; } case ISD::INSERT_VECTOR_ELT: { MVT EltVT = OVT.getVectorElementType(); MVT NewEltVT = NVT.getVectorElementType(); // Handle bitcasts to a different vector type with the same total bit size // // e.g. v2i64 = insert_vector_elt x:v2i64, y:i64, z:i32 // => // v4i32:castx = bitcast x:v2i64 // v2i32:casty = bitcast y:i64 // // v2i64 = bitcast // (v4i32 insert_vector_elt // (v4i32 insert_vector_elt v4i32:castx, // (extract_vector_elt casty, 0), 2 * z), // (extract_vector_elt casty, 1), (2 * z + 1)) assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() && "Invalid promote type for insert_vector_elt"); assert(NewEltVT.bitsLT(EltVT) && "not handled"); MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT); unsigned NewEltsPerOldElt = MidVT.getVectorNumElements(); SDValue Val = Node->getOperand(1); SDValue Idx = Node->getOperand(2); EVT IdxVT = Idx.getValueType(); SDLoc SL(Node); SDValue Factor = DAG.getConstant(NewEltsPerOldElt, SDLoc(), IdxVT); SDValue NewBaseIdx = DAG.getNode(ISD::MUL, SL, IdxVT, Idx, Factor); SDValue CastVec = DAG.getNode(ISD::BITCAST, SL, NVT, Node->getOperand(0)); SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, MidVT, Val); SDValue NewVec = CastVec; for (unsigned I = 0; I < NewEltsPerOldElt; ++I) { SDValue IdxOffset = DAG.getConstant(I, SL, IdxVT); SDValue InEltIdx = DAG.getNode(ISD::ADD, SL, IdxVT, NewBaseIdx, IdxOffset); SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, NewEltVT, CastVal, IdxOffset); NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, SL, NVT, NewVec, Elt, InEltIdx); } Results.push_back(DAG.getNode(ISD::BITCAST, SL, OVT, NewVec)); break; } case ISD::SCALAR_TO_VECTOR: { MVT EltVT = OVT.getVectorElementType(); MVT NewEltVT = NVT.getVectorElementType(); // Handle bitcasts to different vector type with the smae total bit size. // // e.g. v2i64 = scalar_to_vector x:i64 // => // concat_vectors (v2i32 bitcast x:i64), (v2i32 undef) // MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT); SDValue Val = Node->getOperand(0); SDLoc SL(Node); SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, MidVT, Val); SDValue Undef = DAG.getUNDEF(MidVT); SmallVector NewElts; NewElts.push_back(CastVal); for (unsigned I = 1, NElts = OVT.getVectorNumElements(); I != NElts; ++I) NewElts.push_back(Undef); SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SL, NVT, NewElts); SDValue CvtVec = DAG.getNode(ISD::BITCAST, SL, OVT, Concat); Results.push_back(CvtVec); break; } } // Replace the original node with the legalized result. if (!Results.empty()) ReplaceNode(Node, Results.data()); } /// This is the entry point for the file. void SelectionDAG::Legalize() { AssignTopologicalOrder(); SmallPtrSet LegalizedNodes; SelectionDAGLegalize Legalizer(*this, LegalizedNodes); // Visit all the nodes. We start in topological order, so that we see // nodes with their original operands intact. Legalization can produce // new nodes which may themselves need to be legalized. Iterate until all // nodes have been legalized. for (;;) { bool AnyLegalized = false; for (auto NI = allnodes_end(); NI != allnodes_begin();) { --NI; SDNode *N = &*NI; if (N->use_empty() && N != getRoot().getNode()) { ++NI; DeleteNode(N); continue; } if (LegalizedNodes.insert(N).second) { AnyLegalized = true; Legalizer.LegalizeOp(N); if (N->use_empty() && N != getRoot().getNode()) { ++NI; DeleteNode(N); } } } if (!AnyLegalized) break; } // Remove dead nodes now. RemoveDeadNodes(); } bool SelectionDAG::LegalizeOp(SDNode *N, SmallSetVector &UpdatedNodes) { SmallPtrSet LegalizedNodes; SelectionDAGLegalize Legalizer(*this, LegalizedNodes, &UpdatedNodes); // Directly insert the node in question, and legalize it. This will recurse // as needed through operands. LegalizedNodes.insert(N); Legalizer.LegalizeOp(N); return LegalizedNodes.count(N); } Index: projects/clang390-import/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- projects/clang390-import/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (revision 305682) +++ projects/clang390-import/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (revision 305683) @@ -1,9127 +1,9118 @@ //===-- SelectionDAGBuilder.cpp - Selection-DAG building ------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This implements routines for translating from LLVM IR into SelectionDAG IR. // //===----------------------------------------------------------------------===// #include "SelectionDAGBuilder.h" #include "SDNodeDbgValue.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Statepoint.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetSubtargetInfo.h" #include #include using namespace llvm; #define DEBUG_TYPE "isel" /// LimitFloatPrecision - Generate low-precision inline sequences for /// some float libcalls (6, 8 or 12 bits). static unsigned LimitFloatPrecision; static cl::opt LimitFPPrecision("limit-float-precision", cl::desc("Generate low-precision inline sequences " "for some float libcalls"), cl::location(LimitFloatPrecision), cl::init(0)); static cl::opt EnableFMFInDAG("enable-fmf-dag", cl::init(true), cl::Hidden, cl::desc("Enable fast-math-flags for DAG nodes")); /// Minimum jump table density for normal functions. static cl::opt JumpTableDensity("jump-table-density", cl::init(10), cl::Hidden, cl::desc("Minimum density for building a jump table in " "a normal function")); /// Minimum jump table density for -Os or -Oz functions. static cl::opt OptsizeJumpTableDensity("optsize-jump-table-density", cl::init(40), cl::Hidden, cl::desc("Minimum density for building a jump table in " "an optsize function")); // Limit the width of DAG chains. This is important in general to prevent // DAG-based analysis from blowing up. For example, alias analysis and // load clustering may not complete in reasonable time. It is difficult to // recognize and avoid this situation within each individual analysis, and // future analyses are likely to have the same behavior. Limiting DAG width is // the safe approach and will be especially important with global DAGs. // // MaxParallelChains default is arbitrarily high to avoid affecting // optimization, but could be lowered to improve compile time. Any ld-ld-st-st // sequence over this should have been converted to llvm.memcpy by the // frontend. It is easy to induce this behavior with .ll code such as: // %buffer = alloca [4096 x i8] // %data = load [4096 x i8]* %argPtr // store [4096 x i8] %data, [4096 x i8]* %buffer static const unsigned MaxParallelChains = 64; static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V); /// getCopyFromParts - Create a value that contains the specified legal parts /// combined into the value they represent. If the parts combine to a type /// larger than ValueVT then AssertOp can be used to specify whether the extra /// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT /// (ISD::AssertSext). static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V, Optional AssertOp = None) { if (ValueVT.isVector()) return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT, V); assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Val = Parts[0]; if (NumParts > 1) { // Assemble the value from multiple parts. if (ValueVT.isInteger()) { unsigned PartBits = PartVT.getSizeInBits(); unsigned ValueBits = ValueVT.getSizeInBits(); // Assemble the power of 2 part. unsigned RoundParts = NumParts & (NumParts - 1) ? 1 << Log2_32(NumParts) : NumParts; unsigned RoundBits = PartBits * RoundParts; EVT RoundVT = RoundBits == ValueBits ? ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits); SDValue Lo, Hi; EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2); if (RoundParts > 2) { Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2, PartVT, HalfVT, V); Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2, RoundParts / 2, PartVT, HalfVT, V); } else { Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]); Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]); } if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi); if (RoundParts < NumParts) { // Assemble the trailing non-power-of-2 part. unsigned OddParts = NumParts - RoundParts; EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits); Hi = getCopyFromParts(DAG, DL, Parts + RoundParts, OddParts, PartVT, OddVT, V); // Combine the round and odd parts. Lo = Val; if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi); Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi, DAG.getConstant(Lo.getValueType().getSizeInBits(), DL, TLI.getPointerTy(DAG.getDataLayout()))); Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo); Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi); } } else if (PartVT.isFloatingPoint()) { // FP split into multiple FP parts (for ppcf128) assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 && "Unexpected split"); SDValue Lo, Hi; Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]); Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]); if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout())) std::swap(Lo, Hi); Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi); } else { // FP split into integer parts (soft fp) assert(ValueVT.isFloatingPoint() && PartVT.isInteger() && !PartVT.isVector() && "Unexpected split"); EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V); } } // There is now one part, held in Val. Correct it to match ValueVT. // PartEVT is the type of the register class that holds the value. // ValueVT is the type of the inline asm operation. EVT PartEVT = Val.getValueType(); if (PartEVT == ValueVT) return Val; if (PartEVT.isInteger() && ValueVT.isFloatingPoint() && ValueVT.bitsLT(PartEVT)) { // For an FP value in an integer part, we need to truncate to the right // width first. PartEVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val); } // Handle types that have the same size. if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits()) return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); // Handle types with different sizes. if (PartEVT.isInteger() && ValueVT.isInteger()) { if (ValueVT.bitsLT(PartEVT)) { // For a truncate, see if we have any information to // indicate whether the truncated bits will always be // zero or sign-extension. if (AssertOp.hasValue()) Val = DAG.getNode(*AssertOp, DL, PartEVT, Val, DAG.getValueType(ValueVT)); return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); } return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val); } if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { // FP_ROUND's are always exact here. if (ValueVT.bitsLT(Val.getValueType())) return DAG.getNode( ISD::FP_ROUND, DL, ValueVT, Val, DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()))); return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val); } llvm_unreachable("Unknown mismatch!"); } static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V, const Twine &ErrMsg) { const Instruction *I = dyn_cast_or_null(V); if (!V) return Ctx.emitError(ErrMsg); const char *AsmError = ", possible invalid constraint for vector type"; if (const CallInst *CI = dyn_cast(I)) if (isa(CI->getCalledValue())) return Ctx.emitError(I, ErrMsg + AsmError); return Ctx.emitError(I, ErrMsg); } /// getCopyFromPartsVector - Create a value that contains the specified legal /// parts combined into the value they represent. If the parts combine to a /// type larger than ValueVT then AssertOp can be used to specify whether the /// extra bits are known to be zero (ISD::AssertZext) or sign extended from /// ValueVT (ISD::AssertSext). static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V) { assert(ValueVT.isVector() && "Not a vector value"); assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Val = Parts[0]; // Handle a multi-element vector. if (NumParts > 1) { EVT IntermediateVT; MVT RegisterVT; unsigned NumIntermediates; unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates, RegisterVT); assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); NumParts = NumRegs; // Silence a compiler warning. assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); assert(RegisterVT.getSizeInBits() == Parts[0].getSimpleValueType().getSizeInBits() && "Part type sizes don't match!"); // Assemble the parts into intermediate operands. SmallVector Ops(NumIntermediates); if (NumIntermediates == NumParts) { // If the register was not expanded, truncate or copy the value, // as appropriate. for (unsigned i = 0; i != NumParts; ++i) Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1, PartVT, IntermediateVT, V); } else if (NumParts > 0) { // If the intermediate type was expanded, build the intermediate // operands from the parts. assert(NumParts % NumIntermediates == 0 && "Must expand into a divisible number of parts!"); unsigned Factor = NumParts / NumIntermediates; for (unsigned i = 0; i != NumIntermediates; ++i) Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor, PartVT, IntermediateVT, V); } // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the // intermediate operands. Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, DL, ValueVT, Ops); } // There is now one part, held in Val. Correct it to match ValueVT. EVT PartEVT = Val.getValueType(); if (PartEVT == ValueVT) return Val; if (PartEVT.isVector()) { // If the element type of the source/dest vectors are the same, but the // parts vector has more elements than the value vector, then we have a // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the // elements we want. if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) { assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() && "Cannot narrow, it would be a lossy transformation"); return DAG.getNode( ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); } // Vector/Vector bitcast. if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() && "Cannot handle this kind of promotion"); // Promoted vector extract return DAG.getAnyExtOrTrunc(Val, DL, ValueVT); } // Trivial bitcast if the types are the same size and the destination // vector type is legal. if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits() && TLI.isTypeLegal(ValueVT)) return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); // Handle cases such as i8 -> <1 x i1> if (ValueVT.getVectorNumElements() != 1) { diagnosePossiblyInvalidConstraint(*DAG.getContext(), V, "non-trivial scalar-to-vector conversion"); return DAG.getUNDEF(ValueVT); } if (ValueVT.getVectorNumElements() == 1 && ValueVT.getVectorElementType() != PartEVT) Val = DAG.getAnyExtOrTrunc(Val, DL, ValueVT.getScalarType()); return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); } static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, const Value *V); /// getCopyToParts - Create a series of nodes that contain the specified value /// split into legal parts. If the parts contain more bits than Val, then, for /// integers, ExtendKind can be used to specify how to generate the extra bits. static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, const Value *V, ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { EVT ValueVT = Val.getValueType(); // Handle the vector case separately. if (ValueVT.isVector()) return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V); unsigned PartBits = PartVT.getSizeInBits(); unsigned OrigNumParts = NumParts; assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) && "Copying to an illegal type!"); if (NumParts == 0) return; assert(!ValueVT.isVector() && "Vector case handled elsewhere"); EVT PartEVT = PartVT; if (PartEVT == ValueVT) { assert(NumParts == 1 && "No-op copy with multiple parts!"); Parts[0] = Val; return; } if (NumParts * PartBits > ValueVT.getSizeInBits()) { // If the parts cover more bits than the value has, promote the value. if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { assert(NumParts == 1 && "Do not know what to promote to!"); Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val); } else { if (ValueVT.isFloatingPoint()) { // FP values need to be bitcast, then extended if they are being put // into a larger container. ValueVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); } assert((PartVT.isInteger() || PartVT == MVT::x86mmx) && ValueVT.isInteger() && "Unknown mismatch!"); ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Val = DAG.getNode(ExtendKind, DL, ValueVT, Val); if (PartVT == MVT::x86mmx) Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } } else if (PartBits == ValueVT.getSizeInBits()) { // Different types of the same size. assert(NumParts == 1 && PartEVT != ValueVT); Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } else if (NumParts * PartBits < ValueVT.getSizeInBits()) { // If the parts cover less bits than value has, truncate the value. assert((PartVT.isInteger() || PartVT == MVT::x86mmx) && ValueVT.isInteger() && "Unknown mismatch!"); ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); if (PartVT == MVT::x86mmx) Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } // The value may have changed - recompute ValueVT. ValueVT = Val.getValueType(); assert(NumParts * PartBits == ValueVT.getSizeInBits() && "Failed to tile the value with PartVT!"); if (NumParts == 1) { if (PartEVT != ValueVT) { diagnosePossiblyInvalidConstraint(*DAG.getContext(), V, "scalar-to-vector conversion failed"); Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } Parts[0] = Val; return; } // Expand the value into multiple parts. if (NumParts & (NumParts - 1)) { // The number of parts is not a power of 2. Split off and copy the tail. assert(PartVT.isInteger() && ValueVT.isInteger() && "Do not know what to expand to!"); unsigned RoundParts = 1 << Log2_32(NumParts); unsigned RoundBits = RoundParts * PartBits; unsigned OddParts = NumParts - RoundParts; SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val, DAG.getIntPtrConstant(RoundBits, DL)); getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V); if (DAG.getDataLayout().isBigEndian()) // The odd parts were reversed by getCopyToParts - unreverse them. std::reverse(Parts + RoundParts, Parts + NumParts); NumParts = RoundParts; ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); } // The number of parts is a power of 2. Repeatedly bisect the value using // EXTRACT_ELEMENT. Parts[0] = DAG.getNode(ISD::BITCAST, DL, EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()), Val); for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) { for (unsigned i = 0; i < NumParts; i += StepSize) { unsigned ThisBits = StepSize * PartBits / 2; EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits); SDValue &Part0 = Parts[i]; SDValue &Part1 = Parts[i+StepSize/2]; Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, ThisVT, Part0, DAG.getIntPtrConstant(1, DL)); Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, ThisVT, Part0, DAG.getIntPtrConstant(0, DL)); if (ThisBits == PartBits && ThisVT != PartVT) { Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0); Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1); } } } if (DAG.getDataLayout().isBigEndian()) std::reverse(Parts, Parts + OrigNumParts); } /// getCopyToPartsVector - Create a series of nodes that contain the specified /// value split into legal parts. static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, const Value *V) { EVT ValueVT = Val.getValueType(); assert(ValueVT.isVector() && "Not a vector"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (NumParts == 1) { EVT PartEVT = PartVT; if (PartEVT == ValueVT) { // Nothing to do. } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) { // Bitconvert vector->vector case. Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } else if (PartVT.isVector() && PartEVT.getVectorElementType() == ValueVT.getVectorElementType() && PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements()) { EVT ElementVT = PartVT.getVectorElementType(); // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in // undef elements. SmallVector Ops; for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i) Ops.push_back(DAG.getNode( ISD::EXTRACT_VECTOR_ELT, DL, ElementVT, Val, DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())))); for (unsigned i = ValueVT.getVectorNumElements(), e = PartVT.getVectorNumElements(); i != e; ++i) Ops.push_back(DAG.getUNDEF(ElementVT)); Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, Ops); // FIXME: Use CONCAT for 2x -> 4x. //SDValue UndefElts = DAG.getUNDEF(VectorTy); //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts); } else if (PartVT.isVector() && PartEVT.getVectorElementType().bitsGE( ValueVT.getVectorElementType()) && PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) { // Promoted vector extract Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); } else{ // Vector -> scalar conversion. assert(ValueVT.getVectorNumElements() == 1 && "Only trivial vector-to-scalar conversions should get here!"); Val = DAG.getNode( ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); } Parts[0] = Val; return; } // Handle a multi-element vector. EVT IntermediateVT; MVT RegisterVT; unsigned NumIntermediates; unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates, RegisterVT); unsigned NumElements = ValueVT.getVectorNumElements(); assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); NumParts = NumRegs; // Silence a compiler warning. assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); // Split the vector into intermediate operands. SmallVector Ops(NumIntermediates); for (unsigned i = 0; i != NumIntermediates; ++i) { if (IntermediateVT.isVector()) Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val, DAG.getConstant(i * (NumElements / NumIntermediates), DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); else Ops[i] = DAG.getNode( ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val, DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); } // Split the intermediate operands into legal parts. if (NumParts == NumIntermediates) { // If the register was not expanded, promote or copy the value, // as appropriate. for (unsigned i = 0; i != NumParts; ++i) getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V); } else if (NumParts > 0) { // If the intermediate type was expanded, split each the value into // legal parts. assert(NumIntermediates != 0 && "division by zero"); assert(NumParts % NumIntermediates == 0 && "Must expand into a divisible number of parts!"); unsigned Factor = NumParts / NumIntermediates; for (unsigned i = 0; i != NumIntermediates; ++i) getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT, V); } } RegsForValue::RegsForValue() {} RegsForValue::RegsForValue(const SmallVector ®s, MVT regvt, EVT valuevt) : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI, const DataLayout &DL, unsigned Reg, Type *Ty) { ComputeValueVTs(TLI, DL, Ty, ValueVTs); for (EVT ValueVT : ValueVTs) { unsigned NumRegs = TLI.getNumRegisters(Context, ValueVT); MVT RegisterVT = TLI.getRegisterType(Context, ValueVT); for (unsigned i = 0; i != NumRegs; ++i) Regs.push_back(Reg + i); RegVTs.push_back(RegisterVT); Reg += NumRegs; } } /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from /// this value and returns the result as a ValueVT value. This uses /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, const SDLoc &dl, SDValue &Chain, SDValue *Flag, const Value *V) const { // A Value with type {} or [0 x %t] needs no registers. if (ValueVTs.empty()) return SDValue(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // Assemble the legal parts into the final values. SmallVector Values(ValueVTs.size()); SmallVector Parts; for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { // Copy the legal parts from the registers. EVT ValueVT = ValueVTs[Value]; unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT); MVT RegisterVT = RegVTs[Value]; Parts.resize(NumRegs); for (unsigned i = 0; i != NumRegs; ++i) { SDValue P; if (!Flag) { P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT); } else { P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag); *Flag = P.getValue(2); } Chain = P.getValue(1); Parts[i] = P; // If the source register was virtual and if we know something about it, // add an assert node. if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) || !RegisterVT.isInteger() || RegisterVT.isVector()) continue; const FunctionLoweringInfo::LiveOutInfo *LOI = FuncInfo.GetLiveOutRegInfo(Regs[Part+i]); if (!LOI) continue; unsigned RegSize = RegisterVT.getSizeInBits(); unsigned NumSignBits = LOI->NumSignBits; unsigned NumZeroBits = LOI->KnownZero.countLeadingOnes(); if (NumZeroBits == RegSize) { // The current value is a zero. // Explicitly express that as it would be easier for // optimizations to kick in. Parts[i] = DAG.getConstant(0, dl, RegisterVT); continue; } // FIXME: We capture more information than the dag can represent. For // now, just use the tightest assertzext/assertsext possible. bool isSExt = true; EVT FromVT(MVT::Other); if (NumSignBits == RegSize) { isSExt = true; // ASSERT SEXT 1 FromVT = MVT::i1; } else if (NumZeroBits >= RegSize - 1) { isSExt = false; // ASSERT ZEXT 1 FromVT = MVT::i1; } else if (NumSignBits > RegSize - 8) { isSExt = true; // ASSERT SEXT 8 FromVT = MVT::i8; } else if (NumZeroBits >= RegSize - 8) { isSExt = false; // ASSERT ZEXT 8 FromVT = MVT::i8; } else if (NumSignBits > RegSize - 16) { isSExt = true; // ASSERT SEXT 16 FromVT = MVT::i16; } else if (NumZeroBits >= RegSize - 16) { isSExt = false; // ASSERT ZEXT 16 FromVT = MVT::i16; } else if (NumSignBits > RegSize - 32) { isSExt = true; // ASSERT SEXT 32 FromVT = MVT::i32; } else if (NumZeroBits >= RegSize - 32) { isSExt = false; // ASSERT ZEXT 32 FromVT = MVT::i32; } else { continue; } // Add an assertion node. assert(FromVT != MVT::Other); Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl, RegisterVT, P, DAG.getValueType(FromVT)); } Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), NumRegs, RegisterVT, ValueVT, V); Part += NumRegs; Parts.clear(); } return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values); } /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the /// specified value into the registers specified by this object. This uses /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, const SDLoc &dl, SDValue &Chain, SDValue *Flag, const Value *V, ISD::NodeType PreferredExtendType) const { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); ISD::NodeType ExtendKind = PreferredExtendType; // Get the list of the values's legal parts. unsigned NumRegs = Regs.size(); SmallVector Parts(NumRegs); for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { EVT ValueVT = ValueVTs[Value]; unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT); MVT RegisterVT = RegVTs[Value]; if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT)) ExtendKind = ISD::ZERO_EXTEND; getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), &Parts[Part], NumParts, RegisterVT, V, ExtendKind); Part += NumParts; } // Copy the parts into the registers. SmallVector Chains(NumRegs); for (unsigned i = 0; i != NumRegs; ++i) { SDValue Part; if (!Flag) { Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]); } else { Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag); *Flag = Part.getValue(1); } Chains[i] = Part.getValue(0); } if (NumRegs == 1 || Flag) // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is // flagged to it. That is the CopyToReg nodes and the user are considered // a single scheduling unit. If we create a TokenFactor and return it as // chain, then the TokenFactor is both a predecessor (operand) of the // user as well as a successor (the TF operands are flagged to the user). // c1, f1 = CopyToReg // c2, f2 = CopyToReg // c3 = TokenFactor c1, c2 // ... // = op c3, ..., f2 Chain = Chains[NumRegs-1]; else Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); } /// AddInlineAsmOperands - Add this value to the specified inlineasm node /// operand list. This adds the code marker and includes the number of /// values added into it. void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, unsigned MatchingIdx, const SDLoc &dl, SelectionDAG &DAG, std::vector &Ops) const { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size()); if (HasMatching) Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx); else if (!Regs.empty() && TargetRegisterInfo::isVirtualRegister(Regs.front())) { // Put the register class of the virtual registers in the flag word. That // way, later passes can recompute register class constraints for inline // assembly as well as normal instructions. // Don't do this for tied operands that can use the regclass information // from the def. const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); const TargetRegisterClass *RC = MRI.getRegClass(Regs.front()); Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID()); } SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32); Ops.push_back(Res); unsigned SP = TLI.getStackPointerRegisterToSaveRestore(); for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]); MVT RegisterVT = RegVTs[Value]; for (unsigned i = 0; i != NumRegs; ++i) { assert(Reg < Regs.size() && "Mismatch in # registers expected"); unsigned TheReg = Regs[Reg++]; Ops.push_back(DAG.getRegister(TheReg, RegisterVT)); if (TheReg == SP && Code == InlineAsm::Kind_Clobber) { // If we clobbered the stack pointer, MFI should know about it. assert(DAG.getMachineFunction().getFrameInfo()-> hasOpaqueSPAdjustment()); } } } } void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, const TargetLibraryInfo *li) { AA = &aa; GFI = gfi; LibInfo = li; DL = &DAG.getDataLayout(); Context = DAG.getContext(); LPadToCallSiteMap.clear(); } /// clear - Clear out the current SelectionDAG and the associated /// state and prepare this SelectionDAGBuilder object to be used /// for a new block. This doesn't clear out information about /// additional blocks that are needed to complete switch lowering /// or PHI node updating; that information is cleared out as it is /// consumed. void SelectionDAGBuilder::clear() { NodeMap.clear(); UnusedArgNodeMap.clear(); PendingLoads.clear(); PendingExports.clear(); CurInst = nullptr; HasTailCall = false; SDNodeOrder = LowestSDNodeOrder; StatepointLowering.clear(); } /// clearDanglingDebugInfo - Clear the dangling debug information /// map. This function is separated from the clear so that debug /// information that is dangling in a basic block can be properly /// resolved in a different basic block. This allows the /// SelectionDAG to resolve dangling debug information attached /// to PHI nodes. void SelectionDAGBuilder::clearDanglingDebugInfo() { DanglingDebugInfoMap.clear(); } /// getRoot - Return the current virtual root of the Selection DAG, /// flushing any PendingLoad items. This must be done before emitting /// a store or any other node that may need to be ordered after any /// prior load instructions. /// SDValue SelectionDAGBuilder::getRoot() { if (PendingLoads.empty()) return DAG.getRoot(); if (PendingLoads.size() == 1) { SDValue Root = PendingLoads[0]; DAG.setRoot(Root); PendingLoads.clear(); return Root; } // Otherwise, we have to make a token factor node. SDValue Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, PendingLoads); PendingLoads.clear(); DAG.setRoot(Root); return Root; } /// getControlRoot - Similar to getRoot, but instead of flushing all the /// PendingLoad items, flush all the PendingExports items. It is necessary /// to do this before emitting a terminator instruction. /// SDValue SelectionDAGBuilder::getControlRoot() { SDValue Root = DAG.getRoot(); if (PendingExports.empty()) return Root; // Turn all of the CopyToReg chains into one factored node. if (Root.getOpcode() != ISD::EntryToken) { unsigned i = 0, e = PendingExports.size(); for (; i != e; ++i) { assert(PendingExports[i].getNode()->getNumOperands() > 1); if (PendingExports[i].getNode()->getOperand(0) == Root) break; // Don't add the root if we already indirectly depend on it. } if (i == e) PendingExports.push_back(Root); } Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, PendingExports); PendingExports.clear(); DAG.setRoot(Root); return Root; } /// Copy swift error to the final virtual register at end of a basic block, as /// specified by SwiftErrorWorklist, if necessary. static void copySwiftErrorsToFinalVRegs(SelectionDAGBuilder &SDB) { const TargetLowering &TLI = SDB.DAG.getTargetLoweringInfo(); if (!TLI.supportSwiftError()) return; if (!SDB.FuncInfo.SwiftErrorWorklist.count(SDB.FuncInfo.MBB)) return; // Go through entries in SwiftErrorWorklist, and create copy as necessary. FunctionLoweringInfo::SwiftErrorVRegs &WorklistEntry = SDB.FuncInfo.SwiftErrorWorklist[SDB.FuncInfo.MBB]; FunctionLoweringInfo::SwiftErrorVRegs &MapEntry = SDB.FuncInfo.SwiftErrorMap[SDB.FuncInfo.MBB]; for (unsigned I = 0, E = WorklistEntry.size(); I < E; I++) { unsigned WorkReg = WorklistEntry[I]; // Find the swifterror virtual register for the value in SwiftErrorMap. unsigned MapReg = MapEntry[I]; assert(TargetRegisterInfo::isVirtualRegister(MapReg) && "Entries in SwiftErrorMap should be virtual registers"); if (WorkReg == MapReg) continue; // Create copy from SwiftErrorMap to SwiftWorklist. auto &DL = SDB.DAG.getDataLayout(); SDValue CopyNode = SDB.DAG.getCopyToReg( SDB.getRoot(), SDB.getCurSDLoc(), WorkReg, SDB.DAG.getRegister(MapReg, EVT(TLI.getPointerTy(DL)))); MapEntry[I] = WorkReg; SDB.DAG.setRoot(CopyNode); } } void SelectionDAGBuilder::visit(const Instruction &I) { // Set up outgoing PHI node register values before emitting the terminator. if (isa(&I)) { copySwiftErrorsToFinalVRegs(*this); HandlePHINodesInSuccessorBlocks(I.getParent()); } ++SDNodeOrder; CurInst = &I; visit(I.getOpcode(), I); if (!isa(&I) && !HasTailCall && !isStatepoint(&I)) // statepoints handle their exports internally CopyToExportRegsIfNeeded(&I); CurInst = nullptr; } void SelectionDAGBuilder::visitPHI(const PHINode &) { llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!"); } void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) { // Note: this doesn't use InstVisitor, because it has to work with // ConstantExpr's in addition to instructions. switch (Opcode) { default: llvm_unreachable("Unknown instruction type encountered!"); // Build the switch statement using the Instruction.def file. #define HANDLE_INST(NUM, OPCODE, CLASS) \ case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break; #include "llvm/IR/Instruction.def" } } // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V, // generate the debug data structures now that we've seen its definition. void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, SDValue Val) { DanglingDebugInfo &DDI = DanglingDebugInfoMap[V]; if (DDI.getDI()) { const DbgValueInst *DI = DDI.getDI(); DebugLoc dl = DDI.getdl(); unsigned DbgSDNodeOrder = DDI.getSDNodeOrder(); DILocalVariable *Variable = DI->getVariable(); DIExpression *Expr = DI->getExpression(); assert(Variable->isValidLocationForIntrinsic(dl) && "Expected inlined-at fields to agree"); uint64_t Offset = DI->getOffset(); SDDbgValue *SDV; if (Val.getNode()) { if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, Offset, false, Val)) { SDV = DAG.getDbgValue(Variable, Expr, Val.getNode(), Val.getResNo(), false, Offset, dl, DbgSDNodeOrder); DAG.AddDbgValue(SDV, Val.getNode(), false); } } else DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); DanglingDebugInfoMap[V] = DanglingDebugInfo(); } } /// getCopyFromRegs - If there was virtual register allocated for the value V /// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise. SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) { DenseMap::iterator It = FuncInfo.ValueMap.find(V); SDValue Result; if (It != FuncInfo.ValueMap.end()) { unsigned InReg = It->second; RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), DAG.getDataLayout(), InReg, Ty); SDValue Chain = DAG.getEntryNode(); Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); resolveDanglingDebugInfo(V, Result); } return Result; } /// getValue - Return an SDValue for the given Value. SDValue SelectionDAGBuilder::getValue(const Value *V) { // If we already have an SDValue for this value, use it. It's important // to do this first, so that we don't create a CopyFromReg if we already // have a regular SDValue. SDValue &N = NodeMap[V]; if (N.getNode()) return N; // If there's a virtual register allocated and initialized for this // value, use it. if (SDValue copyFromReg = getCopyFromRegs(V, V->getType())) return copyFromReg; // Otherwise create a new SDValue and remember it. SDValue Val = getValueImpl(V); NodeMap[V] = Val; resolveDanglingDebugInfo(V, Val); return Val; } // Return true if SDValue exists for the given Value bool SelectionDAGBuilder::findValue(const Value *V) const { return (NodeMap.find(V) != NodeMap.end()) || (FuncInfo.ValueMap.find(V) != FuncInfo.ValueMap.end()); } /// getNonRegisterValue - Return an SDValue for the given Value, but /// don't look in FuncInfo.ValueMap for a virtual register. SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { // If we already have an SDValue for this value, use it. SDValue &N = NodeMap[V]; if (N.getNode()) { if (isa(N) || isa(N)) { // Remove the debug location from the node as the node is about to be used // in a location which may differ from the original debug location. This // is relevant to Constant and ConstantFP nodes because they can appear // as constant expressions inside PHI nodes. N->setDebugLoc(DebugLoc()); } return N; } // Otherwise create a new SDValue and remember it. SDValue Val = getValueImpl(V); NodeMap[V] = Val; resolveDanglingDebugInfo(V, Val); return Val; } /// getValueImpl - Helper function for getValue and getNonRegisterValue. /// Create an SDValue for the given value. SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (const Constant *C = dyn_cast(V)) { EVT VT = TLI.getValueType(DAG.getDataLayout(), V->getType(), true); if (const ConstantInt *CI = dyn_cast(C)) return DAG.getConstant(*CI, getCurSDLoc(), VT); if (const GlobalValue *GV = dyn_cast(C)) return DAG.getGlobalAddress(GV, getCurSDLoc(), VT); if (isa(C)) { unsigned AS = V->getType()->getPointerAddressSpace(); return DAG.getConstant(0, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout(), AS)); } if (const ConstantFP *CFP = dyn_cast(C)) return DAG.getConstantFP(*CFP, getCurSDLoc(), VT); if (isa(C) && !V->getType()->isAggregateType()) return DAG.getUNDEF(VT); if (const ConstantExpr *CE = dyn_cast(C)) { visit(CE->getOpcode(), *CE); SDValue N1 = NodeMap[V]; assert(N1.getNode() && "visit didn't populate the NodeMap!"); return N1; } if (isa(C) || isa(C)) { SmallVector Constants; for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end(); OI != OE; ++OI) { SDNode *Val = getValue(*OI).getNode(); // If the operand is an empty aggregate, there are no values. if (!Val) continue; // Add each leaf value from the operand to the Constants list // to form a flattened list of all the values. for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i) Constants.push_back(SDValue(Val, i)); } return DAG.getMergeValues(Constants, getCurSDLoc()); } if (const ConstantDataSequential *CDS = dyn_cast(C)) { SmallVector Ops; for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode(); // Add each leaf value from the operand to the Constants list // to form a flattened list of all the values. for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i) Ops.push_back(SDValue(Val, i)); } if (isa(CDS->getType())) return DAG.getMergeValues(Ops, getCurSDLoc()); return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, Ops); } if (C->getType()->isStructTy() || C->getType()->isArrayTy()) { assert((isa(C) || isa(C)) && "Unknown struct or array constant!"); SmallVector ValueVTs; ComputeValueVTs(TLI, DAG.getDataLayout(), C->getType(), ValueVTs); unsigned NumElts = ValueVTs.size(); if (NumElts == 0) return SDValue(); // empty struct SmallVector Constants(NumElts); for (unsigned i = 0; i != NumElts; ++i) { EVT EltVT = ValueVTs[i]; if (isa(C)) Constants[i] = DAG.getUNDEF(EltVT); else if (EltVT.isFloatingPoint()) Constants[i] = DAG.getConstantFP(0, getCurSDLoc(), EltVT); else Constants[i] = DAG.getConstant(0, getCurSDLoc(), EltVT); } return DAG.getMergeValues(Constants, getCurSDLoc()); } if (const BlockAddress *BA = dyn_cast(C)) return DAG.getBlockAddress(BA, VT); VectorType *VecTy = cast(V->getType()); unsigned NumElements = VecTy->getNumElements(); // Now that we know the number and type of the elements, get that number of // elements into the Ops array based on what kind of constant it is. SmallVector Ops; if (const ConstantVector *CV = dyn_cast(C)) { for (unsigned i = 0; i != NumElements; ++i) Ops.push_back(getValue(CV->getOperand(i))); } else { assert(isa(C) && "Unknown vector constant!"); EVT EltVT = TLI.getValueType(DAG.getDataLayout(), VecTy->getElementType()); SDValue Op; if (EltVT.isFloatingPoint()) Op = DAG.getConstantFP(0, getCurSDLoc(), EltVT); else Op = DAG.getConstant(0, getCurSDLoc(), EltVT); Ops.assign(NumElements, Op); } // Create a BUILD_VECTOR node. return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, Ops); } // If this is a static alloca, generate it as the frameindex instead of // computation. if (const AllocaInst *AI = dyn_cast(V)) { DenseMap::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) return DAG.getFrameIndex(SI->second, TLI.getPointerTy(DAG.getDataLayout())); } // If this is an instruction which fast-isel has deferred, select it now. if (const Instruction *Inst = dyn_cast(V)) { unsigned InReg = FuncInfo.InitializeRegForValue(Inst); RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg, Inst->getType()); SDValue Chain = DAG.getEntryNode(); return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); } llvm_unreachable("Can't get register for value!"); } void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) { auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX; bool IsCoreCLR = Pers == EHPersonality::CoreCLR; MachineBasicBlock *CatchPadMBB = FuncInfo.MBB; // In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues. if (IsMSVCCXX || IsCoreCLR) CatchPadMBB->setIsEHFuncletEntry(); DAG.setRoot(DAG.getNode(ISD::CATCHPAD, getCurSDLoc(), MVT::Other, getControlRoot())); } void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) { // Update machine-CFG edge. MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()]; FuncInfo.MBB->addSuccessor(TargetMBB); auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); bool IsSEH = isAsynchronousEHPersonality(Pers); if (IsSEH) { // If this is not a fall-through branch or optimizations are switched off, // emit the branch. if (TargetMBB != NextBlock(FuncInfo.MBB) || TM.getOptLevel() == CodeGenOpt::None) DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(TargetMBB))); return; } // Figure out the funclet membership for the catchret's successor. // This will be used by the FuncletLayout pass to determine how to order the // BB's. // A 'catchret' returns to the outer scope's color. Value *ParentPad = I.getCatchSwitchParentPad(); const BasicBlock *SuccessorColor; if (isa(ParentPad)) SuccessorColor = &FuncInfo.Fn->getEntryBlock(); else SuccessorColor = cast(ParentPad)->getParent(); assert(SuccessorColor && "No parent funclet for catchret!"); MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor]; assert(SuccessorColorMBB && "No MBB for SuccessorColor!"); // Create the terminator node. SDValue Ret = DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(TargetMBB), DAG.getBasicBlock(SuccessorColorMBB)); DAG.setRoot(Ret); } void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) { // Don't emit any special code for the cleanuppad instruction. It just marks // the start of a funclet. FuncInfo.MBB->setIsEHFuncletEntry(); FuncInfo.MBB->setIsCleanupFuncletEntry(); } /// When an invoke or a cleanupret unwinds to the next EH pad, there are /// many places it could ultimately go. In the IR, we have a single unwind /// destination, but in the machine CFG, we enumerate all the possible blocks. /// This function skips over imaginary basic blocks that hold catchswitch /// instructions, and finds all the "real" machine /// basic block destinations. As those destinations may not be successors of /// EHPadBB, here we also calculate the edge probability to those destinations. /// The passed-in Prob is the edge probability to EHPadBB. static void findUnwindDestinations( FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB, BranchProbability Prob, SmallVectorImpl> &UnwindDests) { EHPersonality Personality = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX; bool IsCoreCLR = Personality == EHPersonality::CoreCLR; while (EHPadBB) { const Instruction *Pad = EHPadBB->getFirstNonPHI(); BasicBlock *NewEHPadBB = nullptr; if (isa(Pad)) { // Stop on landingpads. They are not funclets. UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob); break; } else if (isa(Pad)) { // Stop on cleanup pads. Cleanups are always funclet entries for all known // personalities. UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob); UnwindDests.back().first->setIsEHFuncletEntry(); break; } else if (auto *CatchSwitch = dyn_cast(Pad)) { // Add the catchpad handlers to the possible destinations. for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) { UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob); // For MSVC++ and the CLR, catchblocks are funclets and need prologues. if (IsMSVCCXX || IsCoreCLR) UnwindDests.back().first->setIsEHFuncletEntry(); } NewEHPadBB = CatchSwitch->getUnwindDest(); } else { continue; } BranchProbabilityInfo *BPI = FuncInfo.BPI; if (BPI && NewEHPadBB) Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB); EHPadBB = NewEHPadBB; } } void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) { // Update successor info. SmallVector, 1> UnwindDests; auto UnwindDest = I.getUnwindDest(); BranchProbabilityInfo *BPI = FuncInfo.BPI; BranchProbability UnwindDestProb = (BPI && UnwindDest) ? BPI->getEdgeProbability(FuncInfo.MBB->getBasicBlock(), UnwindDest) : BranchProbability::getZero(); findUnwindDestinations(FuncInfo, UnwindDest, UnwindDestProb, UnwindDests); for (auto &UnwindDest : UnwindDests) { UnwindDest.first->setIsEHPad(); addSuccessorWithProb(FuncInfo.MBB, UnwindDest.first, UnwindDest.second); } FuncInfo.MBB->normalizeSuccProbs(); // Create the terminator node. SDValue Ret = DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot()); DAG.setRoot(Ret); } void SelectionDAGBuilder::visitCatchSwitch(const CatchSwitchInst &CSI) { report_fatal_error("visitCatchSwitch not yet implemented!"); } void SelectionDAGBuilder::visitRet(const ReturnInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); auto &DL = DAG.getDataLayout(); SDValue Chain = getControlRoot(); SmallVector Outs; SmallVector OutVals; // Calls to @llvm.experimental.deoptimize don't generate a return value, so // lower // // %val = call @llvm.experimental.deoptimize() // ret %val // // differently. if (I.getParent()->getTerminatingDeoptimizeCall()) { LowerDeoptimizingReturn(); return; } if (!FuncInfo.CanLowerReturn) { unsigned DemoteReg = FuncInfo.DemoteRegister; const Function *F = I.getParent()->getParent(); // Emit a store of the return value through the virtual register. // Leave Outs empty so that LowerReturn won't try to load return // registers the usual way. SmallVector PtrValueVTs; ComputeValueVTs(TLI, DL, PointerType::getUnqual(F->getReturnType()), PtrValueVTs); SDValue RetPtr = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), DemoteReg, PtrValueVTs[0]); SDValue RetOp = getValue(I.getOperand(0)); SmallVector ValueVTs; SmallVector Offsets; ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); // An aggregate return value cannot wrap around the address space, so // offsets to its parts don't wrap either. SDNodeFlags Flags; Flags.setNoUnsignedWrap(true); SmallVector Chains(NumValues); for (unsigned i = 0; i != NumValues; ++i) { SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), RetPtr.getValueType(), RetPtr, DAG.getIntPtrConstant(Offsets[i], getCurSDLoc()), &Flags); Chains[i] = DAG.getStore(Chain, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i), // FIXME: better loc info would be nice. Add, MachinePointerInfo()); } Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, Chains); } else if (I.getNumOperands() != 0) { SmallVector ValueVTs; ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues) { SDValue RetOp = getValue(I.getOperand(0)); const Function *F = I.getParent()->getParent(); ISD::NodeType ExtendKind = ISD::ANY_EXTEND; if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) ExtendKind = ISD::SIGN_EXTEND; else if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt)) ExtendKind = ISD::ZERO_EXTEND; LLVMContext &Context = F->getContext(); bool RetInReg = F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, Attribute::InReg); for (unsigned j = 0; j != NumValues; ++j) { EVT VT = ValueVTs[j]; if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind); unsigned NumParts = TLI.getNumRegisters(Context, VT); MVT PartVT = TLI.getRegisterType(Context, VT); SmallVector Parts(NumParts); getCopyToParts(DAG, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + j), &Parts[0], NumParts, PartVT, &I, ExtendKind); // 'inreg' on function refers to return value ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); if (RetInReg) Flags.setInReg(); // Propagate extension type if any if (ExtendKind == ISD::SIGN_EXTEND) Flags.setSExt(); else if (ExtendKind == ISD::ZERO_EXTEND) Flags.setZExt(); for (unsigned i = 0; i < NumParts; ++i) { Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(), VT, /*isfixed=*/true, 0, 0)); OutVals.push_back(Parts[i]); } } } } // Push in swifterror virtual register as the last element of Outs. This makes // sure swifterror virtual register will be returned in the swifterror // physical register. const Function *F = I.getParent()->getParent(); if (TLI.supportSwiftError() && F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) { ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); Flags.setSwiftError(); Outs.push_back(ISD::OutputArg(Flags, EVT(TLI.getPointerTy(DL)) /*vt*/, EVT(TLI.getPointerTy(DL)) /*argvt*/, true /*isfixed*/, 1 /*origidx*/, 0 /*partOffs*/)); // Create SDNode for the swifterror virtual register. OutVals.push_back(DAG.getRegister(FuncInfo.SwiftErrorMap[FuncInfo.MBB][0], EVT(TLI.getPointerTy(DL)))); } bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); CallingConv::ID CallConv = DAG.getMachineFunction().getFunction()->getCallingConv(); Chain = DAG.getTargetLoweringInfo().LowerReturn( Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG); // Verify that the target's LowerReturn behaved as expected. assert(Chain.getNode() && Chain.getValueType() == MVT::Other && "LowerReturn didn't return a valid chain!"); // Update the DAG with the new chain value resulting from return lowering. DAG.setRoot(Chain); } /// CopyToExportRegsIfNeeded - If the given value has virtual registers /// created for it, emit nodes to copy the value into the virtual /// registers. void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) { // Skip empty types if (V->getType()->isEmptyTy()) return; DenseMap::iterator VMI = FuncInfo.ValueMap.find(V); if (VMI != FuncInfo.ValueMap.end()) { assert(!V->use_empty() && "Unused value assigned virtual registers!"); CopyValueToVirtualRegister(V, VMI->second); } } /// ExportFromCurrentBlock - If this condition isn't known to be exported from /// the current basic block, add it to ValueMap now so that we'll get a /// CopyTo/FromReg. void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) { // No need to export constants. if (!isa(V) && !isa(V)) return; // Already exported? if (FuncInfo.isExportedInst(V)) return; unsigned Reg = FuncInfo.InitializeRegForValue(V); CopyValueToVirtualRegister(V, Reg); } bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB) { // The operands of the setcc have to be in this block. We don't know // how to export them from some other block. if (const Instruction *VI = dyn_cast(V)) { // Can export from current BB. if (VI->getParent() == FromBB) return true; // Is already exported, noop. return FuncInfo.isExportedInst(V); } // If this is an argument, we can export it if the BB is the entry block or // if it is already exported. if (isa(V)) { if (FromBB == &FromBB->getParent()->getEntryBlock()) return true; // Otherwise, can only export this if it is already exported. return FuncInfo.isExportedInst(V); } // Otherwise, constants can always be exported. return true; } /// Return branch probability calculated by BranchProbabilityInfo for IR blocks. BranchProbability SelectionDAGBuilder::getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const { BranchProbabilityInfo *BPI = FuncInfo.BPI; const BasicBlock *SrcBB = Src->getBasicBlock(); const BasicBlock *DstBB = Dst->getBasicBlock(); if (!BPI) { // If BPI is not available, set the default probability as 1 / N, where N is // the number of successors. auto SuccSize = std::max( std::distance(succ_begin(SrcBB), succ_end(SrcBB)), 1); return BranchProbability(1, SuccSize); } return BPI->getEdgeProbability(SrcBB, DstBB); } void SelectionDAGBuilder::addSuccessorWithProb(MachineBasicBlock *Src, MachineBasicBlock *Dst, BranchProbability Prob) { if (!FuncInfo.BPI) Src->addSuccessorWithoutProb(Dst); else { if (Prob.isUnknown()) Prob = getEdgeProbability(Src, Dst); Src->addSuccessor(Dst, Prob); } } static bool InBlock(const Value *V, const BasicBlock *BB) { if (const Instruction *I = dyn_cast(V)) return I->getParent() == BB; return true; } /// EmitBranchForMergedCondition - Helper method for FindMergedConditions. /// This function emits a branch and is used at the leaves of an OR or an /// AND operator tree. /// void SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, BranchProbability TProb, BranchProbability FProb) { const BasicBlock *BB = CurBB->getBasicBlock(); // If the leaf of the tree is a comparison, merge the condition into // the caseblock. if (const CmpInst *BOp = dyn_cast(Cond)) { // The operands of the cmp have to be in this block. We don't know // how to export them from some other block. If this is the first block // of the sequence, no exporting is needed. if (CurBB == SwitchBB || (isExportableFromCurrentBlock(BOp->getOperand(0), BB) && isExportableFromCurrentBlock(BOp->getOperand(1), BB))) { ISD::CondCode Condition; if (const ICmpInst *IC = dyn_cast(Cond)) { Condition = getICmpCondCode(IC->getPredicate()); } else { const FCmpInst *FC = cast(Cond); Condition = getFCmpCondCode(FC->getPredicate()); if (TM.Options.NoNaNsFPMath) Condition = getFCmpCodeWithoutNaN(Condition); } CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr, TBB, FBB, CurBB, TProb, FProb); SwitchCases.push_back(CB); return; } } // Create a CaseBlock record representing this branch. CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()), nullptr, TBB, FBB, CurBB, TProb, FProb); SwitchCases.push_back(CB); } /// FindMergedConditions - If Cond is an expression like void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, Instruction::BinaryOps Opc, BranchProbability TProb, BranchProbability FProb) { // If this node is not part of the or/and tree, emit it as a branch. const Instruction *BOp = dyn_cast(Cond); if (!BOp || !(isa(BOp) || isa(BOp)) || (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() || BOp->getParent() != CurBB->getBasicBlock() || !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) || !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) { EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, TProb, FProb); return; } // Create TmpBB after CurBB. MachineFunction::iterator BBI(CurBB); MachineFunction &MF = DAG.getMachineFunction(); MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock()); CurBB->getParent()->insert(++BBI, TmpBB); if (Opc == Instruction::Or) { // Codegen X | Y as: // BB1: // jmp_if_X TBB // jmp TmpBB // TmpBB: // jmp_if_Y TBB // jmp FBB // // We have flexibility in setting Prob for BB1 and Prob for TmpBB. // The requirement is that // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB) // = TrueProb for original BB. // Assuming the original probabilities are A and B, one choice is to set // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to // A/(1+B) and 2B/(1+B). This choice assumes that // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB. // Another choice is to assume TrueProb for BB1 equals to TrueProb for // TmpBB, but the math is more complicated. auto NewTrueProb = TProb / 2; auto NewFalseProb = TProb / 2 + FProb; // Emit the LHS condition. FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc, NewTrueProb, NewFalseProb); // Normalize A/2 and B to get A/(1+B) and 2B/(1+B). SmallVector Probs{TProb / 2, FProb}; BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end()); // Emit the RHS condition into TmpBB. FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0], Probs[1]); } else { assert(Opc == Instruction::And && "Unknown merge op!"); // Codegen X & Y as: // BB1: // jmp_if_X TmpBB // jmp FBB // TmpBB: // jmp_if_Y TBB // jmp FBB // // This requires creation of TmpBB after CurBB. // We have flexibility in setting Prob for BB1 and Prob for TmpBB. // The requirement is that // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB) // = FalseProb for original BB. // Assuming the original probabilities are A and B, one choice is to set // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 == // TrueProb for BB1 * FalseProb for TmpBB. auto NewTrueProb = TProb + FProb / 2; auto NewFalseProb = FProb / 2; // Emit the LHS condition. FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc, NewTrueProb, NewFalseProb); // Normalize A and B/2 to get 2A/(1+A) and B/(1+A). SmallVector Probs{TProb, FProb / 2}; BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end()); // Emit the RHS condition into TmpBB. FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0], Probs[1]); } } /// If the set of cases should be emitted as a series of branches, return true. /// If we should emit this as a bunch of and/or'd together conditions, return /// false. bool SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector &Cases) { if (Cases.size() != 2) return true; // If this is two comparisons of the same values or'd or and'd together, they // will get folded into a single comparison, so don't emit two blocks. if ((Cases[0].CmpLHS == Cases[1].CmpLHS && Cases[0].CmpRHS == Cases[1].CmpRHS) || (Cases[0].CmpRHS == Cases[1].CmpLHS && Cases[0].CmpLHS == Cases[1].CmpRHS)) { return false; } // Handle: (X != null) | (Y != null) --> (X|Y) != 0 // Handle: (X == null) & (Y == null) --> (X|Y) == 0 if (Cases[0].CmpRHS == Cases[1].CmpRHS && Cases[0].CC == Cases[1].CC && isa(Cases[0].CmpRHS) && cast(Cases[0].CmpRHS)->isNullValue()) { if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB) return false; if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB) return false; } return true; } void SelectionDAGBuilder::visitBr(const BranchInst &I) { MachineBasicBlock *BrMBB = FuncInfo.MBB; // Update machine-CFG edges. MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)]; if (I.isUnconditional()) { // Update machine-CFG edges. BrMBB->addSuccessor(Succ0MBB); // If this is not a fall-through branch or optimizations are switched off, // emit the branch. if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None) DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(Succ0MBB))); return; } // If this condition is one of the special cases we handle, do special stuff // now. const Value *CondVal = I.getCondition(); MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)]; // If this is a series of conditions that are or'd or and'd together, emit // this as a sequence of branches instead of setcc's with and/or operations. // As long as jumps are not expensive, this should improve performance. // For example, instead of something like: // cmp A, B // C = seteq // cmp D, E // F = setle // or C, F // jnz foo // Emit: // cmp A, B // je foo // cmp D, E // jle foo // if (const BinaryOperator *BOp = dyn_cast(CondVal)) { Instruction::BinaryOps Opcode = BOp->getOpcode(); if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() && !I.getMetadata(LLVMContext::MD_unpredictable) && (Opcode == Instruction::And || Opcode == Instruction::Or)) { FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, Opcode, getEdgeProbability(BrMBB, Succ0MBB), getEdgeProbability(BrMBB, Succ1MBB)); // If the compares in later blocks need to use values not currently // exported from this block, export them now. This block should always // be the first entry. assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!"); // Allow some cases to be rejected. if (ShouldEmitAsBranches(SwitchCases)) { for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) { ExportFromCurrentBlock(SwitchCases[i].CmpLHS); ExportFromCurrentBlock(SwitchCases[i].CmpRHS); } // Emit the branch for this block. visitSwitchCase(SwitchCases[0], BrMBB); SwitchCases.erase(SwitchCases.begin()); return; } // Okay, we decided not to do this, remove any inserted MBB's and clear // SwitchCases. for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) FuncInfo.MF->erase(SwitchCases[i].ThisBB); SwitchCases.clear(); } } // Create a CaseBlock record representing this branch. CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()), nullptr, Succ0MBB, Succ1MBB, BrMBB); // Use visitSwitchCase to actually insert the fast branch sequence for this // cond branch. visitSwitchCase(CB, BrMBB); } /// visitSwitchCase - Emits the necessary code to represent a single node in /// the binary search tree resulting from lowering a switch instruction. void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, MachineBasicBlock *SwitchBB) { SDValue Cond; SDValue CondLHS = getValue(CB.CmpLHS); SDLoc dl = getCurSDLoc(); // Build the setcc now. if (!CB.CmpMHS) { // Fold "(X == true)" to X and "(X == false)" to !X to // handle common cases produced by branch lowering. if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) && CB.CC == ISD::SETEQ) Cond = CondLHS; else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) && CB.CC == ISD::SETEQ) { SDValue True = DAG.getConstant(1, dl, CondLHS.getValueType()); Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True); } else Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC); } else { assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now"); const APInt& Low = cast(CB.CmpLHS)->getValue(); const APInt& High = cast(CB.CmpRHS)->getValue(); SDValue CmpOp = getValue(CB.CmpMHS); EVT VT = CmpOp.getValueType(); if (cast(CB.CmpLHS)->isMinValue(true)) { Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, dl, VT), ISD::SETLE); } else { SDValue SUB = DAG.getNode(ISD::SUB, dl, VT, CmpOp, DAG.getConstant(Low, dl, VT)); Cond = DAG.getSetCC(dl, MVT::i1, SUB, DAG.getConstant(High-Low, dl, VT), ISD::SETULE); } } // Update successor info addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb); // TrueBB and FalseBB are always different unless the incoming IR is // degenerate. This only happens when running llc on weird IR. if (CB.TrueBB != CB.FalseBB) addSuccessorWithProb(SwitchBB, CB.FalseBB, CB.FalseProb); SwitchBB->normalizeSuccProbs(); // If the lhs block is the next block, invert the condition so that we can // fall through to the lhs instead of the rhs block. if (CB.TrueBB == NextBlock(SwitchBB)) { std::swap(CB.TrueBB, CB.FalseBB); SDValue True = DAG.getConstant(1, dl, Cond.getValueType()); Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True); } SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, MVT::Other, getControlRoot(), Cond, DAG.getBasicBlock(CB.TrueBB)); // Insert the false branch. Do this even if it's a fall through branch, // this makes it easier to do DAG optimizations which require inverting // the branch condition. BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, DAG.getBasicBlock(CB.FalseBB)); DAG.setRoot(BrCond); } /// visitJumpTable - Emit JumpTable node in the current MBB void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) { // Emit the code for the jump table assert(JT.Reg != -1U && "Should lower JT Header first!"); EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(), JT.Reg, PTy); SDValue Table = DAG.getJumpTable(JT.JTI, PTy); SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurSDLoc(), MVT::Other, Index.getValue(1), Table, Index); DAG.setRoot(BrJumpTable); } /// visitJumpTableHeader - This function emits necessary code to produce index /// in the JumpTable from switch case. void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH, MachineBasicBlock *SwitchBB) { SDLoc dl = getCurSDLoc(); // Subtract the lowest switch case value from the value being switched on and // conditional branch to default mbb if the result is greater than the // difference between smallest and largest cases. SDValue SwitchOp = getValue(JTH.SValue); EVT VT = SwitchOp.getValueType(); SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp, DAG.getConstant(JTH.First, dl, VT)); // The SDNode we just created, which holds the value being switched on minus // the smallest case value, needs to be copied to a virtual register so it // can be used as an index into the jump table in a subsequent basic block. // This value may be smaller or larger than the target's pointer type, and // therefore require extension or truncating. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy(DAG.getDataLayout())); unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy(DAG.getDataLayout())); SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, JumpTableReg, SwitchOp); JT.Reg = JumpTableReg; // Emit the range check for the jump table, and branch to the default block // for the switch statement if the value being switched on exceeds the largest // case in the switch. SDValue CMP = DAG.getSetCC( dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), Sub.getValueType()), Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT); SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, MVT::Other, CopyTo, CMP, DAG.getBasicBlock(JT.Default)); // Avoid emitting unnecessary branches to the next block. if (JT.MBB != NextBlock(SwitchBB)) BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, DAG.getBasicBlock(JT.MBB)); DAG.setRoot(BrCond); } /// Create a LOAD_STACK_GUARD node, and let it carry the target specific global /// variable if there exists one. static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL, SDValue &Chain) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); MachineFunction &MF = DAG.getMachineFunction(); Value *Global = TLI.getSDagStackGuard(*MF.getFunction()->getParent()); MachineSDNode *Node = DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, DL, PtrTy, Chain); if (Global) { MachinePointerInfo MPInfo(Global); MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(1); auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant; *MemRefs = MF.getMachineMemOperand(MPInfo, Flags, PtrTy.getSizeInBits() / 8, DAG.getEVTAlignment(PtrTy)); Node->setMemRefs(MemRefs, MemRefs + 1); } return SDValue(Node, 0); } /// Codegen a new tail for a stack protector check ParentMBB which has had its /// tail spliced into a stack protector check success bb. /// /// For a high level explanation of how this fits into the stack protector /// generation see the comment on the declaration of class /// StackProtectorDescriptor. void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, MachineBasicBlock *ParentBB) { // First create the loads to the guard/stack slot for the comparison. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); MachineFrameInfo *MFI = ParentBB->getParent()->getFrameInfo(); int FI = MFI->getStackProtectorIndex(); SDValue Guard; SDLoc dl = getCurSDLoc(); SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy); const Module &M = *ParentBB->getParent()->getFunction()->getParent(); unsigned Align = DL->getPrefTypeAlignment(Type::getInt8PtrTy(M.getContext())); // Generate code to load the content of the guard slot. SDValue StackSlot = DAG.getLoad( PtrTy, dl, DAG.getEntryNode(), StackSlotPtr, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align, MachineMemOperand::MOVolatile); // Retrieve guard check function, nullptr if instrumentation is inlined. if (const Value *GuardCheck = TLI.getSSPStackGuardCheck(M)) { // The target provides a guard check function to validate the guard value. // Generate a call to that function with the content of the guard slot as // argument. auto *Fn = cast(GuardCheck); FunctionType *FnTy = Fn->getFunctionType(); assert(FnTy->getNumParams() == 1 && "Invalid function signature"); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = StackSlot; Entry.Ty = FnTy->getParamType(0); if (Fn->hasAttribute(1, Attribute::AttrKind::InReg)) Entry.isInReg = true; Args.push_back(Entry); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(getCurSDLoc()) .setChain(DAG.getEntryNode()) .setCallee(Fn->getCallingConv(), FnTy->getReturnType(), getValue(GuardCheck), std::move(Args)); std::pair Result = TLI.LowerCallTo(CLI); DAG.setRoot(Result.second); return; } // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD. // Otherwise, emit a volatile load to retrieve the stack guard value. SDValue Chain = DAG.getEntryNode(); if (TLI.useLoadStackGuardNode()) { Guard = getLoadStackGuard(DAG, dl, Chain); } else { const Value *IRGuard = TLI.getSDagStackGuard(M); SDValue GuardPtr = getValue(IRGuard); Guard = DAG.getLoad(PtrTy, dl, Chain, GuardPtr, MachinePointerInfo(IRGuard, 0), Align, MachineMemOperand::MOVolatile); } // Perform the comparison via a subtract/getsetcc. EVT VT = Guard.getValueType(); SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, StackSlot); SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), Sub.getValueType()), Sub, DAG.getConstant(0, dl, VT), ISD::SETNE); // If the sub is not 0, then we know the guard/stackslot do not equal, so // branch to failure MBB. SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, MVT::Other, StackSlot.getOperand(0), Cmp, DAG.getBasicBlock(SPD.getFailureMBB())); // Otherwise branch to success MBB. SDValue Br = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, DAG.getBasicBlock(SPD.getSuccessMBB())); DAG.setRoot(Br); } /// Codegen the failure basic block for a stack protector check. /// /// A failure stack protector machine basic block consists simply of a call to /// __stack_chk_fail(). /// /// For a high level explanation of how this fits into the stack protector /// generation see the comment on the declaration of class /// StackProtectorDescriptor. void SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Chain = TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid, None, false, getCurSDLoc(), false, false).second; DAG.setRoot(Chain); } /// visitBitTestHeader - This function emits necessary code to produce value /// suitable for "bit tests" void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB) { SDLoc dl = getCurSDLoc(); // Subtract the minimum value SDValue SwitchOp = getValue(B.SValue); EVT VT = SwitchOp.getValueType(); SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp, DAG.getConstant(B.First, dl, VT)); // Check range const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue RangeCmp = DAG.getSetCC( dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), Sub.getValueType()), Sub, DAG.getConstant(B.Range, dl, VT), ISD::SETUGT); // Determine the type of the test operands. bool UsePtrType = false; if (!TLI.isTypeLegal(VT)) UsePtrType = true; else { for (unsigned i = 0, e = B.Cases.size(); i != e; ++i) if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) { // Switch table case range are encoded into series of masks. // Just use pointer type, it's guaranteed to fit. UsePtrType = true; break; } } if (UsePtrType) { VT = TLI.getPointerTy(DAG.getDataLayout()); Sub = DAG.getZExtOrTrunc(Sub, dl, VT); } B.RegVT = VT.getSimpleVT(); B.Reg = FuncInfo.CreateReg(B.RegVT); SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, B.Reg, Sub); MachineBasicBlock* MBB = B.Cases[0].ThisBB; addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb); addSuccessorWithProb(SwitchBB, MBB, B.Prob); SwitchBB->normalizeSuccProbs(); SDValue BrRange = DAG.getNode(ISD::BRCOND, dl, MVT::Other, CopyTo, RangeCmp, DAG.getBasicBlock(B.Default)); // Avoid emitting unnecessary branches to the next block. if (MBB != NextBlock(SwitchBB)) BrRange = DAG.getNode(ISD::BR, dl, MVT::Other, BrRange, DAG.getBasicBlock(MBB)); DAG.setRoot(BrRange); } /// visitBitTestCase - this function produces one "bit test" void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, MachineBasicBlock* NextMBB, BranchProbability BranchProbToNext, unsigned Reg, BitTestCase &B, MachineBasicBlock *SwitchBB) { SDLoc dl = getCurSDLoc(); MVT VT = BB.RegVT; SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), dl, Reg, VT); SDValue Cmp; unsigned PopCount = countPopulation(B.Mask); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (PopCount == 1) { // Testing for a single bit; just compare the shift count with what it // would need to be to shift a 1 bit in that position. Cmp = DAG.getSetCC( dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), ShiftOp, DAG.getConstant(countTrailingZeros(B.Mask), dl, VT), ISD::SETEQ); } else if (PopCount == BB.Range) { // There is only one zero bit in the range, test for it directly. Cmp = DAG.getSetCC( dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), ShiftOp, DAG.getConstant(countTrailingOnes(B.Mask), dl, VT), ISD::SETNE); } else { // Make desired shift SDValue SwitchVal = DAG.getNode(ISD::SHL, dl, VT, DAG.getConstant(1, dl, VT), ShiftOp); // Emit bit tests and jumps SDValue AndOp = DAG.getNode(ISD::AND, dl, VT, SwitchVal, DAG.getConstant(B.Mask, dl, VT)); Cmp = DAG.getSetCC( dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), AndOp, DAG.getConstant(0, dl, VT), ISD::SETNE); } // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb. addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb); // The branch probability from SwitchBB to NextMBB is BranchProbToNext. addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext); // It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is // one as they are relative probabilities (and thus work more like weights), // and hence we need to normalize them to let the sum of them become one. SwitchBB->normalizeSuccProbs(); SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl, MVT::Other, getControlRoot(), Cmp, DAG.getBasicBlock(B.TargetBB)); // Avoid emitting unnecessary branches to the next block. if (NextMBB != NextBlock(SwitchBB)) BrAnd = DAG.getNode(ISD::BR, dl, MVT::Other, BrAnd, DAG.getBasicBlock(NextMBB)); DAG.setRoot(BrAnd); } void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { MachineBasicBlock *InvokeMBB = FuncInfo.MBB; // Retrieve successors. Look through artificial IR level blocks like // catchswitch for successors. MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)]; const BasicBlock *EHPadBB = I.getSuccessor(1); // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't // have to do anything here to lower funclet bundles. assert(!I.hasOperandBundlesOtherThan( {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) && "Cannot lower invokes with arbitrary operand bundles yet!"); const Value *Callee(I.getCalledValue()); const Function *Fn = dyn_cast(Callee); if (isa(Callee)) visitInlineAsm(&I); else if (Fn && Fn->isIntrinsic()) { switch (Fn->getIntrinsicID()) { default: llvm_unreachable("Cannot invoke this intrinsic"); case Intrinsic::donothing: // Ignore invokes to @llvm.donothing: jump directly to the next BB. break; case Intrinsic::experimental_patchpoint_void: case Intrinsic::experimental_patchpoint_i64: visitPatchpoint(&I, EHPadBB); break; case Intrinsic::experimental_gc_statepoint: LowerStatepoint(ImmutableStatepoint(&I), EHPadBB); break; } } else if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) { // Currently we do not lower any intrinsic calls with deopt operand bundles. // Eventually we will support lowering the @llvm.experimental.deoptimize // intrinsic, and right now there are no plans to support other intrinsics // with deopt state. LowerCallSiteWithDeoptBundle(&I, getValue(Callee), EHPadBB); } else { LowerCallTo(&I, getValue(Callee), false, EHPadBB); } // If the value of the invoke is used outside of its defining block, make it // available as a virtual register. // We already took care of the exported value for the statepoint instruction // during call to the LowerStatepoint. if (!isStatepoint(I)) { CopyToExportRegsIfNeeded(&I); } SmallVector, 1> UnwindDests; BranchProbabilityInfo *BPI = FuncInfo.BPI; BranchProbability EHPadBBProb = BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB) : BranchProbability::getZero(); findUnwindDestinations(FuncInfo, EHPadBB, EHPadBBProb, UnwindDests); // Update successor info. addSuccessorWithProb(InvokeMBB, Return); for (auto &UnwindDest : UnwindDests) { UnwindDest.first->setIsEHPad(); addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second); } InvokeMBB->normalizeSuccProbs(); // Drop into normal successor. DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(Return))); } void SelectionDAGBuilder::visitResume(const ResumeInst &RI) { llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!"); } void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { assert(FuncInfo.MBB->isEHPad() && "Call to landingpad not in landing pad!"); MachineBasicBlock *MBB = FuncInfo.MBB; MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); AddLandingPadInfo(LP, MMI, MBB); // If there aren't registers to copy the values into (e.g., during SjLj // exceptions), then don't bother to create these DAG nodes. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const Constant *PersonalityFn = FuncInfo.Fn->getPersonalityFn(); if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 && TLI.getExceptionSelectorRegister(PersonalityFn) == 0) return; // If landingpad's return type is token type, we don't create DAG nodes // for its exception pointer and selector value. The extraction of exception // pointer or selector value from token type landingpads is not currently // supported. if (LP.getType()->isTokenTy()) return; SmallVector ValueVTs; SDLoc dl = getCurSDLoc(); ComputeValueVTs(TLI, DAG.getDataLayout(), LP.getType(), ValueVTs); assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported"); // Get the two live-in registers as SDValues. The physregs have already been // copied into virtual registers. SDValue Ops[2]; if (FuncInfo.ExceptionPointerVirtReg) { Ops[0] = DAG.getZExtOrTrunc( DAG.getCopyFromReg(DAG.getEntryNode(), dl, FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy(DAG.getDataLayout())), dl, ValueVTs[0]); } else { Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy(DAG.getDataLayout())); } Ops[1] = DAG.getZExtOrTrunc( DAG.getCopyFromReg(DAG.getEntryNode(), dl, FuncInfo.ExceptionSelectorVirtReg, TLI.getPointerTy(DAG.getDataLayout())), dl, ValueVTs[1]); // Merge into one. SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Ops); setValue(&LP, Res); } void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) { #ifndef NDEBUG for (const CaseCluster &CC : Clusters) assert(CC.Low == CC.High && "Input clusters must be single-case"); #endif std::sort(Clusters.begin(), Clusters.end(), [](const CaseCluster &a, const CaseCluster &b) { return a.Low->getValue().slt(b.Low->getValue()); }); // Merge adjacent clusters with the same destination. const unsigned N = Clusters.size(); unsigned DstIndex = 0; for (unsigned SrcIndex = 0; SrcIndex < N; ++SrcIndex) { CaseCluster &CC = Clusters[SrcIndex]; const ConstantInt *CaseVal = CC.Low; MachineBasicBlock *Succ = CC.MBB; if (DstIndex != 0 && Clusters[DstIndex - 1].MBB == Succ && (CaseVal->getValue() - Clusters[DstIndex - 1].High->getValue()) == 1) { // If this case has the same successor and is a neighbour, merge it into // the previous cluster. Clusters[DstIndex - 1].High = CaseVal; Clusters[DstIndex - 1].Prob += CC.Prob; } else { std::memmove(&Clusters[DstIndex++], &Clusters[SrcIndex], sizeof(Clusters[SrcIndex])); } } Clusters.resize(DstIndex); } void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last) { // Update JTCases. for (unsigned i = 0, e = JTCases.size(); i != e; ++i) if (JTCases[i].first.HeaderBB == First) JTCases[i].first.HeaderBB = Last; // Update BitTestCases. for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i) if (BitTestCases[i].Parent == First) BitTestCases[i].Parent = Last; } void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB; // Update machine-CFG edges with unique successors. SmallSet Done; for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) { BasicBlock *BB = I.getSuccessor(i); bool Inserted = Done.insert(BB).second; if (!Inserted) continue; MachineBasicBlock *Succ = FuncInfo.MBBMap[BB]; addSuccessorWithProb(IndirectBrMBB, Succ); } IndirectBrMBB->normalizeSuccProbs(); DAG.setRoot(DAG.getNode(ISD::BRIND, getCurSDLoc(), MVT::Other, getControlRoot(), getValue(I.getAddress()))); } void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) { if (DAG.getTarget().Options.TrapUnreachable) DAG.setRoot( DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); } void SelectionDAGBuilder::visitFSub(const User &I) { // -0.0 - X --> fneg Type *Ty = I.getType(); if (isa(I.getOperand(0)) && I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) { SDValue Op2 = getValue(I.getOperand(1)); setValue(&I, DAG.getNode(ISD::FNEG, getCurSDLoc(), Op2.getValueType(), Op2)); return; } visitBinary(I, ISD::FSUB); } /// Checks if the given instruction performs a vector reduction, in which case /// we have the freedom to alter the elements in the result as long as the /// reduction of them stays unchanged. static bool isVectorReductionOp(const User *I) { const Instruction *Inst = dyn_cast(I); if (!Inst || !Inst->getType()->isVectorTy()) return false; auto OpCode = Inst->getOpcode(); switch (OpCode) { case Instruction::Add: case Instruction::Mul: case Instruction::And: case Instruction::Or: case Instruction::Xor: break; case Instruction::FAdd: case Instruction::FMul: if (const FPMathOperator *FPOp = dyn_cast(Inst)) if (FPOp->getFastMathFlags().unsafeAlgebra()) break; // Fall through. default: return false; } unsigned ElemNum = Inst->getType()->getVectorNumElements(); unsigned ElemNumToReduce = ElemNum; // Do DFS search on the def-use chain from the given instruction. We only // allow four kinds of operations during the search until we reach the // instruction that extracts the first element from the vector: // // 1. The reduction operation of the same opcode as the given instruction. // // 2. PHI node. // // 3. ShuffleVector instruction together with a reduction operation that // does a partial reduction. // // 4. ExtractElement that extracts the first element from the vector, and we // stop searching the def-use chain here. // // 3 & 4 above perform a reduction on all elements of the vector. We push defs // from 1-3 to the stack to continue the DFS. The given instruction is not // a reduction operation if we meet any other instructions other than those // listed above. SmallVector UsersToVisit{Inst}; SmallPtrSet Visited; bool ReduxExtracted = false; while (!UsersToVisit.empty()) { auto User = UsersToVisit.back(); UsersToVisit.pop_back(); if (!Visited.insert(User).second) continue; for (const auto &U : User->users()) { auto Inst = dyn_cast(U); if (!Inst) return false; if (Inst->getOpcode() == OpCode || isa(U)) { if (const FPMathOperator *FPOp = dyn_cast(Inst)) if (!isa(FPOp) && !FPOp->getFastMathFlags().unsafeAlgebra()) return false; UsersToVisit.push_back(U); } else if (const ShuffleVectorInst *ShufInst = dyn_cast(U)) { // Detect the following pattern: A ShuffleVector instruction together // with a reduction that do partial reduction on the first and second // ElemNumToReduce / 2 elements, and store the result in // ElemNumToReduce / 2 elements in another vector. unsigned ResultElements = ShufInst->getType()->getVectorNumElements(); if (ResultElements < ElemNum) return false; if (ElemNumToReduce == 1) return false; if (!isa(U->getOperand(1))) return false; for (unsigned i = 0; i < ElemNumToReduce / 2; ++i) if (ShufInst->getMaskValue(i) != int(i + ElemNumToReduce / 2)) return false; for (unsigned i = ElemNumToReduce / 2; i < ElemNum; ++i) if (ShufInst->getMaskValue(i) != -1) return false; // There is only one user of this ShuffleVector instruction, which // must be a reduction operation. if (!U->hasOneUse()) return false; auto U2 = dyn_cast(*U->user_begin()); if (!U2 || U2->getOpcode() != OpCode) return false; // Check operands of the reduction operation. if ((U2->getOperand(0) == U->getOperand(0) && U2->getOperand(1) == U) || (U2->getOperand(1) == U->getOperand(0) && U2->getOperand(0) == U)) { UsersToVisit.push_back(U2); ElemNumToReduce /= 2; } else return false; } else if (isa(U)) { // At this moment we should have reduced all elements in the vector. if (ElemNumToReduce != 1) return false; const ConstantInt *Val = dyn_cast(U->getOperand(1)); if (!Val || Val->getZExtValue() != 0) return false; ReduxExtracted = true; } else return false; } } return ReduxExtracted; } void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); bool nuw = false; bool nsw = false; bool exact = false; bool vec_redux = false; FastMathFlags FMF; if (const OverflowingBinaryOperator *OFBinOp = dyn_cast(&I)) { nuw = OFBinOp->hasNoUnsignedWrap(); nsw = OFBinOp->hasNoSignedWrap(); } if (const PossiblyExactOperator *ExactOp = dyn_cast(&I)) exact = ExactOp->isExact(); if (const FPMathOperator *FPOp = dyn_cast(&I)) FMF = FPOp->getFastMathFlags(); if (isVectorReductionOp(&I)) { vec_redux = true; DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n"); } SDNodeFlags Flags; Flags.setExact(exact); Flags.setNoSignedWrap(nsw); Flags.setNoUnsignedWrap(nuw); Flags.setVectorReduction(vec_redux); if (EnableFMFInDAG) { Flags.setAllowReciprocal(FMF.allowReciprocal()); Flags.setNoInfs(FMF.noInfs()); Flags.setNoNaNs(FMF.noNaNs()); Flags.setNoSignedZeros(FMF.noSignedZeros()); Flags.setUnsafeAlgebra(FMF.unsafeAlgebra()); } SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(), Op1, Op2, &Flags); setValue(&I, BinNodeValue); } void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy( Op2.getValueType(), DAG.getDataLayout()); // Coerce the shift amount to the right type if we can. if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) { unsigned ShiftSize = ShiftTy.getSizeInBits(); unsigned Op2Size = Op2.getValueType().getSizeInBits(); SDLoc DL = getCurSDLoc(); // If the operand is smaller than the shift count type, promote it. if (ShiftSize > Op2Size) Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2); // If the operand is larger than the shift count type but the shift // count type has enough bits to represent any shift value, truncate // it now. This is a common case and it exposes the truncate to // optimization early. else if (ShiftSize >= Log2_32_Ceil(Op2.getValueType().getSizeInBits())) Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2); // Otherwise we'll need to temporarily settle for some other convenient // type. Type legalization will make adjustments once the shiftee is split. else Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32); } bool nuw = false; bool nsw = false; bool exact = false; if (Opcode == ISD::SRL || Opcode == ISD::SRA || Opcode == ISD::SHL) { if (const OverflowingBinaryOperator *OFBinOp = dyn_cast(&I)) { nuw = OFBinOp->hasNoUnsignedWrap(); nsw = OFBinOp->hasNoSignedWrap(); } if (const PossiblyExactOperator *ExactOp = dyn_cast(&I)) exact = ExactOp->isExact(); } SDNodeFlags Flags; Flags.setExact(exact); Flags.setNoSignedWrap(nsw); Flags.setNoUnsignedWrap(nuw); SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2, &Flags); setValue(&I, Res); } void SelectionDAGBuilder::visitSDiv(const User &I) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); SDNodeFlags Flags; Flags.setExact(isa(&I) && cast(&I)->isExact()); setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1, Op2, &Flags)); } void SelectionDAGBuilder::visitICmp(const User &I) { ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE; if (const ICmpInst *IC = dyn_cast(&I)) predicate = IC->getPredicate(); else if (const ConstantExpr *IC = dyn_cast(&I)) predicate = ICmpInst::Predicate(IC->getPredicate()); SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Opcode = getICmpCondCode(predicate); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode)); } void SelectionDAGBuilder::visitFCmp(const User &I) { FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE; if (const FCmpInst *FC = dyn_cast(&I)) predicate = FC->getPredicate(); else if (const ConstantExpr *FC = dyn_cast(&I)) predicate = FCmpInst::Predicate(FC->getPredicate()); SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Condition = getFCmpCondCode(predicate); // FIXME: Fcmp instructions have fast-math-flags in IR, so we should use them. // FIXME: We should propagate the fast-math-flags to the DAG node itself for // further optimization, but currently FMF is only applicable to binary nodes. if (TM.Options.NoNaNsFPMath) Condition = getFCmpCodeWithoutNaN(Condition); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition)); } // Check if the condition of the select has one use or two users that are both // selects with the same condition. static bool hasOnlySelectUsers(const Value *Cond) { return std::all_of(Cond->user_begin(), Cond->user_end(), [](const Value *V) { return isa(V); }); } void SelectionDAGBuilder::visitSelect(const User &I) { SmallVector ValueVTs; ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; SmallVector Values(NumValues); SDValue Cond = getValue(I.getOperand(0)); SDValue LHSVal = getValue(I.getOperand(1)); SDValue RHSVal = getValue(I.getOperand(2)); auto BaseOps = {Cond}; ISD::NodeType OpCode = Cond.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT; // Min/max matching is only viable if all output VTs are the same. if (std::equal(ValueVTs.begin(), ValueVTs.end(), ValueVTs.begin())) { EVT VT = ValueVTs[0]; LLVMContext &Ctx = *DAG.getContext(); auto &TLI = DAG.getTargetLoweringInfo(); // We care about the legality of the operation after it has been type // legalized. while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal && VT != TLI.getTypeToTransformTo(Ctx, VT)) VT = TLI.getTypeToTransformTo(Ctx, VT); // If the vselect is legal, assume we want to leave this as a vector setcc + // vselect. Otherwise, if this is going to be scalarized, we want to see if // min/max is legal on the scalar type. bool UseScalarMinMax = VT.isVector() && !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT); Value *LHS, *RHS; auto SPR = matchSelectPattern(const_cast(&I), LHS, RHS); ISD::NodeType Opc = ISD::DELETED_NODE; switch (SPR.Flavor) { case SPF_UMAX: Opc = ISD::UMAX; break; case SPF_UMIN: Opc = ISD::UMIN; break; case SPF_SMAX: Opc = ISD::SMAX; break; case SPF_SMIN: Opc = ISD::SMIN; break; case SPF_FMINNUM: switch (SPR.NaNBehavior) { case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?"); case SPNB_RETURNS_NAN: Opc = ISD::FMINNAN; break; case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break; case SPNB_RETURNS_ANY: { if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT)) Opc = ISD::FMINNUM; else if (TLI.isOperationLegalOrCustom(ISD::FMINNAN, VT)) Opc = ISD::FMINNAN; else if (UseScalarMinMax) Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()) ? ISD::FMINNUM : ISD::FMINNAN; break; } } break; case SPF_FMAXNUM: switch (SPR.NaNBehavior) { case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?"); case SPNB_RETURNS_NAN: Opc = ISD::FMAXNAN; break; case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break; case SPNB_RETURNS_ANY: if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT)) Opc = ISD::FMAXNUM; else if (TLI.isOperationLegalOrCustom(ISD::FMAXNAN, VT)) Opc = ISD::FMAXNAN; else if (UseScalarMinMax) Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()) ? ISD::FMAXNUM : ISD::FMAXNAN; break; } break; default: break; } if (Opc != ISD::DELETED_NODE && (TLI.isOperationLegalOrCustom(Opc, VT) || (UseScalarMinMax && TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) && // If the underlying comparison instruction is used by any other // instruction, the consumed instructions won't be destroyed, so it is // not profitable to convert to a min/max. hasOnlySelectUsers(cast(I).getCondition())) { OpCode = Opc; LHSVal = getValue(LHS); RHSVal = getValue(RHS); BaseOps = {}; } } for (unsigned i = 0; i != NumValues; ++i) { SmallVector Ops(BaseOps.begin(), BaseOps.end()); Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i)); Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i)); Values[i] = DAG.getNode(OpCode, getCurSDLoc(), LHSVal.getNode()->getValueType(LHSVal.getResNo()+i), Ops); } setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), DAG.getVTList(ValueVTs), Values)); } void SelectionDAGBuilder::visitTrunc(const User &I) { // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). SDValue N = getValue(I.getOperand(0)); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitZExt(const User &I) { // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // ZExt also can't be a cast to bool for same reason. So, nothing much to do SDValue N = getValue(I.getOperand(0)); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitSExt(const User &I) { // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // SExt also can't be a cast to bool for same reason. So, nothing much to do SDValue N = getValue(I.getOperand(0)); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitFPTrunc(const User &I) { // FPTrunc is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); SDLoc dl = getCurSDLoc(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N, DAG.getTargetConstant( 0, dl, TLI.getPointerTy(DAG.getDataLayout())))); } void SelectionDAGBuilder::visitFPExt(const User &I) { // FPExt is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitFPToUI(const User &I) { // FPToUI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitFPToSI(const User &I) { // FPToSI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitUIToFP(const User &I) { // UIToFP is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitSIToFP(const User &I) { // SIToFP is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitPtrToInt(const User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT)); } void SelectionDAGBuilder::visitIntToPtr(const User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT)); } void SelectionDAGBuilder::visitBitCast(const User &I) { SDValue N = getValue(I.getOperand(0)); SDLoc dl = getCurSDLoc(); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); // BitCast assures us that source and destination are the same size so this is // either a BITCAST or a no-op. if (DestVT != N.getValueType()) setValue(&I, DAG.getNode(ISD::BITCAST, dl, DestVT, N)); // convert types. // Check if the original LLVM IR Operand was a ConstantInt, because getValue() // might fold any kind of constant expression to an integer constant and that // is not what we are looking for. Only regcognize a bitcast of a genuine // constant integer as an opaque constant. else if(ConstantInt *C = dyn_cast(I.getOperand(0))) setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false, /*isOpaque*/true)); else setValue(&I, N); // noop cast. } void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const Value *SV = I.getOperand(0); SDValue N = getValue(SV); EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); unsigned SrcAS = SV->getType()->getPointerAddressSpace(); unsigned DestAS = I.getType()->getPointerAddressSpace(); if (!TLI.isNoopAddrSpaceCast(SrcAS, DestAS)) N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS); setValue(&I, N); } void SelectionDAGBuilder::visitInsertElement(const User &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue InVec = getValue(I.getOperand(0)); SDValue InVal = getValue(I.getOperand(1)); SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(), TLI.getVectorIdxTy(DAG.getDataLayout())); setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(), TLI.getValueType(DAG.getDataLayout(), I.getType()), InVec, InVal, InIdx)); } void SelectionDAGBuilder::visitExtractElement(const User &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue InVec = getValue(I.getOperand(0)); SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(), TLI.getVectorIdxTy(DAG.getDataLayout())); setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(), TLI.getValueType(DAG.getDataLayout(), I.getType()), InVec, InIdx)); } void SelectionDAGBuilder::visitShuffleVector(const User &I) { SDValue Src1 = getValue(I.getOperand(0)); SDValue Src2 = getValue(I.getOperand(1)); SmallVector Mask; ShuffleVectorInst::getShuffleMask(cast(I.getOperand(2)), Mask); unsigned MaskNumElts = Mask.size(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); EVT SrcVT = Src1.getValueType(); unsigned SrcNumElts = SrcVT.getVectorNumElements(); if (SrcNumElts == MaskNumElts) { setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, Mask)); return; } // Normalize the shuffle vector since mask and vector length don't match. if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) { // Mask is longer than the source vectors and is a multiple of the source // vectors. We can use concatenate vector to make the mask and vectors // lengths match. unsigned NumConcat = MaskNumElts / SrcNumElts; // Check if the shuffle is some kind of concatenation of the input vectors. bool IsConcat = true; SmallVector ConcatSrcs(NumConcat, -1); for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; if (Idx < 0) continue; // Ensure the indices in each SrcVT sized piece are sequential and that // the same source is used for the whole piece. if ((Idx % SrcNumElts != (i % SrcNumElts)) || (ConcatSrcs[i / SrcNumElts] >= 0 && ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) { IsConcat = false; break; } // Remember which source this index came from. ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts; } // The shuffle is concatenating multiple vectors together. Just emit // a CONCAT_VECTORS operation. if (IsConcat) { SmallVector ConcatOps; for (auto Src : ConcatSrcs) { if (Src < 0) ConcatOps.push_back(DAG.getUNDEF(SrcVT)); else if (Src == 0) ConcatOps.push_back(Src1); else ConcatOps.push_back(Src2); } setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurSDLoc(), VT, ConcatOps)); return; } // Pad both vectors with undefs to make them the same length as the mask. SDValue UndefVal = DAG.getUNDEF(SrcVT); SmallVector MOps1(NumConcat, UndefVal); SmallVector MOps2(NumConcat, UndefVal); MOps1[0] = Src1; MOps2[0] = Src2; Src1 = Src1.isUndef() ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, getCurSDLoc(), VT, MOps1); Src2 = Src2.isUndef() ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, getCurSDLoc(), VT, MOps2); // Readjust mask for new input vector length. SmallVector MappedOps; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts - MaskNumElts; MappedOps.push_back(Idx); } setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, MappedOps)); return; } if (SrcNumElts > MaskNumElts) { // Analyze the access pattern of the vector to see if we can extract // two subvectors and do the shuffle. The analysis is done by calculating // the range of elements the mask access on both vectors. int MinRange[2] = { static_cast(SrcNumElts), static_cast(SrcNumElts)}; int MaxRange[2] = {-1, -1}; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; unsigned Input = 0; if (Idx < 0) continue; if (Idx >= (int)SrcNumElts) { Input = 1; Idx -= SrcNumElts; } if (Idx > MaxRange[Input]) MaxRange[Input] = Idx; if (Idx < MinRange[Input]) MinRange[Input] = Idx; } // Check if the access is smaller than the vector size and can we find // a reasonable extract index. int RangeUse[2] = { -1, -1 }; // 0 = Unused, 1 = Extract, -1 = Can not // Extract. int StartIdx[2]; // StartIdx to extract from for (unsigned Input = 0; Input < 2; ++Input) { if (MinRange[Input] >= (int)SrcNumElts && MaxRange[Input] < 0) { RangeUse[Input] = 0; // Unused StartIdx[Input] = 0; continue; } // Find a good start index that is a multiple of the mask length. Then // see if the rest of the elements are in range. StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts; if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts && StartIdx[Input] + MaskNumElts <= SrcNumElts) RangeUse[Input] = 1; // Extract from a multiple of the mask length. } if (RangeUse[0] == 0 && RangeUse[1] == 0) { setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used. return; } if (RangeUse[0] >= 0 && RangeUse[1] >= 0) { // Extract appropriate subvector and generate a vector shuffle for (unsigned Input = 0; Input < 2; ++Input) { SDValue &Src = Input == 0 ? Src1 : Src2; if (RangeUse[Input] == 0) Src = DAG.getUNDEF(VT); else { SDLoc dl = getCurSDLoc(); Src = DAG.getNode( ISD::EXTRACT_SUBVECTOR, dl, VT, Src, DAG.getConstant(StartIdx[Input], dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); } } // Calculate new mask. SmallVector MappedOps; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; if (Idx >= 0) { if (Idx < (int)SrcNumElts) Idx -= StartIdx[0]; else Idx -= SrcNumElts + StartIdx[1] - MaskNumElts; } MappedOps.push_back(Idx); } setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, MappedOps)); return; } } // We can't use either concat vectors or extract subvectors so fall back to // replacing the shuffle with extract and build vector. // to insert and build vector. EVT EltVT = VT.getVectorElementType(); EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); SDLoc dl = getCurSDLoc(); SmallVector Ops; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; SDValue Res; if (Idx < 0) { Res = DAG.getUNDEF(EltVT); } else { SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2; if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts; Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Src, DAG.getConstant(Idx, dl, IdxVT)); } Ops.push_back(Res); } setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops)); } void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { const Value *Op0 = I.getOperand(0); const Value *Op1 = I.getOperand(1); Type *AggTy = I.getType(); Type *ValTy = Op1->getType(); bool IntoUndef = isa(Op0); bool FromUndef = isa(Op1); unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector AggValueVTs; ComputeValueVTs(TLI, DAG.getDataLayout(), AggTy, AggValueVTs); SmallVector ValValueVTs; ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs); unsigned NumAggValues = AggValueVTs.size(); unsigned NumValValues = ValValueVTs.size(); SmallVector Values(NumAggValues); // Ignore an insertvalue that produces an empty object if (!NumAggValues) { setValue(&I, DAG.getUNDEF(MVT(MVT::Other))); return; } SDValue Agg = getValue(Op0); unsigned i = 0; // Copy the beginning value(s) from the original aggregate. for (; i != LinearIndex; ++i) Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) : SDValue(Agg.getNode(), Agg.getResNo() + i); // Copy values from the inserted value(s). if (NumValValues) { SDValue Val = getValue(Op1); for (; i != LinearIndex + NumValValues; ++i) Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) : SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex); } // Copy remaining value(s) from the original aggregate. for (; i != NumAggValues; ++i) Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) : SDValue(Agg.getNode(), Agg.getResNo() + i); setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), DAG.getVTList(AggValueVTs), Values)); } void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { const Value *Op0 = I.getOperand(0); Type *AggTy = Op0->getType(); Type *ValTy = I.getType(); bool OutOfUndef = isa(Op0); unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector ValValueVTs; ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs); unsigned NumValValues = ValValueVTs.size(); // Ignore a extractvalue that produces an empty object if (!NumValValues) { setValue(&I, DAG.getUNDEF(MVT(MVT::Other))); return; } SmallVector Values(NumValValues); SDValue Agg = getValue(Op0); // Copy out the selected value(s). for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i) Values[i - LinearIndex] = OutOfUndef ? DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) : SDValue(Agg.getNode(), Agg.getResNo() + i); setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), DAG.getVTList(ValValueVTs), Values)); } void SelectionDAGBuilder::visitGetElementPtr(const User &I) { Value *Op0 = I.getOperand(0); // Note that the pointer operand may be a vector of pointers. Take the scalar // element which holds a pointer. unsigned AS = Op0->getType()->getScalarType()->getPointerAddressSpace(); SDValue N = getValue(Op0); SDLoc dl = getCurSDLoc(); // Normalize Vector GEP - all scalar operands should be converted to the // splat vector. unsigned VectorWidth = I.getType()->isVectorTy() ? cast(I.getType())->getVectorNumElements() : 0; if (VectorWidth && !N.getValueType().isVector()) { LLVMContext &Context = *DAG.getContext(); EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorWidth); SmallVector Ops(VectorWidth, N); N = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I); GTI != E; ++GTI) { const Value *Idx = GTI.getOperand(); if (StructType *StTy = dyn_cast(*GTI)) { unsigned Field = cast(Idx)->getUniqueInteger().getZExtValue(); if (Field) { // N = N + Offset uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field); // In an inbouds GEP with an offset that is nonnegative even when // interpreted as signed, assume there is no unsigned overflow. SDNodeFlags Flags; if (int64_t(Offset) >= 0 && cast(I).isInBounds()) Flags.setNoUnsignedWrap(true); N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, DAG.getConstant(Offset, dl, N.getValueType()), &Flags); } } else { MVT PtrTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout(), AS); unsigned PtrSize = PtrTy.getSizeInBits(); APInt ElementSize(PtrSize, DL->getTypeAllocSize(GTI.getIndexedType())); // If this is a scalar constant or a splat vector of constants, // handle it quickly. const auto *CI = dyn_cast(Idx); if (!CI && isa(Idx) && cast(Idx)->getSplatValue()) CI = cast(cast(Idx)->getSplatValue()); if (CI) { if (CI->isZero()) continue; APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize); SDValue OffsVal = VectorWidth ? DAG.getConstant(Offs, dl, MVT::getVectorVT(PtrTy, VectorWidth)) : DAG.getConstant(Offs, dl, PtrTy); // In an inbouds GEP with an offset that is nonnegative even when // interpreted as signed, assume there is no unsigned overflow. SDNodeFlags Flags; if (Offs.isNonNegative() && cast(I).isInBounds()) Flags.setNoUnsignedWrap(true); N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, &Flags); continue; } // N = N + Idx * ElementSize; SDValue IdxN = getValue(Idx); if (!IdxN.getValueType().isVector() && VectorWidth) { MVT VT = MVT::getVectorVT(IdxN.getValueType().getSimpleVT(), VectorWidth); SmallVector Ops(VectorWidth, IdxN); IdxN = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } // If the index is smaller or larger than intptr_t, truncate or extend // it. IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType()); // If this is a multiply by a power of two, turn it into a shl // immediately. This is a very common case. if (ElementSize != 1) { if (ElementSize.isPowerOf2()) { unsigned Amt = ElementSize.logBase2(); IdxN = DAG.getNode(ISD::SHL, dl, N.getValueType(), IdxN, DAG.getConstant(Amt, dl, IdxN.getValueType())); } else { SDValue Scale = DAG.getConstant(ElementSize, dl, IdxN.getValueType()); IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, Scale); } } N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, IdxN); } } setValue(&I, N); } void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { // If this is a fixed sized alloca in the entry block of the function, // allocate it statically on the stack. if (FuncInfo.StaticAllocaMap.count(&I)) return; // getValue will auto-populate this. SDLoc dl = getCurSDLoc(); Type *Ty = I.getAllocatedType(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); auto &DL = DAG.getDataLayout(); uint64_t TySize = DL.getTypeAllocSize(Ty); unsigned Align = std::max((unsigned)DL.getPrefTypeAlignment(Ty), I.getAlignment()); SDValue AllocSize = getValue(I.getArraySize()); EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout()); if (AllocSize.getValueType() != IntPtr) AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr); AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr, AllocSize, DAG.getConstant(TySize, dl, IntPtr)); // Handle alignment. If the requested alignment is less than or equal to // the stack alignment, ignore it. If the size is greater than or equal to // the stack alignment, we note this in the DYNAMIC_STACKALLOC node. unsigned StackAlign = DAG.getSubtarget().getFrameLowering()->getStackAlignment(); if (Align <= StackAlign) Align = 0; // Round the size of the allocation up to the stack alignment size // by add SA-1 to the size. This doesn't overflow because we're computing // an address inside an alloca. SDNodeFlags Flags; Flags.setNoUnsignedWrap(true); AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize, DAG.getIntPtrConstant(StackAlign - 1, dl), &Flags); // Mask out the low bits for alignment purposes. AllocSize = DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize, DAG.getIntPtrConstant(~(uint64_t)(StackAlign - 1), dl)); SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align, dl) }; SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other); SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops); setValue(&I, DSA); DAG.setRoot(DSA.getValue(1)); assert(FuncInfo.MF->getFrameInfo()->hasVarSizedObjects()); } void SelectionDAGBuilder::visitLoad(const LoadInst &I) { if (I.isAtomic()) return visitAtomicLoad(I); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const Value *SV = I.getOperand(0); if (TLI.supportSwiftError()) { // Swifterror values can come from either a function parameter with // swifterror attribute or an alloca with swifterror attribute. if (const Argument *Arg = dyn_cast(SV)) { if (Arg->hasSwiftErrorAttr()) return visitLoadFromSwiftError(I); } if (const AllocaInst *Alloca = dyn_cast(SV)) { if (Alloca->isSwiftError()) return visitLoadFromSwiftError(I); } } SDValue Ptr = getValue(SV); Type *Ty = I.getType(); bool isVolatile = I.isVolatile(); bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; // The IR notion of invariant_load only guarantees that all *non-faulting* // invariant loads result in the same value. The MI notion of invariant load // guarantees that the load can be legally moved to any location within its // containing function. The MI notion of invariant_load is stronger than the // IR notion of invariant_load -- an MI invariant_load is an IR invariant_load // with a guarantee that the location being loaded from is dereferenceable // throughout the function's lifetime. bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr && isDereferenceablePointer(SV, DAG.getDataLayout()); unsigned Alignment = I.getAlignment(); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); SmallVector ValueVTs; SmallVector Offsets; ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; SDValue Root; bool ConstantMemory = false; if (isVolatile || NumValues > MaxParallelChains) // Serialize volatile loads with other side effects. Root = getRoot(); else if (AA->pointsToConstantMemory(MemoryLocation( SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. Root = DAG.getEntryNode(); ConstantMemory = true; } else { // Do not serialize non-volatile loads against each other. Root = DAG.getRoot(); } SDLoc dl = getCurSDLoc(); if (isVolatile) Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG); // An aggregate load cannot wrap around the address space, so offsets to its // parts don't wrap either. SDNodeFlags Flags; Flags.setNoUnsignedWrap(true); SmallVector Values(NumValues); SmallVector Chains(std::min(MaxParallelChains, NumValues)); EVT PtrVT = Ptr.getValueType(); unsigned ChainI = 0; for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { // Serializing loads here may result in excessive register pressure, and // TokenFactor places arbitrary choke points on the scheduler. SD scheduling // could recover a bit by hoisting nodes upward in the chain by recognizing // they are side-effect free or do not alias. The optimizer should really // avoid this case by converting large object/array copies to llvm.memcpy // (MaxParallelChains should always remain as failsafe). if (ChainI == MaxParallelChains) { assert(PendingLoads.empty() && "PendingLoads must be serialized first"); SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, makeArrayRef(Chains.data(), ChainI)); Root = Chain; ChainI = 0; } SDValue A = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, DAG.getConstant(Offsets[i], dl, PtrVT), &Flags); auto MMOFlags = MachineMemOperand::MONone; if (isVolatile) MMOFlags |= MachineMemOperand::MOVolatile; if (isNonTemporal) MMOFlags |= MachineMemOperand::MONonTemporal; if (isInvariant) MMOFlags |= MachineMemOperand::MOInvariant; SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A, MachinePointerInfo(SV, Offsets[i]), Alignment, MMOFlags, AAInfo, Ranges); Values[i] = L; Chains[ChainI] = L.getValue(1); } if (!ConstantMemory) { SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, makeArrayRef(Chains.data(), ChainI)); if (isVolatile) DAG.setRoot(Chain); else PendingLoads.push_back(Chain); } setValue(&I, DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values)); } void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); assert(TLI.supportSwiftError() && "call visitStoreToSwiftError when backend supports swifterror"); SmallVector ValueVTs; SmallVector Offsets; const Value *SrcV = I.getOperand(0); ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), SrcV->getType(), ValueVTs, &Offsets); assert(ValueVTs.size() == 1 && Offsets[0] == 0 && "expect a single EVT for swifterror"); SDValue Src = getValue(SrcV); // Create a virtual register, then update the virtual register. auto &DL = DAG.getDataLayout(); const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL)); unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC); // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue // Chain can be getRoot or getControlRoot. SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg, SDValue(Src.getNode(), Src.getResNo())); DAG.setRoot(CopyNode); FuncInfo.setSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg); } void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) { assert(DAG.getTargetLoweringInfo().supportSwiftError() && "call visitLoadFromSwiftError when backend supports swifterror"); assert(!I.isVolatile() && I.getMetadata(LLVMContext::MD_nontemporal) == nullptr && I.getMetadata(LLVMContext::MD_invariant_load) == nullptr && "Support volatile, non temporal, invariant for load_from_swift_error"); const Value *SV = I.getOperand(0); Type *Ty = I.getType(); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); assert(!AA->pointsToConstantMemory(MemoryLocation( SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo)) && "load_from_swift_error should not be constant memory"); SmallVector ValueVTs; SmallVector Offsets; ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Ty, ValueVTs, &Offsets); assert(ValueVTs.size() == 1 && Offsets[0] == 0 && "expect a single EVT for swifterror"); // Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT SDValue L = DAG.getCopyFromReg(getRoot(), getCurSDLoc(), FuncInfo.findSwiftErrorVReg(FuncInfo.MBB, SV), ValueVTs[0]); setValue(&I, L); } void SelectionDAGBuilder::visitStore(const StoreInst &I) { if (I.isAtomic()) return visitAtomicStore(I); const Value *SrcV = I.getOperand(0); const Value *PtrV = I.getOperand(1); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (TLI.supportSwiftError()) { // Swifterror values can come from either a function parameter with // swifterror attribute or an alloca with swifterror attribute. if (const Argument *Arg = dyn_cast(PtrV)) { if (Arg->hasSwiftErrorAttr()) return visitStoreToSwiftError(I); } if (const AllocaInst *Alloca = dyn_cast(PtrV)) { if (Alloca->isSwiftError()) return visitStoreToSwiftError(I); } } SmallVector ValueVTs; SmallVector Offsets; ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), SrcV->getType(), ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; // Get the lowered operands. Note that we do this after // checking if NumResults is zero, because with zero results // the operands won't have values in the map. SDValue Src = getValue(SrcV); SDValue Ptr = getValue(PtrV); SDValue Root = getRoot(); SmallVector Chains(std::min(MaxParallelChains, NumValues)); SDLoc dl = getCurSDLoc(); EVT PtrVT = Ptr.getValueType(); unsigned Alignment = I.getAlignment(); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); auto MMOFlags = MachineMemOperand::MONone; if (I.isVolatile()) MMOFlags |= MachineMemOperand::MOVolatile; if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr) MMOFlags |= MachineMemOperand::MONonTemporal; // An aggregate load cannot wrap around the address space, so offsets to its // parts don't wrap either. SDNodeFlags Flags; Flags.setNoUnsignedWrap(true); unsigned ChainI = 0; for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { // See visitLoad comments. if (ChainI == MaxParallelChains) { SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, makeArrayRef(Chains.data(), ChainI)); Root = Chain; ChainI = 0; } SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, DAG.getConstant(Offsets[i], dl, PtrVT), &Flags); SDValue St = DAG.getStore( Root, dl, SDValue(Src.getNode(), Src.getResNo() + i), Add, MachinePointerInfo(PtrV, Offsets[i]), Alignment, MMOFlags, AAInfo); Chains[ChainI] = St; } SDValue StoreNode = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, makeArrayRef(Chains.data(), ChainI)); DAG.setRoot(StoreNode); } void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) { SDLoc sdl = getCurSDLoc(); // llvm.masked.store.*(Src0, Ptr, alignment, Mask) Value *PtrOperand = I.getArgOperand(1); SDValue Ptr = getValue(PtrOperand); SDValue Src0 = getValue(I.getArgOperand(0)); SDValue Mask = getValue(I.getArgOperand(3)); EVT VT = Src0.getValueType(); unsigned Alignment = (cast(I.getArgOperand(2)))->getZExtValue(); if (!Alignment) Alignment = DAG.getEVTAlignment(VT); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore, VT.getStoreSize(), Alignment, AAInfo); SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT, MMO, false); DAG.setRoot(StoreNode); setValue(&I, StoreNode); } // Get a uniform base for the Gather/Scatter intrinsic. // The first argument of the Gather/Scatter intrinsic is a vector of pointers. // We try to represent it as a base pointer + vector of indices. // Usually, the vector of pointers comes from a 'getelementptr' instruction. // The first operand of the GEP may be a single pointer or a vector of pointers // Example: // %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind // or // %gep.ptr = getelementptr i32, i32* %ptr, <8 x i32> %ind // %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, .. // // When the first GEP operand is a single pointer - it is the uniform base we // are looking for. If first operand of the GEP is a splat vector - we // extract the spalt value and use it as a uniform base. // In all other cases the function returns 'false'. // static bool getUniformBase(const Value *& Ptr, SDValue& Base, SDValue& Index, SelectionDAGBuilder* SDB) { SelectionDAG& DAG = SDB->DAG; LLVMContext &Context = *DAG.getContext(); assert(Ptr->getType()->isVectorTy() && "Uexpected pointer type"); const GetElementPtrInst *GEP = dyn_cast(Ptr); if (!GEP || GEP->getNumOperands() > 2) return false; const Value *GEPPtr = GEP->getPointerOperand(); if (!GEPPtr->getType()->isVectorTy()) Ptr = GEPPtr; else if (!(Ptr = getSplatValue(GEPPtr))) return false; Value *IndexVal = GEP->getOperand(1); // The operands of the GEP may be defined in another basic block. // In this case we'll not find nodes for the operands. if (!SDB->findValue(Ptr) || !SDB->findValue(IndexVal)) return false; Base = SDB->getValue(Ptr); Index = SDB->getValue(IndexVal); // Suppress sign extension. if (SExtInst* Sext = dyn_cast(IndexVal)) { if (SDB->findValue(Sext->getOperand(0))) { IndexVal = Sext->getOperand(0); Index = SDB->getValue(IndexVal); } } if (!Index.getValueType().isVector()) { unsigned GEPWidth = GEP->getType()->getVectorNumElements(); EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth); SmallVector Ops(GEPWidth, Index); Index = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Index), VT, Ops); } return true; } void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { SDLoc sdl = getCurSDLoc(); // llvm.masked.scatter.*(Src0, Ptrs, alignemt, Mask) const Value *Ptr = I.getArgOperand(1); SDValue Src0 = getValue(I.getArgOperand(0)); SDValue Mask = getValue(I.getArgOperand(3)); EVT VT = Src0.getValueType(); unsigned Alignment = (cast(I.getArgOperand(2)))->getZExtValue(); if (!Alignment) Alignment = DAG.getEVTAlignment(VT); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); SDValue Base; SDValue Index; const Value *BasePtr = Ptr; bool UniformBase = getUniformBase(BasePtr, Base, Index, this); const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr; MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MachinePointerInfo(MemOpBasePtr), MachineMemOperand::MOStore, VT.getStoreSize(), Alignment, AAInfo); if (!UniformBase) { Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); } SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index }; SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl, Ops, MMO); DAG.setRoot(Scatter); setValue(&I, Scatter); } void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { SDLoc sdl = getCurSDLoc(); // @llvm.masked.load.*(Ptr, alignment, Mask, Src0) Value *PtrOperand = I.getArgOperand(0); SDValue Ptr = getValue(PtrOperand); SDValue Src0 = getValue(I.getArgOperand(3)); SDValue Mask = getValue(I.getArgOperand(2)); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); unsigned Alignment = (cast(I.getArgOperand(1)))->getZExtValue(); if (!Alignment) Alignment = DAG.getEVTAlignment(VT); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); // Do not serialize masked loads of constant memory with anything. bool AddToChain = !AA->pointsToConstantMemory(MemoryLocation( PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()), AAInfo)); SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode(); MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad, VT.getStoreSize(), Alignment, AAInfo, Ranges); SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO, ISD::NON_EXTLOAD); if (AddToChain) { SDValue OutChain = Load.getValue(1); DAG.setRoot(OutChain); } setValue(&I, Load); } void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { SDLoc sdl = getCurSDLoc(); // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0) const Value *Ptr = I.getArgOperand(0); SDValue Src0 = getValue(I.getArgOperand(3)); SDValue Mask = getValue(I.getArgOperand(2)); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); unsigned Alignment = (cast(I.getArgOperand(1)))->getZExtValue(); if (!Alignment) Alignment = DAG.getEVTAlignment(VT); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); SDValue Root = DAG.getRoot(); SDValue Base; SDValue Index; const Value *BasePtr = Ptr; bool UniformBase = getUniformBase(BasePtr, Base, Index, this); bool ConstantMemory = false; if (UniformBase && AA->pointsToConstantMemory(MemoryLocation( BasePtr, DAG.getDataLayout().getTypeStoreSize(I.getType()), AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. Root = DAG.getEntryNode(); ConstantMemory = true; } MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MachinePointerInfo(UniformBase ? BasePtr : nullptr), MachineMemOperand::MOLoad, VT.getStoreSize(), Alignment, AAInfo, Ranges); if (!UniformBase) { Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); } SDValue Ops[] = { Root, Src0, Mask, Base, Index }; SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl, Ops, MMO); SDValue OutChain = Gather.getValue(1); if (!ConstantMemory) PendingLoads.push_back(OutChain); setValue(&I, Gather); } void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { SDLoc dl = getCurSDLoc(); AtomicOrdering SuccessOrder = I.getSuccessOrdering(); AtomicOrdering FailureOrder = I.getFailureOrdering(); SynchronizationScope Scope = I.getSynchScope(); SDValue InChain = getRoot(); MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType(); SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other); SDValue L = DAG.getAtomicCmpSwap( ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain, getValue(I.getPointerOperand()), getValue(I.getCompareOperand()), getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()), /*Alignment=*/ 0, SuccessOrder, FailureOrder, Scope); SDValue OutChain = L.getValue(2); setValue(&I, L); DAG.setRoot(OutChain); } void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { SDLoc dl = getCurSDLoc(); ISD::NodeType NT; switch (I.getOperation()) { default: llvm_unreachable("Unknown atomicrmw operation"); case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break; case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break; case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break; case AtomicRMWInst::And: NT = ISD::ATOMIC_LOAD_AND; break; case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break; case AtomicRMWInst::Or: NT = ISD::ATOMIC_LOAD_OR; break; case AtomicRMWInst::Xor: NT = ISD::ATOMIC_LOAD_XOR; break; case AtomicRMWInst::Max: NT = ISD::ATOMIC_LOAD_MAX; break; case AtomicRMWInst::Min: NT = ISD::ATOMIC_LOAD_MIN; break; case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break; case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break; } AtomicOrdering Order = I.getOrdering(); SynchronizationScope Scope = I.getSynchScope(); SDValue InChain = getRoot(); SDValue L = DAG.getAtomic(NT, dl, getValue(I.getValOperand()).getSimpleValueType(), InChain, getValue(I.getPointerOperand()), getValue(I.getValOperand()), I.getPointerOperand(), /* Alignment=*/ 0, Order, Scope); SDValue OutChain = L.getValue(1); setValue(&I, L); DAG.setRoot(OutChain); } void SelectionDAGBuilder::visitFence(const FenceInst &I) { SDLoc dl = getCurSDLoc(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Ops[3]; Ops[0] = getRoot(); Ops[1] = DAG.getConstant((unsigned)I.getOrdering(), dl, TLI.getPointerTy(DAG.getDataLayout())); Ops[2] = DAG.getConstant(I.getSynchScope(), dl, TLI.getPointerTy(DAG.getDataLayout())); DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops)); } void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { SDLoc dl = getCurSDLoc(); AtomicOrdering Order = I.getOrdering(); SynchronizationScope Scope = I.getSynchScope(); SDValue InChain = getRoot(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); if (I.getAlignment() < VT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic load"); MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, VT.getStoreSize(), I.getAlignment() ? I.getAlignment() : DAG.getEVTAlignment(VT)); InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG); SDValue L = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain, getValue(I.getPointerOperand()), MMO, Order, Scope); SDValue OutChain = L.getValue(1); setValue(&I, L); DAG.setRoot(OutChain); } void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { SDLoc dl = getCurSDLoc(); AtomicOrdering Order = I.getOrdering(); SynchronizationScope Scope = I.getSynchScope(); SDValue InChain = getRoot(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getValueOperand()->getType()); if (I.getAlignment() < VT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic store"); SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT, InChain, getValue(I.getPointerOperand()), getValue(I.getValueOperand()), I.getPointerOperand(), I.getAlignment(), Order, Scope); DAG.setRoot(OutChain); } /// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC /// node. void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic) { bool HasChain = !I.doesNotAccessMemory(); bool OnlyLoad = HasChain && I.onlyReadsMemory(); // Build the operand list. SmallVector Ops; if (HasChain) { // If this intrinsic has side-effects, chainify it. if (OnlyLoad) { // We don't need to serialize loads against other loads. Ops.push_back(DAG.getRoot()); } else { Ops.push_back(getRoot()); } } // Info is set by getTgtMemInstrinsic TargetLowering::IntrinsicInfo Info; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic); // Add the intrinsic ID as an integer operand if it's not a target intrinsic. if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || Info.opc == ISD::INTRINSIC_W_CHAIN) Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); // Add all operands of the call to the operand list. for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { SDValue Op = getValue(I.getArgOperand(i)); Ops.push_back(Op); } SmallVector ValueVTs; ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs); if (HasChain) ValueVTs.push_back(MVT::Other); SDVTList VTs = DAG.getVTList(ValueVTs); // Create the node. SDValue Result; if (IsTgtIntrinsic) { // This is target intrinsic that touches memory Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT, MachinePointerInfo(Info.ptrVal, Info.offset), Info.align, Info.vol, Info.readMem, Info.writeMem, Info.size); } else if (!HasChain) { Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops); } else if (!I.getType()->isVoidTy()) { Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops); } else { Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops); } if (HasChain) { SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1); if (OnlyLoad) PendingLoads.push_back(Chain); else DAG.setRoot(Chain); } if (!I.getType()->isVoidTy()) { if (VectorType *PTy = dyn_cast(I.getType())) { EVT VT = TLI.getValueType(DAG.getDataLayout(), PTy); Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result); } else Result = lowerRangeToAssertZExt(DAG, I, Result); setValue(&I, Result); } } /// GetSignificand - Get the significand and build it into a floating-point /// number with exponent of 1: /// /// Op = (Op & 0x007fffff) | 0x3f800000; /// /// where Op is the hexadecimal representation of floating point value. static SDValue GetSignificand(SelectionDAG &DAG, SDValue Op, const SDLoc &dl) { SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, DAG.getConstant(0x007fffff, dl, MVT::i32)); SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1, DAG.getConstant(0x3f800000, dl, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2); } /// GetExponent - Get the exponent: /// /// (float)(int)(((Op & 0x7f800000) >> 23) - 127); /// /// where Op is the hexadecimal representation of floating point value. static SDValue GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI, const SDLoc &dl) { SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, DAG.getConstant(0x7f800000, dl, MVT::i32)); SDValue t1 = DAG.getNode( ISD::SRL, dl, MVT::i32, t0, DAG.getConstant(23, dl, TLI.getPointerTy(DAG.getDataLayout()))); SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1, DAG.getConstant(127, dl, MVT::i32)); return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2); } /// getF32Constant - Get 32-bit floating point constant. static SDValue getF32Constant(SelectionDAG &DAG, unsigned Flt, const SDLoc &dl) { return DAG.getConstantFP(APFloat(APFloat::IEEEsingle, APInt(32, Flt)), dl, MVT::f32); } static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl, SelectionDAG &DAG) { // TODO: What fast-math-flags should be set on the floating-point nodes? // IntegerPartOfX = ((int32_t)(t0); SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); // FractionalPartOfX = t0 - (float)IntegerPartOfX; SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); // IntegerPartOfX <<= 23; IntegerPartOfX = DAG.getNode( ISD::SHL, dl, MVT::i32, IntegerPartOfX, DAG.getConstant(23, dl, DAG.getTargetLoweringInfo().getPointerTy( DAG.getDataLayout()))); SDValue TwoToFractionalPartOfX; if (LimitFloatPrecision <= 6) { // For floating-point precision of 6: // // TwoToFractionalPartOfX = // 0.997535578f + // (0.735607626f + 0.252464424f * x) * x; // // error 0.0144103317, which is 6 bits SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0x3e814304, dl)); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, getF32Constant(DAG, 0x3f3c50c8, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x3f7f5e7e, dl)); } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // // TwoToFractionalPartOfX = // 0.999892986f + // (0.696457318f + // (0.224338339f + 0.792043434e-1f * x) * x) * x; // // error 0.000107046256, which is 13 to 14 bits SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0x3da235e3, dl)); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, getF32Constant(DAG, 0x3e65b8f3, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x3f324b07, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, getF32Constant(DAG, 0x3f7ff8fd, dl)); } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // // TwoToFractionalPartOfX = // 0.999999982f + // (0.693148872f + // (0.240227044f + // (0.554906021e-1f + // (0.961591928e-2f + // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; // error 2.47208000*10^(-7), which is better than 18 bits SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0x3924b03e, dl)); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, getF32Constant(DAG, 0x3ab24b87, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x3c1d8c17, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, getF32Constant(DAG, 0x3d634a1d, dl)); SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, getF32Constant(DAG, 0x3e75fe14, dl)); SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, getF32Constant(DAG, 0x3f317234, dl)); SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, getF32Constant(DAG, 0x3f800000, dl)); } // Add the exponent into the result in integer domain. SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFractionalPartOfX); return DAG.getNode(ISD::BITCAST, dl, MVT::f32, DAG.getNode(ISD::ADD, dl, MVT::i32, t13, IntegerPartOfX)); } /// expandExp - Lower an exp intrinsic. Handles the special sequences for /// limited-precision mode. static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { // Put the exponent in the right bit position for later addition to the // final result: // // #define LOG2OFe 1.4426950f // t0 = Op * LOG2OFe // TODO: What fast-math-flags should be set here? SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, getF32Constant(DAG, 0x3fb8aa3b, dl)); return getLimitedPrecisionExp2(t0, dl, DAG); } // No special expansion. return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op); } /// expandLog - Lower a log intrinsic. Handles the special sequences for /// limited-precision mode. static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { // TODO: What fast-math-flags should be set on the floating-point nodes? if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); // Scale the exponent by log(2) [0.69314718f]. SDValue Exp = GetExponent(DAG, Op1, TLI, dl); SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, getF32Constant(DAG, 0x3f317218, dl)); // Get the significand and build it into a floating-point number with // exponent of 1. SDValue X = GetSignificand(DAG, Op1, dl); SDValue LogOfMantissa; if (LimitFloatPrecision <= 6) { // For floating-point precision of 6: // // LogofMantissa = // -1.1609546f + // (1.4034025f - 0.23903021f * x) * x; // // error 0.0034276066, which is better than 8 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0xbe74c456, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, getF32Constant(DAG, 0x3fb3a2b1, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, getF32Constant(DAG, 0x3f949a29, dl)); } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // // LogOfMantissa = // -1.7417939f + // (2.8212026f + // (-1.4699568f + // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x; // // error 0.000061011436, which is 14 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0xbd67b6d6, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, getF32Constant(DAG, 0x3ee4f4b8, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, getF32Constant(DAG, 0x3fbc278b, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x40348e95, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, getF32Constant(DAG, 0x3fdef31a, dl)); } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // // LogOfMantissa = // -2.1072184f + // (4.2372794f + // (-3.7029485f + // (2.2781945f + // (-0.87823314f + // (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x; // // error 0.0000023660568, which is better than 18 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0xbc91e5ac, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, getF32Constant(DAG, 0x3e4350aa, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, getF32Constant(DAG, 0x3f60d3e3, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x4011cdf0, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, getF32Constant(DAG, 0x406cfd1c, dl)); SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, getF32Constant(DAG, 0x408797cb, dl)); SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, getF32Constant(DAG, 0x4006dcab, dl)); } return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa); } // No special expansion. return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op); } /// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for /// limited-precision mode. static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { // TODO: What fast-math-flags should be set on the floating-point nodes? if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); // Get the exponent. SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl); // Get the significand and build it into a floating-point number with // exponent of 1. SDValue X = GetSignificand(DAG, Op1, dl); // Different possible minimax approximations of significand in // floating-point for various degrees of accuracy over [1,2]. SDValue Log2ofMantissa; if (LimitFloatPrecision <= 6) { // For floating-point precision of 6: // // Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x; // // error 0.0049451742, which is more than 7 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0xbeb08fe0, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, getF32Constant(DAG, 0x40019463, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, getF32Constant(DAG, 0x3fd6633d, dl)); } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // // Log2ofMantissa = // -2.51285454f + // (4.07009056f + // (-2.12067489f + // (.645142248f - 0.816157886e-1f * x) * x) * x) * x; // // error 0.0000876136000, which is better than 13 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0xbda7262e, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, getF32Constant(DAG, 0x3f25280b, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, getF32Constant(DAG, 0x4007b923, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x40823e2f, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, getF32Constant(DAG, 0x4020d29c, dl)); } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // // Log2ofMantissa = // -3.0400495f + // (6.1129976f + // (-5.3420409f + // (3.2865683f + // (-1.2669343f + // (0.27515199f - // 0.25691327e-1f * x) * x) * x) * x) * x) * x; // // error 0.0000018516, which is better than 18 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0xbcd2769e, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, getF32Constant(DAG, 0x3e8ce0b9, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, getF32Constant(DAG, 0x3fa22ae7, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x40525723, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, getF32Constant(DAG, 0x40aaf200, dl)); SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, getF32Constant(DAG, 0x40c39dad, dl)); SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, getF32Constant(DAG, 0x4042902c, dl)); } return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa); } // No special expansion. return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op); } /// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for /// limited-precision mode. static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { // TODO: What fast-math-flags should be set on the floating-point nodes? if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); // Scale the exponent by log10(2) [0.30102999f]. SDValue Exp = GetExponent(DAG, Op1, TLI, dl); SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, getF32Constant(DAG, 0x3e9a209a, dl)); // Get the significand and build it into a floating-point number with // exponent of 1. SDValue X = GetSignificand(DAG, Op1, dl); SDValue Log10ofMantissa; if (LimitFloatPrecision <= 6) { // For floating-point precision of 6: // // Log10ofMantissa = // -0.50419619f + // (0.60948995f - 0.10380950f * x) * x; // // error 0.0014886165, which is 6 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0xbdd49a13, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, getF32Constant(DAG, 0x3f1c0789, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, getF32Constant(DAG, 0x3f011300, dl)); } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // // Log10ofMantissa = // -0.64831180f + // (0.91751397f + // (-0.31664806f + 0.47637168e-1f * x) * x) * x; // // error 0.00019228036, which is better than 12 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0x3d431f31, dl)); SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, getF32Constant(DAG, 0x3ea21fb2, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, getF32Constant(DAG, 0x3f6ae232, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, getF32Constant(DAG, 0x3f25f7c3, dl)); } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // // Log10ofMantissa = // -0.84299375f + // (1.5327582f + // (-1.0688956f + // (0.49102474f + // (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x; // // error 0.0000037995730, which is better than 18 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0x3c5d51ce, dl)); SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, getF32Constant(DAG, 0x3e00685a, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, getF32Constant(DAG, 0x3efb6798, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, getF32Constant(DAG, 0x3f88d192, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, getF32Constant(DAG, 0x3fc4316c, dl)); SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8, getF32Constant(DAG, 0x3f57ce70, dl)); } return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa); } // No special expansion. return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op); } /// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for /// limited-precision mode. static SDValue expandExp2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) return getLimitedPrecisionExp2(Op, dl, DAG); // No special expansion. return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op); } /// visitPow - Lower a pow intrinsic. Handles the special sequences for /// limited-precision mode with x == 10.0f. static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const TargetLowering &TLI) { bool IsExp10 = false; if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { if (ConstantFPSDNode *LHSC = dyn_cast(LHS)) { APFloat Ten(10.0f); IsExp10 = LHSC->isExactlyValue(Ten); } } // TODO: What fast-math-flags should be set on the FMUL node? if (IsExp10) { // Put the exponent in the right bit position for later addition to the // final result: // // #define LOG2OF10 3.3219281f // t0 = Op * LOG2OF10; SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS, getF32Constant(DAG, 0x40549a78, dl)); return getLimitedPrecisionExp2(t0, dl, DAG); } // No special expansion. return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS); } /// ExpandPowI - Expand a llvm.powi intrinsic. static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS, SelectionDAG &DAG) { // If RHS is a constant, we can expand this out to a multiplication tree, // otherwise we end up lowering to a call to __powidf2 (for example). When // optimizing for size, we only want to do this if the expansion would produce // a small number of multiplies, otherwise we do the full expansion. if (ConstantSDNode *RHSC = dyn_cast(RHS)) { // Get the exponent as a positive value. unsigned Val = RHSC->getSExtValue(); if ((int)Val < 0) Val = -Val; // powi(x, 0) -> 1.0 if (Val == 0) return DAG.getConstantFP(1.0, DL, LHS.getValueType()); const Function *F = DAG.getMachineFunction().getFunction(); if (!F->optForSize() || // If optimizing for size, don't insert too many multiplies. // This inserts up to 5 multiplies. countPopulation(Val) + Log2_32(Val) < 7) { // We use the simple binary decomposition method to generate the multiply // sequence. There are more optimal ways to do this (for example, // powi(x,15) generates one more multiply than it should), but this has // the benefit of being both really simple and much better than a libcall. SDValue Res; // Logically starts equal to 1.0 SDValue CurSquare = LHS; // TODO: Intrinsics should have fast-math-flags that propagate to these // nodes. while (Val) { if (Val & 1) { if (Res.getNode()) Res = DAG.getNode(ISD::FMUL, DL,Res.getValueType(), Res, CurSquare); else Res = CurSquare; // 1.0*CurSquare. } CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(), CurSquare, CurSquare); Val >>= 1; } // If the original was negative, invert the result, producing 1/(x*x*x). if (RHSC->getSExtValue() < 0) Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(), DAG.getConstantFP(1.0, DL, LHS.getValueType()), Res); return Res; } } // Otherwise, expand to a libcall. return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS); } // getUnderlyingArgReg - Find underlying register used for a truncated or // bitcasted argument. static unsigned getUnderlyingArgReg(const SDValue &N) { switch (N.getOpcode()) { case ISD::CopyFromReg: return cast(N.getOperand(1))->getReg(); case ISD::BITCAST: case ISD::AssertZext: case ISD::AssertSext: case ISD::TRUNCATE: return getUnderlyingArgReg(N.getOperand(0)); default: return 0; } } /// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function /// argument, create the corresponding DBG_VALUE machine instruction for it now. /// At the end of instruction selection, they will be inserted to the entry BB. bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( const Value *V, DILocalVariable *Variable, DIExpression *Expr, DILocation *DL, int64_t Offset, bool IsIndirect, const SDValue &N) { const Argument *Arg = dyn_cast(V); if (!Arg) return false; MachineFunction &MF = DAG.getMachineFunction(); const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); // Ignore inlined function arguments here. // // FIXME: Should we be checking DL->inlinedAt() to determine this? if (!Variable->getScope()->getSubprogram()->describes(MF.getFunction())) return false; Optional Op; // Some arguments' frame index is recorded during argument lowering. if (int FI = FuncInfo.getArgumentFrameIndex(Arg)) Op = MachineOperand::CreateFI(FI); if (!Op && N.getNode()) { unsigned Reg = getUnderlyingArgReg(N); if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) { MachineRegisterInfo &RegInfo = MF.getRegInfo(); unsigned PR = RegInfo.getLiveInPhysReg(Reg); if (PR) Reg = PR; } if (Reg) Op = MachineOperand::CreateReg(Reg, false); } if (!Op) { // Check if ValueMap has reg number. DenseMap::iterator VMI = FuncInfo.ValueMap.find(V); if (VMI != FuncInfo.ValueMap.end()) Op = MachineOperand::CreateReg(VMI->second, false); } if (!Op && N.getNode()) // Check if frame index is available. if (LoadSDNode *LNode = dyn_cast(N.getNode())) if (FrameIndexSDNode *FINode = dyn_cast(LNode->getBasePtr().getNode())) Op = MachineOperand::CreateFI(FINode->getIndex()); if (!Op) return false; assert(Variable->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); if (Op->isReg()) FuncInfo.ArgDbgValues.push_back( BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect, Op->getReg(), Offset, Variable, Expr)); else FuncInfo.ArgDbgValues.push_back( BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE)) .addOperand(*Op) .addImm(Offset) .addMetadata(Variable) .addMetadata(Expr)); return true; } // VisualStudio defines setjmp as _setjmp #if defined(_MSC_VER) && defined(setjmp) && \ !defined(setjmp_undefined_for_msvc) # pragma push_macro("setjmp") # undef setjmp # define setjmp_undefined_for_msvc #endif /// visitIntrinsicCall - Lower the call to the specified intrinsic function. If /// we want to emit this as a call to a named external function, return the name /// otherwise lower it and return null. const char * SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDLoc sdl = getCurSDLoc(); DebugLoc dl = getCurDebugLoc(); SDValue Res; switch (Intrinsic) { default: // By default, turn this into a target intrinsic node. visitTargetIntrinsic(I, Intrinsic); return nullptr; case Intrinsic::vastart: visitVAStart(I); return nullptr; case Intrinsic::vaend: visitVAEnd(I); return nullptr; case Intrinsic::vacopy: visitVACopy(I); return nullptr; case Intrinsic::returnaddress: setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, TLI.getPointerTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); return nullptr; case Intrinsic::frameaddress: setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI.getPointerTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); return nullptr; case Intrinsic::read_register: { Value *Reg = I.getArgOperand(0); SDValue Chain = getRoot(); SDValue RegName = DAG.getMDNode(cast(cast(Reg)->getMetadata())); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); Res = DAG.getNode(ISD::READ_REGISTER, sdl, DAG.getVTList(VT, MVT::Other), Chain, RegName); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); return nullptr; } case Intrinsic::write_register: { Value *Reg = I.getArgOperand(0); Value *RegValue = I.getArgOperand(1); SDValue Chain = getRoot(); SDValue RegName = DAG.getMDNode(cast(cast(Reg)->getMetadata())); DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain, RegName, getValue(RegValue))); return nullptr; } case Intrinsic::setjmp: return &"_setjmp"[!TLI.usesUnderscoreSetJmp()]; case Intrinsic::longjmp: return &"_longjmp"[!TLI.usesUnderscoreLongJmp()]; case Intrinsic::memcpy: { SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); unsigned Align = cast(I.getArgOperand(3))->getZExtValue(); if (!Align) Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment. bool isVol = cast(I.getArgOperand(4))->getZExtValue(); bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, false, isTC, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1))); updateDAGForMaybeTailCall(MC); return nullptr; } case Intrinsic::memset: { SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); unsigned Align = cast(I.getArgOperand(3))->getZExtValue(); if (!Align) Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment. bool isVol = cast(I.getArgOperand(4))->getZExtValue(); bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, isTC, MachinePointerInfo(I.getArgOperand(0))); updateDAGForMaybeTailCall(MS); return nullptr; } case Intrinsic::memmove: { SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); unsigned Align = cast(I.getArgOperand(3))->getZExtValue(); if (!Align) Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment. bool isVol = cast(I.getArgOperand(4))->getZExtValue(); bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); SDValue MM = DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, isTC, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1))); updateDAGForMaybeTailCall(MM); return nullptr; } case Intrinsic::dbg_declare: { const DbgDeclareInst &DI = cast(I); DILocalVariable *Variable = DI.getVariable(); DIExpression *Expression = DI.getExpression(); const Value *Address = DI.getAddress(); assert(Variable && "Missing variable"); if (!Address) { DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); return nullptr; } // Check if address has undef value. if (isa(Address) || (Address->use_empty() && !isa(Address))) { DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); return nullptr; } SDValue &N = NodeMap[Address]; if (!N.getNode() && isa(Address)) // Check unused arguments map. N = UnusedArgNodeMap[Address]; SDDbgValue *SDV; if (N.getNode()) { if (const BitCastInst *BCI = dyn_cast(Address)) Address = BCI->getOperand(0); // Parameters are handled specially. bool isParameter = Variable->isParameter() || isa(Address); auto FINode = dyn_cast(N.getNode()); if (isParameter && FINode) { // Byval parameter. We have a frame index at this point. SDV = DAG.getFrameIndexDbgValue(Variable, Expression, FINode->getIndex(), 0, dl, SDNodeOrder); } else if (isa(Address)) { // Address is an argument, so try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false, N); return nullptr; } else { SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), true, 0, dl, SDNodeOrder); } DAG.AddDbgValue(SDV, N.getNode(), isParameter); } else { // If Address is an argument then try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false, N)) { // If variable is pinned by a alloca in dominating bb then // use StaticAllocaMap. if (const AllocaInst *AI = dyn_cast(Address)) { if (AI->getParent() != DI.getParent()) { DenseMap::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) { SDV = DAG.getFrameIndexDbgValue(Variable, Expression, SI->second, 0, dl, SDNodeOrder); DAG.AddDbgValue(SDV, nullptr, false); return nullptr; } } } DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); } } return nullptr; } case Intrinsic::dbg_value: { const DbgValueInst &DI = cast(I); assert(DI.getVariable() && "Missing variable"); DILocalVariable *Variable = DI.getVariable(); DIExpression *Expression = DI.getExpression(); uint64_t Offset = DI.getOffset(); const Value *V = DI.getValue(); if (!V) return nullptr; SDDbgValue *SDV; if (isa(V) || isa(V) || isa(V)) { SDV = DAG.getConstantDbgValue(Variable, Expression, V, Offset, dl, SDNodeOrder); DAG.AddDbgValue(SDV, nullptr, false); } else { // Do not use getValue() in here; we don't want to generate code at // this point if it hasn't been done yet. SDValue N = NodeMap[V]; if (!N.getNode() && isa(V)) // Check unused arguments map. N = UnusedArgNodeMap[V]; if (N.getNode()) { if (!EmitFuncArgumentDbgValue(V, Variable, Expression, dl, Offset, false, N)) { SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), false, Offset, dl, SDNodeOrder); DAG.AddDbgValue(SDV, N.getNode(), false); } } else if (!V->use_empty() ) { // Do not call getValue(V) yet, as we don't want to generate code. // Remember it for later. DanglingDebugInfo DDI(&DI, dl, SDNodeOrder); DanglingDebugInfoMap[V] = DDI; } else { // We may expand this to cover more cases. One case where we have no // data available is an unreferenced parameter. DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); } } // Build a debug info table entry. if (const BitCastInst *BCI = dyn_cast(V)) V = BCI->getOperand(0); const AllocaInst *AI = dyn_cast(V); // Don't handle byval struct arguments or VLAs, for example. if (!AI) { DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n"); DEBUG(dbgs() << " Last seen at:\n " << *V << "\n"); return nullptr; } DenseMap::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI == FuncInfo.StaticAllocaMap.end()) return nullptr; // VLAs. return nullptr; } case Intrinsic::eh_typeid_for: { // Find the type id for the given typeinfo. GlobalValue *GV = ExtractTypeInfo(I.getArgOperand(0)); unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV); Res = DAG.getConstant(TypeID, sdl, MVT::i32); setValue(&I, Res); return nullptr; } case Intrinsic::eh_return_i32: case Intrinsic::eh_return_i64: DAG.getMachineFunction().getMMI().setCallsEHReturn(true); DAG.setRoot(DAG.getNode(ISD::EH_RETURN, sdl, MVT::Other, getControlRoot(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)))); return nullptr; case Intrinsic::eh_unwind_init: DAG.getMachineFunction().getMMI().setCallsUnwindInit(true); return nullptr; case Intrinsic::eh_dwarf_cfa: { - SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), sdl, - TLI.getPointerTy(DAG.getDataLayout())); - SDValue Offset = DAG.getNode(ISD::ADD, sdl, - CfaArg.getValueType(), - DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, sdl, - CfaArg.getValueType()), - CfaArg); - SDValue FA = DAG.getNode( - ISD::FRAMEADDR, sdl, TLI.getPointerTy(DAG.getDataLayout()), - DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()))); - setValue(&I, DAG.getNode(ISD::ADD, sdl, FA.getValueType(), - FA, Offset)); + setValue(&I, DAG.getNode(ISD::EH_DWARF_CFA, sdl, + TLI.getPointerTy(DAG.getDataLayout()), + getValue(I.getArgOperand(0)))); return nullptr; } case Intrinsic::eh_sjlj_callsite: { MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); ConstantInt *CI = dyn_cast(I.getArgOperand(0)); assert(CI && "Non-constant call site value in eh.sjlj.callsite!"); assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!"); MMI.setCurrentCallSite(CI->getZExtValue()); return nullptr; } case Intrinsic::eh_sjlj_functioncontext: { // Get and store the index of the function context. MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); AllocaInst *FnCtx = cast(I.getArgOperand(0)->stripPointerCasts()); int FI = FuncInfo.StaticAllocaMap[FnCtx]; MFI->setFunctionContextIndex(FI); return nullptr; } case Intrinsic::eh_sjlj_setjmp: { SDValue Ops[2]; Ops[0] = getRoot(); Ops[1] = getValue(I.getArgOperand(0)); SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, sdl, DAG.getVTList(MVT::i32, MVT::Other), Ops); setValue(&I, Op.getValue(0)); DAG.setRoot(Op.getValue(1)); return nullptr; } case Intrinsic::eh_sjlj_longjmp: { DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other, getRoot(), getValue(I.getArgOperand(0)))); return nullptr; } case Intrinsic::eh_sjlj_setup_dispatch: { DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other, getRoot())); return nullptr; } case Intrinsic::masked_gather: visitMaskedGather(I); return nullptr; case Intrinsic::masked_load: visitMaskedLoad(I); return nullptr; case Intrinsic::masked_scatter: visitMaskedScatter(I); return nullptr; case Intrinsic::masked_store: visitMaskedStore(I); return nullptr; case Intrinsic::x86_mmx_pslli_w: case Intrinsic::x86_mmx_pslli_d: case Intrinsic::x86_mmx_pslli_q: case Intrinsic::x86_mmx_psrli_w: case Intrinsic::x86_mmx_psrli_d: case Intrinsic::x86_mmx_psrli_q: case Intrinsic::x86_mmx_psrai_w: case Intrinsic::x86_mmx_psrai_d: { SDValue ShAmt = getValue(I.getArgOperand(1)); if (isa(ShAmt)) { visitTargetIntrinsic(I, Intrinsic); return nullptr; } unsigned NewIntrinsic = 0; EVT ShAmtVT = MVT::v2i32; switch (Intrinsic) { case Intrinsic::x86_mmx_pslli_w: NewIntrinsic = Intrinsic::x86_mmx_psll_w; break; case Intrinsic::x86_mmx_pslli_d: NewIntrinsic = Intrinsic::x86_mmx_psll_d; break; case Intrinsic::x86_mmx_pslli_q: NewIntrinsic = Intrinsic::x86_mmx_psll_q; break; case Intrinsic::x86_mmx_psrli_w: NewIntrinsic = Intrinsic::x86_mmx_psrl_w; break; case Intrinsic::x86_mmx_psrli_d: NewIntrinsic = Intrinsic::x86_mmx_psrl_d; break; case Intrinsic::x86_mmx_psrli_q: NewIntrinsic = Intrinsic::x86_mmx_psrl_q; break; case Intrinsic::x86_mmx_psrai_w: NewIntrinsic = Intrinsic::x86_mmx_psra_w; break; case Intrinsic::x86_mmx_psrai_d: NewIntrinsic = Intrinsic::x86_mmx_psra_d; break; default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. } // The vector shift intrinsics with scalars uses 32b shift amounts but // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits // to be zero. // We must do this early because v2i32 is not a legal type. SDValue ShOps[2]; ShOps[0] = ShAmt; ShOps[1] = DAG.getConstant(0, sdl, MVT::i32); ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, ShOps); EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt); Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT, DAG.getConstant(NewIntrinsic, sdl, MVT::i32), getValue(I.getArgOperand(0)), ShAmt); setValue(&I, Res); return nullptr; } case Intrinsic::convertff: case Intrinsic::convertfsi: case Intrinsic::convertfui: case Intrinsic::convertsif: case Intrinsic::convertuif: case Intrinsic::convertss: case Intrinsic::convertsu: case Intrinsic::convertus: case Intrinsic::convertuu: { ISD::CvtCode Code = ISD::CVT_INVALID; switch (Intrinsic) { default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. case Intrinsic::convertff: Code = ISD::CVT_FF; break; case Intrinsic::convertfsi: Code = ISD::CVT_FS; break; case Intrinsic::convertfui: Code = ISD::CVT_FU; break; case Intrinsic::convertsif: Code = ISD::CVT_SF; break; case Intrinsic::convertuif: Code = ISD::CVT_UF; break; case Intrinsic::convertss: Code = ISD::CVT_SS; break; case Intrinsic::convertsu: Code = ISD::CVT_SU; break; case Intrinsic::convertus: Code = ISD::CVT_US; break; case Intrinsic::convertuu: Code = ISD::CVT_UU; break; } EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); const Value *Op1 = I.getArgOperand(0); Res = DAG.getConvertRndSat(DestVT, sdl, getValue(Op1), DAG.getValueType(DestVT), DAG.getValueType(getValue(Op1).getValueType()), getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)), Code); setValue(&I, Res); return nullptr; } case Intrinsic::powi: setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), DAG)); return nullptr; case Intrinsic::log: setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); return nullptr; case Intrinsic::log2: setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); return nullptr; case Intrinsic::log10: setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); return nullptr; case Intrinsic::exp: setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); return nullptr; case Intrinsic::exp2: setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); return nullptr; case Intrinsic::pow: setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), DAG, TLI)); return nullptr; case Intrinsic::sqrt: case Intrinsic::fabs: case Intrinsic::sin: case Intrinsic::cos: case Intrinsic::floor: case Intrinsic::ceil: case Intrinsic::trunc: case Intrinsic::rint: case Intrinsic::nearbyint: case Intrinsic::round: case Intrinsic::canonicalize: { unsigned Opcode; switch (Intrinsic) { default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. case Intrinsic::sqrt: Opcode = ISD::FSQRT; break; case Intrinsic::fabs: Opcode = ISD::FABS; break; case Intrinsic::sin: Opcode = ISD::FSIN; break; case Intrinsic::cos: Opcode = ISD::FCOS; break; case Intrinsic::floor: Opcode = ISD::FFLOOR; break; case Intrinsic::ceil: Opcode = ISD::FCEIL; break; case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; case Intrinsic::rint: Opcode = ISD::FRINT; break; case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; case Intrinsic::round: Opcode = ISD::FROUND; break; case Intrinsic::canonicalize: Opcode = ISD::FCANONICALIZE; break; } setValue(&I, DAG.getNode(Opcode, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)))); return nullptr; } case Intrinsic::minnum: { auto VT = getValue(I.getArgOperand(0)).getValueType(); unsigned Opc = I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMINNAN, VT) ? ISD::FMINNAN : ISD::FMINNUM; setValue(&I, DAG.getNode(Opc, sdl, VT, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)))); return nullptr; } case Intrinsic::maxnum: { auto VT = getValue(I.getArgOperand(0)).getValueType(); unsigned Opc = I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMAXNAN, VT) ? ISD::FMAXNAN : ISD::FMAXNUM; setValue(&I, DAG.getNode(Opc, sdl, VT, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)))); return nullptr; } case Intrinsic::copysign: setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)))); return nullptr; case Intrinsic::fma: setValue(&I, DAG.getNode(ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)))); return nullptr; case Intrinsic::fmuladd: { EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && TLI.isFMAFasterThanFMulAndFAdd(VT)) { setValue(&I, DAG.getNode(ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)))); } else { // TODO: Intrinsic calls should have fast-math-flags. SDValue Mul = DAG.getNode(ISD::FMUL, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1))); SDValue Add = DAG.getNode(ISD::FADD, sdl, getValue(I.getArgOperand(0)).getValueType(), Mul, getValue(I.getArgOperand(2))); setValue(&I, Add); } return nullptr; } case Intrinsic::convert_to_fp16: setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16, DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16, getValue(I.getArgOperand(0)), DAG.getTargetConstant(0, sdl, MVT::i32)))); return nullptr; case Intrinsic::convert_from_fp16: setValue(&I, DAG.getNode(ISD::FP_EXTEND, sdl, TLI.getValueType(DAG.getDataLayout(), I.getType()), DAG.getNode(ISD::BITCAST, sdl, MVT::f16, getValue(I.getArgOperand(0))))); return nullptr; case Intrinsic::pcmarker: { SDValue Tmp = getValue(I.getArgOperand(0)); DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp)); return nullptr; } case Intrinsic::readcyclecounter: { SDValue Op = getRoot(); Res = DAG.getNode(ISD::READCYCLECOUNTER, sdl, DAG.getVTList(MVT::i64, MVT::Other), Op); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); return nullptr; } case Intrinsic::bitreverse: setValue(&I, DAG.getNode(ISD::BITREVERSE, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)))); return nullptr; case Intrinsic::bswap: setValue(&I, DAG.getNode(ISD::BSWAP, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)))); return nullptr; case Intrinsic::cttz: { SDValue Arg = getValue(I.getArgOperand(0)); ConstantInt *CI = cast(I.getArgOperand(1)); EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF, sdl, Ty, Arg)); return nullptr; } case Intrinsic::ctlz: { SDValue Arg = getValue(I.getArgOperand(0)); ConstantInt *CI = cast(I.getArgOperand(1)); EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF, sdl, Ty, Arg)); return nullptr; } case Intrinsic::ctpop: { SDValue Arg = getValue(I.getArgOperand(0)); EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg)); return nullptr; } case Intrinsic::stacksave: { SDValue Op = getRoot(); Res = DAG.getNode( ISD::STACKSAVE, sdl, DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Op); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); return nullptr; } case Intrinsic::stackrestore: { Res = getValue(I.getArgOperand(0)); DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res)); return nullptr; } case Intrinsic::get_dynamic_area_offset: { SDValue Op = getRoot(); EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); EVT ResTy = TLI.getValueType(DAG.getDataLayout(), I.getType()); // Result type for @llvm.get.dynamic.area.offset should match PtrTy for // target. if (PtrTy != ResTy) report_fatal_error("Wrong result type for @llvm.get.dynamic.area.offset" " intrinsic!"); Res = DAG.getNode(ISD::GET_DYNAMIC_AREA_OFFSET, sdl, DAG.getVTList(ResTy), Op); DAG.setRoot(Op); setValue(&I, Res); return nullptr; } case Intrinsic::stackguard: { EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); MachineFunction &MF = DAG.getMachineFunction(); const Module &M = *MF.getFunction()->getParent(); SDValue Chain = getRoot(); if (TLI.useLoadStackGuardNode()) { Res = getLoadStackGuard(DAG, sdl, Chain); } else { const Value *Global = TLI.getSDagStackGuard(M); unsigned Align = DL->getPrefTypeAlignment(Global->getType()); Res = DAG.getLoad(PtrTy, sdl, Chain, getValue(Global), MachinePointerInfo(Global, 0), Align, MachineMemOperand::MOVolatile); } DAG.setRoot(Chain); setValue(&I, Res); return nullptr; } case Intrinsic::stackprotector: { // Emit code into the DAG to store the stack guard onto the stack. MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); SDValue Src, Chain = getRoot(); if (TLI.useLoadStackGuardNode()) Src = getLoadStackGuard(DAG, sdl, Chain); else Src = getValue(I.getArgOperand(0)); // The guard's value. AllocaInst *Slot = cast(I.getArgOperand(1)); int FI = FuncInfo.StaticAllocaMap[Slot]; MFI->setStackProtectorIndex(FI); SDValue FIN = DAG.getFrameIndex(FI, PtrTy); // Store the stack protector onto the stack. Res = DAG.getStore(Chain, sdl, Src, FIN, MachinePointerInfo::getFixedStack( DAG.getMachineFunction(), FI), /* Alignment = */ 0, MachineMemOperand::MOVolatile); setValue(&I, Res); DAG.setRoot(Res); return nullptr; } case Intrinsic::objectsize: { // If we don't know by now, we're never going to know. ConstantInt *CI = dyn_cast(I.getArgOperand(1)); assert(CI && "Non-constant type in __builtin_object_size?"); SDValue Arg = getValue(I.getCalledValue()); EVT Ty = Arg.getValueType(); if (CI->isZero()) Res = DAG.getConstant(-1ULL, sdl, Ty); else Res = DAG.getConstant(0, sdl, Ty); setValue(&I, Res); return nullptr; } case Intrinsic::annotation: case Intrinsic::ptr_annotation: // Drop the intrinsic, but forward the value setValue(&I, getValue(I.getOperand(0))); return nullptr; case Intrinsic::assume: case Intrinsic::var_annotation: // Discard annotate attributes and assumptions return nullptr; case Intrinsic::init_trampoline: { const Function *F = cast(I.getArgOperand(1)->stripPointerCasts()); SDValue Ops[6]; Ops[0] = getRoot(); Ops[1] = getValue(I.getArgOperand(0)); Ops[2] = getValue(I.getArgOperand(1)); Ops[3] = getValue(I.getArgOperand(2)); Ops[4] = DAG.getSrcValue(I.getArgOperand(0)); Ops[5] = DAG.getSrcValue(F); Res = DAG.getNode(ISD::INIT_TRAMPOLINE, sdl, MVT::Other, Ops); DAG.setRoot(Res); return nullptr; } case Intrinsic::adjust_trampoline: { setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl, TLI.getPointerTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); return nullptr; } case Intrinsic::gcroot: { MachineFunction &MF = DAG.getMachineFunction(); const Function *F = MF.getFunction(); (void)F; assert(F->hasGC() && "only valid in functions with gc specified, enforced by Verifier"); assert(GFI && "implied by previous"); const Value *Alloca = I.getArgOperand(0)->stripPointerCasts(); const Constant *TypeMap = cast(I.getArgOperand(1)); FrameIndexSDNode *FI = cast(getValue(Alloca).getNode()); GFI->addStackRoot(FI->getIndex(), TypeMap); return nullptr; } case Intrinsic::gcread: case Intrinsic::gcwrite: llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!"); case Intrinsic::flt_rounds: setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, sdl, MVT::i32)); return nullptr; case Intrinsic::expect: { // Just replace __builtin_expect(exp, c) with EXP. setValue(&I, getValue(I.getArgOperand(0))); return nullptr; } case Intrinsic::debugtrap: case Intrinsic::trap: { StringRef TrapFuncName = I.getAttributes() .getAttribute(AttributeSet::FunctionIndex, "trap-func-name") .getValueAsString(); if (TrapFuncName.empty()) { ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ? ISD::TRAP : ISD::DEBUGTRAP; DAG.setRoot(DAG.getNode(Op, sdl,MVT::Other, getRoot())); return nullptr; } TargetLowering::ArgListTy Args; TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(sdl).setChain(getRoot()).setCallee( CallingConv::C, I.getType(), DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy(DAG.getDataLayout())), std::move(Args)); std::pair Result = TLI.LowerCallTo(CLI); DAG.setRoot(Result.second); return nullptr; } case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: case Intrinsic::usub_with_overflow: case Intrinsic::ssub_with_overflow: case Intrinsic::umul_with_overflow: case Intrinsic::smul_with_overflow: { ISD::NodeType Op; switch (Intrinsic) { default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break; case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break; case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break; case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break; case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break; case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break; } SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); setValue(&I, DAG.getNode(Op, sdl, VTs, Op1, Op2)); return nullptr; } case Intrinsic::prefetch: { SDValue Ops[5]; unsigned rw = cast(I.getArgOperand(1))->getZExtValue(); Ops[0] = getRoot(); Ops[1] = getValue(I.getArgOperand(0)); Ops[2] = getValue(I.getArgOperand(1)); Ops[3] = getValue(I.getArgOperand(2)); Ops[4] = getValue(I.getArgOperand(3)); DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, sdl, DAG.getVTList(MVT::Other), Ops, EVT::getIntegerVT(*Context, 8), MachinePointerInfo(I.getArgOperand(0)), 0, /* align */ false, /* volatile */ rw==0, /* read */ rw==1)); /* write */ return nullptr; } case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: { bool IsStart = (Intrinsic == Intrinsic::lifetime_start); // Stack coloring is not enabled in O0, discard region information. if (TM.getOptLevel() == CodeGenOpt::None) return nullptr; SmallVector Allocas; GetUnderlyingObjects(I.getArgOperand(1), Allocas, *DL); for (SmallVectorImpl::iterator Object = Allocas.begin(), E = Allocas.end(); Object != E; ++Object) { AllocaInst *LifetimeObject = dyn_cast_or_null(*Object); // Could not find an Alloca. if (!LifetimeObject) continue; // First check that the Alloca is static, otherwise it won't have a // valid frame index. auto SI = FuncInfo.StaticAllocaMap.find(LifetimeObject); if (SI == FuncInfo.StaticAllocaMap.end()) return nullptr; int FI = SI->second; SDValue Ops[2]; Ops[0] = getRoot(); Ops[1] = DAG.getFrameIndex(FI, TLI.getPointerTy(DAG.getDataLayout()), true); unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END); Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops); DAG.setRoot(Res); } return nullptr; } case Intrinsic::invariant_start: // Discard region information. setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout()))); return nullptr; case Intrinsic::invariant_end: // Discard region information. return nullptr; case Intrinsic::clear_cache: return TLI.getClearCacheBuiltinName(); case Intrinsic::donothing: // ignore return nullptr; case Intrinsic::experimental_stackmap: { visitStackmap(I); return nullptr; } case Intrinsic::experimental_patchpoint_void: case Intrinsic::experimental_patchpoint_i64: { visitPatchpoint(&I); return nullptr; } case Intrinsic::experimental_gc_statepoint: { LowerStatepoint(ImmutableStatepoint(&I)); return nullptr; } case Intrinsic::experimental_gc_result: { visitGCResult(cast(I)); return nullptr; } case Intrinsic::experimental_gc_relocate: { visitGCRelocate(cast(I)); return nullptr; } case Intrinsic::instrprof_increment: llvm_unreachable("instrprof failed to lower an increment"); case Intrinsic::instrprof_value_profile: llvm_unreachable("instrprof failed to lower a value profiling call"); case Intrinsic::localescape: { MachineFunction &MF = DAG.getMachineFunction(); const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission // is the same on all targets. for (unsigned Idx = 0, E = I.getNumArgOperands(); Idx < E; ++Idx) { Value *Arg = I.getArgOperand(Idx)->stripPointerCasts(); if (isa(Arg)) continue; // Skip null pointers. They represent a hole in index space. AllocaInst *Slot = cast(Arg); assert(FuncInfo.StaticAllocaMap.count(Slot) && "can only escape static allocas"); int FI = FuncInfo.StaticAllocaMap[Slot]; MCSymbol *FrameAllocSym = MF.getMMI().getContext().getOrCreateFrameAllocSymbol( GlobalValue::getRealLinkageName(MF.getName()), Idx); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl, TII->get(TargetOpcode::LOCAL_ESCAPE)) .addSym(FrameAllocSym) .addFrameIndex(FI); } return nullptr; } case Intrinsic::localrecover: { // i8* @llvm.localrecover(i8* %fn, i8* %fp, i32 %idx) MachineFunction &MF = DAG.getMachineFunction(); MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout(), 0); // Get the symbol that defines the frame offset. auto *Fn = cast(I.getArgOperand(0)->stripPointerCasts()); auto *Idx = cast(I.getArgOperand(2)); unsigned IdxVal = unsigned(Idx->getLimitedValue(INT_MAX)); MCSymbol *FrameAllocSym = MF.getMMI().getContext().getOrCreateFrameAllocSymbol( GlobalValue::getRealLinkageName(Fn->getName()), IdxVal); // Create a MCSymbol for the label to avoid any target lowering // that would make this PC relative. SDValue OffsetSym = DAG.getMCSymbol(FrameAllocSym, PtrVT); SDValue OffsetVal = DAG.getNode(ISD::LOCAL_RECOVER, sdl, PtrVT, OffsetSym); // Add the offset to the FP. Value *FP = I.getArgOperand(1); SDValue FPVal = getValue(FP); SDValue Add = DAG.getNode(ISD::ADD, sdl, PtrVT, FPVal, OffsetVal); setValue(&I, Add); return nullptr; } case Intrinsic::eh_exceptionpointer: case Intrinsic::eh_exceptioncode: { // Get the exception pointer vreg, copy from it, and resize it to fit. const auto *CPI = cast(I.getArgOperand(0)); MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT); unsigned VReg = FuncInfo.getCatchPadExceptionPointerVReg(CPI, PtrRC); SDValue N = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), VReg, PtrVT); if (Intrinsic == Intrinsic::eh_exceptioncode) N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32); setValue(&I, N); return nullptr; } case Intrinsic::experimental_deoptimize: LowerDeoptimizeCall(&I); return nullptr; } } std::pair SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, const BasicBlock *EHPadBB) { MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); MCSymbol *BeginLabel = nullptr; if (EHPadBB) { // Insert a label before the invoke call to mark the try range. This can be // used to detect deletion of the invoke via the MachineModuleInfo. BeginLabel = MMI.getContext().createTempSymbol(); // For SjLj, keep track of which landing pads go with which invokes // so as to maintain the ordering of pads in the LSDA. unsigned CallSiteIndex = MMI.getCurrentCallSite(); if (CallSiteIndex) { MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex); LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex); // Now that the call site is handled, stop tracking it. MMI.setCurrentCallSite(0); } // Both PendingLoads and PendingExports must be flushed here; // this call might not return. (void)getRoot(); DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getControlRoot(), BeginLabel)); CLI.setChain(getRoot()); } const TargetLowering &TLI = DAG.getTargetLoweringInfo(); std::pair Result = TLI.LowerCallTo(CLI); assert((CLI.IsTailCall || Result.second.getNode()) && "Non-null chain expected with non-tail call!"); assert((Result.second.getNode() || !Result.first.getNode()) && "Null value expected with tail call!"); if (!Result.second.getNode()) { // As a special case, a null chain means that a tail call has been emitted // and the DAG root is already updated. HasTailCall = true; // Since there's no actual continuation from this block, nothing can be // relying on us setting vregs for them. PendingExports.clear(); } else { DAG.setRoot(Result.second); } if (EHPadBB) { // Insert a label at the end of the invoke call to mark the try range. This // can be used to detect deletion of the invoke via the MachineModuleInfo. MCSymbol *EndLabel = MMI.getContext().createTempSymbol(); DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel)); // Inform MachineModuleInfo of range. if (MMI.hasEHFunclets()) { assert(CLI.CS); WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo(); EHInfo->addIPToStateRange(cast(CLI.CS->getInstruction()), BeginLabel, EndLabel); } else { MMI.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel); } } return Result; } void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool isTailCall, const BasicBlock *EHPadBB) { auto &DL = DAG.getDataLayout(); FunctionType *FTy = CS.getFunctionType(); Type *RetTy = CS.getType(); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Args.reserve(CS.arg_size()); const Value *SwiftErrorVal = nullptr; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { const Value *V = *i; // Skip empty types if (V->getType()->isEmptyTy()) continue; SDValue ArgNode = getValue(V); Entry.Node = ArgNode; Entry.Ty = V->getType(); // Skip the first return-type Attribute to get to params. Entry.setAttributes(&CS, i - CS.arg_begin() + 1); // Use swifterror virtual register as input to the call. if (Entry.isSwiftError && TLI.supportSwiftError()) { SwiftErrorVal = V; // We find the virtual register for the actual swifterror argument. // Instead of using the Value, we use the virtual register instead. Entry.Node = DAG.getRegister( FuncInfo.findSwiftErrorVReg(FuncInfo.MBB, V), EVT(TLI.getPointerTy(DL))); } Args.push_back(Entry); // If we have an explicit sret argument that is an Instruction, (i.e., it // might point to function-local memory), we can't meaningfully tail-call. if (Entry.isSRet && isa(V)) isTailCall = false; } // Check if target-independent constraints permit a tail call here. // Target-dependent constraints are checked within TLI->LowerCallTo. if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget())) isTailCall = false; TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(getCurSDLoc()) .setChain(getRoot()) .setCallee(RetTy, FTy, Callee, std::move(Args), CS) .setTailCall(isTailCall) .setConvergent(CS.isConvergent()); std::pair Result = lowerInvokable(CLI, EHPadBB); if (Result.first.getNode()) { const Instruction *Inst = CS.getInstruction(); Result.first = lowerRangeToAssertZExt(DAG, *Inst, Result.first); setValue(Inst, Result.first); } // The last element of CLI.InVals has the SDValue for swifterror return. // Here we copy it to a virtual register and update SwiftErrorMap for // book-keeping. if (SwiftErrorVal && TLI.supportSwiftError()) { // Get the last element of InVals. SDValue Src = CLI.InVals.back(); const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL)); unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC); SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src); // We update the virtual register for the actual swifterror argument. FuncInfo.setSwiftErrorVReg(FuncInfo.MBB, SwiftErrorVal, VReg); DAG.setRoot(CopyNode); } } /// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the /// value is equal or not-equal to zero. static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) { for (const User *U : V->users()) { if (const ICmpInst *IC = dyn_cast(U)) if (IC->isEquality()) if (const Constant *C = dyn_cast(IC->getOperand(1))) if (C->isNullValue()) continue; // Unknown instruction. return false; } return true; } static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, Type *LoadTy, SelectionDAGBuilder &Builder) { // Check to see if this load can be trivially constant folded, e.g. if the // input is from a string literal. if (const Constant *LoadInput = dyn_cast(PtrVal)) { // Cast pointer to the type we really want to load. LoadInput = ConstantExpr::getBitCast(const_cast(LoadInput), PointerType::getUnqual(LoadTy)); if (const Constant *LoadCst = ConstantFoldLoadFromConstPtr( const_cast(LoadInput), LoadTy, *Builder.DL)) return Builder.getValue(LoadCst); } // Otherwise, we have to emit the load. If the pointer is to unfoldable but // still constant memory, the input chain can be the entry node. SDValue Root; bool ConstantMemory = false; // Do not serialize (non-volatile) loads of constant memory with anything. if (Builder.AA->pointsToConstantMemory(PtrVal)) { Root = Builder.DAG.getEntryNode(); ConstantMemory = true; } else { // Do not serialize non-volatile loads against each other. Root = Builder.DAG.getRoot(); } SDValue Ptr = Builder.getValue(PtrVal); SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurSDLoc(), Root, Ptr, MachinePointerInfo(PtrVal), /* Alignment = */ 1); if (!ConstantMemory) Builder.PendingLoads.push_back(LoadVal.getValue(1)); return LoadVal; } /// processIntegerCallValue - Record the value for an instruction that /// produces an integer result, converting the type where necessary. void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I, SDValue Value, bool IsSigned) { EVT VT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType(), true); if (IsSigned) Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT); else Value = DAG.getZExtOrTrunc(Value, getCurSDLoc(), VT); setValue(&I, Value); } /// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form. /// If so, return true and lower it, otherwise return false and it will be /// lowered like a normal call. bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { // Verify that the prototype makes sense. int memcmp(void*,void*,size_t) if (I.getNumArgOperands() != 3) return false; const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1); if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() || !I.getArgOperand(2)->getType()->isIntegerTy() || !I.getType()->isIntegerTy()) return false; const Value *Size = I.getArgOperand(2); const ConstantInt *CSize = dyn_cast(Size); if (CSize && CSize->getZExtValue() == 0) { EVT CallVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType(), true); setValue(&I, DAG.getConstant(0, getCurSDLoc(), CallVT)); return true; } const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair Res = TSI.EmitTargetCodeForMemcmp(DAG, getCurSDLoc(), DAG.getRoot(), getValue(LHS), getValue(RHS), getValue(Size), MachinePointerInfo(LHS), MachinePointerInfo(RHS)); if (Res.first.getNode()) { processIntegerCallValue(I, Res.first, true); PendingLoads.push_back(Res.second); return true; } // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0 // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0 if (CSize && IsOnlyUsedInZeroEqualityComparison(&I)) { bool ActuallyDoIt = true; MVT LoadVT; Type *LoadTy; switch (CSize->getZExtValue()) { default: LoadVT = MVT::Other; LoadTy = nullptr; ActuallyDoIt = false; break; case 2: LoadVT = MVT::i16; LoadTy = Type::getInt16Ty(CSize->getContext()); break; case 4: LoadVT = MVT::i32; LoadTy = Type::getInt32Ty(CSize->getContext()); break; case 8: LoadVT = MVT::i64; LoadTy = Type::getInt64Ty(CSize->getContext()); break; /* case 16: LoadVT = MVT::v4i32; LoadTy = Type::getInt32Ty(CSize->getContext()); LoadTy = VectorType::get(LoadTy, 4); break; */ } // This turns into unaligned loads. We only do this if the target natively // supports the MVT we'll be loading or if it is small enough (<= 4) that // we'll only produce a small number of byte loads. // Require that we can find a legal MVT, and only do this if the target // supports unaligned loads of that type. Expanding into byte loads would // bloat the code. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (ActuallyDoIt && CSize->getZExtValue() > 4) { unsigned DstAS = LHS->getType()->getPointerAddressSpace(); unsigned SrcAS = RHS->getType()->getPointerAddressSpace(); // TODO: Handle 5 byte compare as 4-byte + 1 byte. // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads. // TODO: Check alignment of src and dest ptrs. if (!TLI.isTypeLegal(LoadVT) || !TLI.allowsMisalignedMemoryAccesses(LoadVT, SrcAS) || !TLI.allowsMisalignedMemoryAccesses(LoadVT, DstAS)) ActuallyDoIt = false; } if (ActuallyDoIt) { SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this); SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this); SDValue Res = DAG.getSetCC(getCurSDLoc(), MVT::i1, LHSVal, RHSVal, ISD::SETNE); processIntegerCallValue(I, Res, false); return true; } } return false; } /// visitMemChrCall -- See if we can lower a memchr call into an optimized /// form. If so, return true and lower it, otherwise return false and it /// will be lowered like a normal call. bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) { // Verify that the prototype makes sense. void *memchr(void *, int, size_t) if (I.getNumArgOperands() != 3) return false; const Value *Src = I.getArgOperand(0); const Value *Char = I.getArgOperand(1); const Value *Length = I.getArgOperand(2); if (!Src->getType()->isPointerTy() || !Char->getType()->isIntegerTy() || !Length->getType()->isIntegerTy() || !I.getType()->isPointerTy()) return false; const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair Res = TSI.EmitTargetCodeForMemchr(DAG, getCurSDLoc(), DAG.getRoot(), getValue(Src), getValue(Char), getValue(Length), MachinePointerInfo(Src)); if (Res.first.getNode()) { setValue(&I, Res.first); PendingLoads.push_back(Res.second); return true; } return false; } /// visitStrCpyCall -- See if we can lower a strcpy or stpcpy call into an /// optimized form. If so, return true and lower it, otherwise return false /// and it will be lowered like a normal call. bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) { // Verify that the prototype makes sense. char *strcpy(char *, char *) if (I.getNumArgOperands() != 2) return false; const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); if (!Arg0->getType()->isPointerTy() || !Arg1->getType()->isPointerTy() || !I.getType()->isPointerTy()) return false; const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair Res = TSI.EmitTargetCodeForStrcpy(DAG, getCurSDLoc(), getRoot(), getValue(Arg0), getValue(Arg1), MachinePointerInfo(Arg0), MachinePointerInfo(Arg1), isStpcpy); if (Res.first.getNode()) { setValue(&I, Res.first); DAG.setRoot(Res.second); return true; } return false; } /// visitStrCmpCall - See if we can lower a call to strcmp in an optimized form. /// If so, return true and lower it, otherwise return false and it will be /// lowered like a normal call. bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) { // Verify that the prototype makes sense. int strcmp(void*,void*) if (I.getNumArgOperands() != 2) return false; const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); if (!Arg0->getType()->isPointerTy() || !Arg1->getType()->isPointerTy() || !I.getType()->isIntegerTy()) return false; const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair Res = TSI.EmitTargetCodeForStrcmp(DAG, getCurSDLoc(), DAG.getRoot(), getValue(Arg0), getValue(Arg1), MachinePointerInfo(Arg0), MachinePointerInfo(Arg1)); if (Res.first.getNode()) { processIntegerCallValue(I, Res.first, true); PendingLoads.push_back(Res.second); return true; } return false; } /// visitStrLenCall -- See if we can lower a strlen call into an optimized /// form. If so, return true and lower it, otherwise return false and it /// will be lowered like a normal call. bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) { // Verify that the prototype makes sense. size_t strlen(char *) if (I.getNumArgOperands() != 1) return false; const Value *Arg0 = I.getArgOperand(0); if (!Arg0->getType()->isPointerTy() || !I.getType()->isIntegerTy()) return false; const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair Res = TSI.EmitTargetCodeForStrlen(DAG, getCurSDLoc(), DAG.getRoot(), getValue(Arg0), MachinePointerInfo(Arg0)); if (Res.first.getNode()) { processIntegerCallValue(I, Res.first, false); PendingLoads.push_back(Res.second); return true; } return false; } /// visitStrNLenCall -- See if we can lower a strnlen call into an optimized /// form. If so, return true and lower it, otherwise return false and it /// will be lowered like a normal call. bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) { // Verify that the prototype makes sense. size_t strnlen(char *, size_t) if (I.getNumArgOperands() != 2) return false; const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); if (!Arg0->getType()->isPointerTy() || !Arg1->getType()->isIntegerTy() || !I.getType()->isIntegerTy()) return false; const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair Res = TSI.EmitTargetCodeForStrnlen(DAG, getCurSDLoc(), DAG.getRoot(), getValue(Arg0), getValue(Arg1), MachinePointerInfo(Arg0)); if (Res.first.getNode()) { processIntegerCallValue(I, Res.first, false); PendingLoads.push_back(Res.second); return true; } return false; } /// visitUnaryFloatCall - If a call instruction is a unary floating-point /// operation (as expected), translate it to an SDNode with the specified opcode /// and return true. bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I, unsigned Opcode) { // Sanity check that it really is a unary floating-point call. if (I.getNumArgOperands() != 1 || !I.getArgOperand(0)->getType()->isFloatingPointTy() || I.getType() != I.getArgOperand(0)->getType() || !I.onlyReadsMemory()) return false; SDValue Tmp = getValue(I.getArgOperand(0)); setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), Tmp.getValueType(), Tmp)); return true; } /// visitBinaryFloatCall - If a call instruction is a binary floating-point /// operation (as expected), translate it to an SDNode with the specified opcode /// and return true. bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I, unsigned Opcode) { // Sanity check that it really is a binary floating-point call. if (I.getNumArgOperands() != 2 || !I.getArgOperand(0)->getType()->isFloatingPointTy() || I.getType() != I.getArgOperand(0)->getType() || I.getType() != I.getArgOperand(1)->getType() || !I.onlyReadsMemory()) return false; SDValue Tmp0 = getValue(I.getArgOperand(0)); SDValue Tmp1 = getValue(I.getArgOperand(1)); EVT VT = Tmp0.getValueType(); setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), VT, Tmp0, Tmp1)); return true; } void SelectionDAGBuilder::visitCall(const CallInst &I) { // Handle inline assembly differently. if (isa(I.getCalledValue())) { visitInlineAsm(&I); return; } MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); ComputeUsesVAFloatArgument(I, &MMI); const char *RenameFn = nullptr; if (Function *F = I.getCalledFunction()) { if (F->isDeclaration()) { if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo()) { if (unsigned IID = II->getIntrinsicID(F)) { RenameFn = visitIntrinsicCall(I, IID); if (!RenameFn) return; } } if (Intrinsic::ID IID = F->getIntrinsicID()) { RenameFn = visitIntrinsicCall(I, IID); if (!RenameFn) return; } } // Check for well-known libc/libm calls. If the function is internal, it // can't be a library call. Don't do the check if marked as nobuiltin for // some reason. LibFunc::Func Func; if (!I.isNoBuiltin() && !F->hasLocalLinkage() && F->hasName() && LibInfo->getLibFunc(F->getName(), Func) && LibInfo->hasOptimizedCodeGen(Func)) { switch (Func) { default: break; case LibFunc::copysign: case LibFunc::copysignf: case LibFunc::copysignl: if (I.getNumArgOperands() == 2 && // Basic sanity checks. I.getArgOperand(0)->getType()->isFloatingPointTy() && I.getType() == I.getArgOperand(0)->getType() && I.getType() == I.getArgOperand(1)->getType() && I.onlyReadsMemory()) { SDValue LHS = getValue(I.getArgOperand(0)); SDValue RHS = getValue(I.getArgOperand(1)); setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurSDLoc(), LHS.getValueType(), LHS, RHS)); return; } break; case LibFunc::fabs: case LibFunc::fabsf: case LibFunc::fabsl: if (visitUnaryFloatCall(I, ISD::FABS)) return; break; case LibFunc::fmin: case LibFunc::fminf: case LibFunc::fminl: if (visitBinaryFloatCall(I, ISD::FMINNUM)) return; break; case LibFunc::fmax: case LibFunc::fmaxf: case LibFunc::fmaxl: if (visitBinaryFloatCall(I, ISD::FMAXNUM)) return; break; case LibFunc::sin: case LibFunc::sinf: case LibFunc::sinl: if (visitUnaryFloatCall(I, ISD::FSIN)) return; break; case LibFunc::cos: case LibFunc::cosf: case LibFunc::cosl: if (visitUnaryFloatCall(I, ISD::FCOS)) return; break; case LibFunc::sqrt: case LibFunc::sqrtf: case LibFunc::sqrtl: case LibFunc::sqrt_finite: case LibFunc::sqrtf_finite: case LibFunc::sqrtl_finite: if (visitUnaryFloatCall(I, ISD::FSQRT)) return; break; case LibFunc::floor: case LibFunc::floorf: case LibFunc::floorl: if (visitUnaryFloatCall(I, ISD::FFLOOR)) return; break; case LibFunc::nearbyint: case LibFunc::nearbyintf: case LibFunc::nearbyintl: if (visitUnaryFloatCall(I, ISD::FNEARBYINT)) return; break; case LibFunc::ceil: case LibFunc::ceilf: case LibFunc::ceill: if (visitUnaryFloatCall(I, ISD::FCEIL)) return; break; case LibFunc::rint: case LibFunc::rintf: case LibFunc::rintl: if (visitUnaryFloatCall(I, ISD::FRINT)) return; break; case LibFunc::round: case LibFunc::roundf: case LibFunc::roundl: if (visitUnaryFloatCall(I, ISD::FROUND)) return; break; case LibFunc::trunc: case LibFunc::truncf: case LibFunc::truncl: if (visitUnaryFloatCall(I, ISD::FTRUNC)) return; break; case LibFunc::log2: case LibFunc::log2f: case LibFunc::log2l: if (visitUnaryFloatCall(I, ISD::FLOG2)) return; break; case LibFunc::exp2: case LibFunc::exp2f: case LibFunc::exp2l: if (visitUnaryFloatCall(I, ISD::FEXP2)) return; break; case LibFunc::memcmp: if (visitMemCmpCall(I)) return; break; case LibFunc::memchr: if (visitMemChrCall(I)) return; break; case LibFunc::strcpy: if (visitStrCpyCall(I, false)) return; break; case LibFunc::stpcpy: if (visitStrCpyCall(I, true)) return; break; case LibFunc::strcmp: if (visitStrCmpCall(I)) return; break; case LibFunc::strlen: if (visitStrLenCall(I)) return; break; case LibFunc::strnlen: if (visitStrNLenCall(I)) return; break; } } } SDValue Callee; if (!RenameFn) Callee = getValue(I.getCalledValue()); else Callee = DAG.getExternalSymbol( RenameFn, DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout())); // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't // have to do anything here to lower funclet bundles. assert(!I.hasOperandBundlesOtherThan( {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) && "Cannot lower calls with arbitrary operand bundles!"); if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) LowerCallSiteWithDeoptBundle(&I, Callee, nullptr); else // Check if we can potentially perform a tail call. More detailed checking // is be done within LowerCallTo, after more information about the call is // known. LowerCallTo(&I, Callee, I.isTailCall()); } namespace { /// AsmOperandInfo - This contains information for each constraint that we are /// lowering. class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo { public: /// CallOperand - If this is the result output operand or a clobber /// this is null, otherwise it is the incoming operand to the CallInst. /// This gets modified as the asm is processed. SDValue CallOperand; /// AssignedRegs - If this is a register or register class operand, this /// contains the set of register corresponding to the operand. RegsForValue AssignedRegs; explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info) : TargetLowering::AsmOperandInfo(info), CallOperand(nullptr,0) { } /// getCallOperandValEVT - Return the EVT of the Value* that this operand /// corresponds to. If there is no Value* for this operand, it returns /// MVT::Other. EVT getCallOperandValEVT(LLVMContext &Context, const TargetLowering &TLI, const DataLayout &DL) const { if (!CallOperandVal) return MVT::Other; if (isa(CallOperandVal)) return TLI.getPointerTy(DL); llvm::Type *OpTy = CallOperandVal->getType(); // FIXME: code duplicated from TargetLowering::ParseConstraints(). // If this is an indirect operand, the operand is a pointer to the // accessed type. if (isIndirect) { llvm::PointerType *PtrTy = dyn_cast(OpTy); if (!PtrTy) report_fatal_error("Indirect operand for inline asm not a pointer!"); OpTy = PtrTy->getElementType(); } // Look for vector wrapped in a struct. e.g. { <16 x i8> }. if (StructType *STy = dyn_cast(OpTy)) if (STy->getNumElements() == 1) OpTy = STy->getElementType(0); // If OpTy is not a single value, it may be a struct/union that we // can tile with integers. if (!OpTy->isSingleValueType() && OpTy->isSized()) { unsigned BitSize = DL.getTypeSizeInBits(OpTy); switch (BitSize) { default: break; case 1: case 8: case 16: case 32: case 64: case 128: OpTy = IntegerType::get(Context, BitSize); break; } } return TLI.getValueType(DL, OpTy, true); } }; typedef SmallVector SDISelAsmOperandInfoVector; } // end anonymous namespace /// GetRegistersForValue - Assign registers (virtual or physical) for the /// specified operand. We prefer to assign virtual registers, to allow the /// register allocator to handle the assignment process. However, if the asm /// uses features that we can't model on machineinstrs, we have SDISel do the /// allocation. This produces generally horrible, but correct, code. /// /// OpInfo describes the operand. /// static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, const SDLoc &DL, SDISelAsmOperandInfo &OpInfo) { LLVMContext &Context = *DAG.getContext(); MachineFunction &MF = DAG.getMachineFunction(); SmallVector Regs; // If this is a constraint for a single physreg, or a constraint for a // register class, find it. std::pair PhysReg = TLI.getRegForInlineAsmConstraint(MF.getSubtarget().getRegisterInfo(), OpInfo.ConstraintCode, OpInfo.ConstraintVT); unsigned NumRegs = 1; if (OpInfo.ConstraintVT != MVT::Other) { // If this is a FP input in an integer register (or visa versa) insert a bit // cast of the input value. More generally, handle any case where the input // value disagrees with the register class we plan to stick this in. if (OpInfo.Type == InlineAsm::isInput && PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) { // Try to convert to the first EVT that the reg class contains. If the // types are identical size, use a bitcast to convert (e.g. two differing // vector types). MVT RegVT = *PhysReg.second->vt_begin(); if (RegVT.getSizeInBits() == OpInfo.CallOperand.getValueSizeInBits()) { OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand); OpInfo.ConstraintVT = RegVT; } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) { // If the input is a FP value and we want it in FP registers, do a // bitcast to the corresponding integer type. This turns an f64 value // into i64, which can be passed with two i32 values on a 32-bit // machine. RegVT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits()); OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand); OpInfo.ConstraintVT = RegVT; } } NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT); } MVT RegVT; EVT ValueVT = OpInfo.ConstraintVT; // If this is a constraint for a specific physical register, like {r17}, // assign it now. if (unsigned AssignedReg = PhysReg.first) { const TargetRegisterClass *RC = PhysReg.second; if (OpInfo.ConstraintVT == MVT::Other) ValueVT = *RC->vt_begin(); // Get the actual register value type. This is important, because the user // may have asked for (e.g.) the AX register in i32 type. We need to // remember that AX is actually i16 to get the right extension. RegVT = *RC->vt_begin(); // This is a explicit reference to a physical register. Regs.push_back(AssignedReg); // If this is an expanded reference, add the rest of the regs to Regs. if (NumRegs != 1) { TargetRegisterClass::iterator I = RC->begin(); for (; *I != AssignedReg; ++I) assert(I != RC->end() && "Didn't find reg!"); // Already added the first reg. --NumRegs; ++I; for (; NumRegs; --NumRegs, ++I) { assert(I != RC->end() && "Ran out of registers to allocate!"); Regs.push_back(*I); } } OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT); return; } // Otherwise, if this was a reference to an LLVM register class, create vregs // for this reference. if (const TargetRegisterClass *RC = PhysReg.second) { RegVT = *RC->vt_begin(); if (OpInfo.ConstraintVT == MVT::Other) ValueVT = RegVT; // Create the appropriate number of virtual registers. MachineRegisterInfo &RegInfo = MF.getRegInfo(); for (; NumRegs; --NumRegs) Regs.push_back(RegInfo.createVirtualRegister(RC)); OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT); return; } // Otherwise, we couldn't allocate enough registers for this. } /// visitInlineAsm - Handle a call to an InlineAsm object. /// void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { const InlineAsm *IA = cast(CS.getCalledValue()); /// ConstraintOperands - Information about all of the constraints. SDISelAsmOperandInfoVector ConstraintOperands; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints( DAG.getDataLayout(), DAG.getSubtarget().getRegisterInfo(), CS); bool hasMemory = false; unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. unsigned ResNo = 0; // ResNo - The result number of the next output. for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { ConstraintOperands.push_back(SDISelAsmOperandInfo(TargetConstraints[i])); SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back(); MVT OpVT = MVT::Other; // Compute the value type for each operand. switch (OpInfo.Type) { case InlineAsm::isOutput: // Indirect outputs just consume an argument. if (OpInfo.isIndirect) { OpInfo.CallOperandVal = const_cast(CS.getArgument(ArgNo++)); break; } // The return value of the call is this value. As such, there is no // corresponding argument. assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); if (StructType *STy = dyn_cast(CS.getType())) { OpVT = TLI.getSimpleValueType(DAG.getDataLayout(), STy->getElementType(ResNo)); } else { assert(ResNo == 0 && "Asm only has one result!"); OpVT = TLI.getSimpleValueType(DAG.getDataLayout(), CS.getType()); } ++ResNo; break; case InlineAsm::isInput: OpInfo.CallOperandVal = const_cast(CS.getArgument(ArgNo++)); break; case InlineAsm::isClobber: // Nothing to do. break; } // If this is an input or an indirect output, process the call argument. // BasicBlocks are labels, currently appearing only in asm's. if (OpInfo.CallOperandVal) { if (const BasicBlock *BB = dyn_cast(OpInfo.CallOperandVal)) { OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]); } else { OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); } OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, DAG.getDataLayout()).getSimpleVT(); } OpInfo.ConstraintVT = OpVT; // Indirect operand accesses access memory. if (OpInfo.isIndirect) hasMemory = true; else { for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) { TargetLowering::ConstraintType CType = TLI.getConstraintType(OpInfo.Codes[j]); if (CType == TargetLowering::C_Memory) { hasMemory = true; break; } } } } SDValue Chain, Flag; // We won't need to flush pending loads if this asm doesn't touch // memory and is nonvolatile. if (hasMemory || IA->hasSideEffects()) Chain = getRoot(); else Chain = DAG.getRoot(); // Second pass over the constraints: compute which constraint option to use // and assign registers to constraints that want a specific physreg. for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; // If this is an output operand with a matching input operand, look up the // matching input. If their types mismatch, e.g. one is an integer, the // other is floating point, or their sizes are different, flag it as an // error. if (OpInfo.hasMatchingInput()) { SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; if (OpInfo.ConstraintVT != Input.ConstraintVT) { const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo(); std::pair MatchRC = TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode, OpInfo.ConstraintVT); std::pair InputRC = TLI.getRegForInlineAsmConstraint(TRI, Input.ConstraintCode, Input.ConstraintVT); if ((OpInfo.ConstraintVT.isInteger() != Input.ConstraintVT.isInteger()) || (MatchRC.second != InputRC.second)) { report_fatal_error("Unsupported asm: input constraint" " with a matching output constraint of" " incompatible type!"); } Input.ConstraintVT = OpInfo.ConstraintVT; } } // Compute the constraint code and ConstraintType to use. TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG); if (OpInfo.ConstraintType == TargetLowering::C_Memory && OpInfo.Type == InlineAsm::isClobber) continue; // If this is a memory input, and if the operand is not indirect, do what we // need to to provide an address for the memory input. if (OpInfo.ConstraintType == TargetLowering::C_Memory && !OpInfo.isIndirect) { assert((OpInfo.isMultipleAlternative || (OpInfo.Type == InlineAsm::isInput)) && "Can only indirectify direct input operands!"); // Memory operands really want the address of the value. If we don't have // an indirect input, put it in the constpool if we can, otherwise spill // it to a stack slot. // TODO: This isn't quite right. We need to handle these according to // the addressing mode that the constraint wants. Also, this may take // an additional register for the computation and we don't want that // either. // If the operand is a float, integer, or vector constant, spill to a // constant pool entry to get its address. const Value *OpVal = OpInfo.CallOperandVal; if (isa(OpVal) || isa(OpVal) || isa(OpVal) || isa(OpVal)) { OpInfo.CallOperand = DAG.getConstantPool( cast(OpVal), TLI.getPointerTy(DAG.getDataLayout())); } else { // Otherwise, create a stack slot and emit a store to it before the // asm. Type *Ty = OpVal->getType(); auto &DL = DAG.getDataLayout(); uint64_t TySize = DL.getTypeAllocSize(Ty); unsigned Align = DL.getPrefTypeAlignment(Ty); MachineFunction &MF = DAG.getMachineFunction(); int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy(DAG.getDataLayout())); Chain = DAG.getStore( Chain, getCurSDLoc(), OpInfo.CallOperand, StackSlot, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI)); OpInfo.CallOperand = StackSlot; } // There is no longer a Value* corresponding to this operand. OpInfo.CallOperandVal = nullptr; // It is now an indirect operand. OpInfo.isIndirect = true; } // If this constraint is for a specific register, allocate it before // anything else. if (OpInfo.ConstraintType == TargetLowering::C_Register) GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo); } // Second pass - Loop over all of the operands, assigning virtual or physregs // to register class operands. for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; // C_Register operands have already been allocated, Other/Memory don't need // to be. if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass) GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo); } // AsmNodeOperands - The operands for the ISD::INLINEASM node. std::vector AsmNodeOperands; AsmNodeOperands.push_back(SDValue()); // reserve space for input chain AsmNodeOperands.push_back(DAG.getTargetExternalSymbol( IA->getAsmString().c_str(), TLI.getPointerTy(DAG.getDataLayout()))); // If we have a !srcloc metadata node associated with it, we want to attach // this to the ultimately generated inline asm machineinstr. To do this, we // pass in the third operand as this (potentially null) inline asm MDNode. const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc"); AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc)); // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore // bits as operand 3. unsigned ExtraInfo = 0; if (IA->hasSideEffects()) ExtraInfo |= InlineAsm::Extra_HasSideEffects; if (IA->isAlignStack()) ExtraInfo |= InlineAsm::Extra_IsAlignStack; if (CS.isConvergent()) ExtraInfo |= InlineAsm::Extra_IsConvergent; // Set the asm dialect. ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect; // Determine if this InlineAsm MayLoad or MayStore based on the constraints. for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; // Compute the constraint code and ConstraintType to use. TLI.ComputeConstraintToUse(OpInfo, SDValue()); // Ideally, we would only check against memory constraints. However, the // meaning of an other constraint can be target-specific and we can't easily // reason about it. Therefore, be conservative and set MayLoad/MayStore // for other constriants as well. if (OpInfo.ConstraintType == TargetLowering::C_Memory || OpInfo.ConstraintType == TargetLowering::C_Other) { if (OpInfo.Type == InlineAsm::isInput) ExtraInfo |= InlineAsm::Extra_MayLoad; else if (OpInfo.Type == InlineAsm::isOutput) ExtraInfo |= InlineAsm::Extra_MayStore; else if (OpInfo.Type == InlineAsm::isClobber) ExtraInfo |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore); } } AsmNodeOperands.push_back(DAG.getTargetConstant( ExtraInfo, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); // Loop over all of the inputs, copying the operand values into the // appropriate registers and processing the output regs. RegsForValue RetValRegs; // IndirectStoresToEmit - The set of stores to emit after the inline asm node. std::vector > IndirectStoresToEmit; for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; switch (OpInfo.Type) { case InlineAsm::isOutput: { if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass && OpInfo.ConstraintType != TargetLowering::C_Register) { // Memory output, or 'other' output (e.g. 'X' constraint). assert(OpInfo.isIndirect && "Memory output must be indirect operand"); unsigned ConstraintID = TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); assert(ConstraintID != InlineAsm::Constraint_Unknown && "Failed to convert memory constraint code to constraint id."); // Add information to the INLINEASM node to know about this output. unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID); AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, getCurSDLoc(), MVT::i32)); AsmNodeOperands.push_back(OpInfo.CallOperand); break; } // Otherwise, this is a register or register class output. // Copy the output from the appropriate register. Find a register that // we can use. if (OpInfo.AssignedRegs.Regs.empty()) { emitInlineAsmError( CS, "couldn't allocate output register for constraint '" + Twine(OpInfo.ConstraintCode) + "'"); return; } // If this is an indirect operand, store through the pointer after the // asm. if (OpInfo.isIndirect) { IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs, OpInfo.CallOperandVal)); } else { // This is the result value of the call. assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); // Concatenate this output onto the outputs list. RetValRegs.append(OpInfo.AssignedRegs); } // Add information to the INLINEASM node to know that this register is // set. OpInfo.AssignedRegs .AddInlineAsmOperands(OpInfo.isEarlyClobber ? InlineAsm::Kind_RegDefEarlyClobber : InlineAsm::Kind_RegDef, false, 0, getCurSDLoc(), DAG, AsmNodeOperands); break; } case InlineAsm::isInput: { SDValue InOperandVal = OpInfo.CallOperand; if (OpInfo.isMatchingInputConstraint()) { // Matching constraint? // If this is required to match an output register we have already set, // just use its register. unsigned OperandNo = OpInfo.getMatchedOperand(); // Scan until we find the definition we already emitted of this operand. // When we find it, create a RegsForValue operand. unsigned CurOp = InlineAsm::Op_FirstOperand; for (; OperandNo; --OperandNo) { // Advance to the next operand. unsigned OpFlag = cast(AsmNodeOperands[CurOp])->getZExtValue(); assert((InlineAsm::isRegDefKind(OpFlag) || InlineAsm::isRegDefEarlyClobberKind(OpFlag) || InlineAsm::isMemKind(OpFlag)) && "Skipped past definitions?"); CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1; } unsigned OpFlag = cast(AsmNodeOperands[CurOp])->getZExtValue(); if (InlineAsm::isRegDefKind(OpFlag) || InlineAsm::isRegDefEarlyClobberKind(OpFlag)) { // Add (OpFlag&0xffff)>>3 registers to MatchedRegs. if (OpInfo.isIndirect) { // This happens on gcc/testsuite/gcc.dg/pr8788-1.c emitInlineAsmError(CS, "inline asm not supported yet:" " don't know how to handle tied " "indirect register inputs"); return; } RegsForValue MatchedRegs; MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType()); MVT RegVT = AsmNodeOperands[CurOp+1].getSimpleValueType(); MatchedRegs.RegVTs.push_back(RegVT); MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo(); for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag); i != e; ++i) { if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT)) MatchedRegs.Regs.push_back(RegInfo.createVirtualRegister(RC)); else { emitInlineAsmError( CS, "inline asm error: This value" " type register class is not natively supported!"); return; } } SDLoc dl = getCurSDLoc(); // Use the produced MatchedRegs object to MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag, CS.getInstruction()); MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, true, OpInfo.getMatchedOperand(), dl, DAG, AsmNodeOperands); break; } assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!"); assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 && "Unexpected number of operands"); // Add information to the INLINEASM node to know about this input. // See InlineAsm.h isUseOperandTiedToDef. OpFlag = InlineAsm::convertMemFlagWordToMatchingFlagWord(OpFlag); OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag, OpInfo.getMatchedOperand()); AsmNodeOperands.push_back(DAG.getTargetConstant( OpFlag, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); break; } // Treat indirect 'X' constraint as memory. if (OpInfo.ConstraintType == TargetLowering::C_Other && OpInfo.isIndirect) OpInfo.ConstraintType = TargetLowering::C_Memory; if (OpInfo.ConstraintType == TargetLowering::C_Other) { std::vector Ops; TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, Ops, DAG); if (Ops.empty()) { emitInlineAsmError(CS, "invalid operand for inline asm constraint '" + Twine(OpInfo.ConstraintCode) + "'"); return; } // Add information to the INLINEASM node to know about this input. unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size()); AsmNodeOperands.push_back(DAG.getTargetConstant( ResOpType, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); break; } if (OpInfo.ConstraintType == TargetLowering::C_Memory) { assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); assert(InOperandVal.getValueType() == TLI.getPointerTy(DAG.getDataLayout()) && "Memory operands expect pointer values"); unsigned ConstraintID = TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); assert(ConstraintID != InlineAsm::Constraint_Unknown && "Failed to convert memory constraint code to constraint id."); // Add information to the INLINEASM node to know about this input. unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID); AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, getCurSDLoc(), MVT::i32)); AsmNodeOperands.push_back(InOperandVal); break; } assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass || OpInfo.ConstraintType == TargetLowering::C_Register) && "Unknown constraint type!"); // TODO: Support this. if (OpInfo.isIndirect) { emitInlineAsmError( CS, "Don't know how to handle indirect register inputs yet " "for constraint '" + Twine(OpInfo.ConstraintCode) + "'"); return; } // Copy the input into the appropriate registers. if (OpInfo.AssignedRegs.Regs.empty()) { emitInlineAsmError(CS, "couldn't allocate input reg for constraint '" + Twine(OpInfo.ConstraintCode) + "'"); return; } SDLoc dl = getCurSDLoc(); OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag, CS.getInstruction()); OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0, dl, DAG, AsmNodeOperands); break; } case InlineAsm::isClobber: { // Add the clobbered value to the operand list, so that the register // allocator is aware that the physreg got clobbered. if (!OpInfo.AssignedRegs.Regs.empty()) OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber, false, 0, getCurSDLoc(), DAG, AsmNodeOperands); break; } } } // Finish up input operands. Set the input chain and add the flag last. AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; if (Flag.getNode()) AsmNodeOperands.push_back(Flag); Chain = DAG.getNode(ISD::INLINEASM, getCurSDLoc(), DAG.getVTList(MVT::Other, MVT::Glue), AsmNodeOperands); Flag = Chain.getValue(1); // If this asm returns a register value, copy the result from that register // and set it as the value of the call. if (!RetValRegs.Regs.empty()) { SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, &Flag, CS.getInstruction()); // FIXME: Why don't we do this for inline asms with MRVs? if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) { EVT ResultType = TLI.getValueType(DAG.getDataLayout(), CS.getType()); // If any of the results of the inline asm is a vector, it may have the // wrong width/num elts. This can happen for register classes that can // contain multiple different value types. The preg or vreg allocated may // not have the same VT as was expected. Convert it to the right type // with bit_convert. if (ResultType != Val.getValueType() && Val.getValueType().isVector()) { Val = DAG.getNode(ISD::BITCAST, getCurSDLoc(), ResultType, Val); } else if (ResultType != Val.getValueType() && ResultType.isInteger() && Val.getValueType().isInteger()) { // If a result value was tied to an input value, the computed result may // have a wider width than the expected result. Extract the relevant // portion. Val = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultType, Val); } assert(ResultType == Val.getValueType() && "Asm result value mismatch!"); } setValue(CS.getInstruction(), Val); // Don't need to use this as a chain in this case. if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty()) return; } std::vector > StoresToEmit; // Process indirect outputs, first output all of the flagged copies out of // physregs. for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) { RegsForValue &OutRegs = IndirectStoresToEmit[i].first; const Value *Ptr = IndirectStoresToEmit[i].second; SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, &Flag, IA); StoresToEmit.push_back(std::make_pair(OutVal, Ptr)); } // Emit the non-flagged stores from the physregs. SmallVector OutChains; for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) { SDValue Val = DAG.getStore(Chain, getCurSDLoc(), StoresToEmit[i].first, getValue(StoresToEmit[i].second), MachinePointerInfo(StoresToEmit[i].second)); OutChains.push_back(Val); } if (!OutChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, OutChains); DAG.setRoot(Chain); } void SelectionDAGBuilder::emitInlineAsmError(ImmutableCallSite CS, const Twine &Message) { LLVMContext &Ctx = *DAG.getContext(); Ctx.emitError(CS.getInstruction(), Message); // Make sure we leave the DAG in a valid state const TargetLowering &TLI = DAG.getTargetLoweringInfo(); auto VT = TLI.getValueType(DAG.getDataLayout(), CS.getType()); setValue(CS.getInstruction(), DAG.getUNDEF(VT)); } void SelectionDAGBuilder::visitVAStart(const CallInst &I) { DAG.setRoot(DAG.getNode(ISD::VASTART, getCurSDLoc(), MVT::Other, getRoot(), getValue(I.getArgOperand(0)), DAG.getSrcValue(I.getArgOperand(0)))); } void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const DataLayout &DL = DAG.getDataLayout(); SDValue V = DAG.getVAArg(TLI.getValueType(DAG.getDataLayout(), I.getType()), getCurSDLoc(), getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)), DL.getABITypeAlignment(I.getType())); setValue(&I, V); DAG.setRoot(V.getValue(1)); } void SelectionDAGBuilder::visitVAEnd(const CallInst &I) { DAG.setRoot(DAG.getNode(ISD::VAEND, getCurSDLoc(), MVT::Other, getRoot(), getValue(I.getArgOperand(0)), DAG.getSrcValue(I.getArgOperand(0)))); } void SelectionDAGBuilder::visitVACopy(const CallInst &I) { DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurSDLoc(), MVT::Other, getRoot(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), DAG.getSrcValue(I.getArgOperand(0)), DAG.getSrcValue(I.getArgOperand(1)))); } SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG, const Instruction &I, SDValue Op) { const MDNode *Range = I.getMetadata(LLVMContext::MD_range); if (!Range) return Op; Constant *Lo = cast(Range->getOperand(0))->getValue(); if (!Lo->isNullValue()) return Op; Constant *Hi = cast(Range->getOperand(1))->getValue(); unsigned Bits = cast(Hi)->getValue().logBase2(); EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), Bits); SDLoc SL = getCurSDLoc(); SDValue ZExt = DAG.getNode(ISD::AssertZext, SL, Op.getValueType(), Op, DAG.getValueType(SmallVT)); unsigned NumVals = Op.getNode()->getNumValues(); if (NumVals == 1) return ZExt; SmallVector Ops; Ops.push_back(ZExt); for (unsigned I = 1; I != NumVals; ++I) Ops.push_back(Op.getValue(I)); return DAG.getMergeValues(Ops, SL); } /// \brief Populate a CallLowerinInfo (into \p CLI) based on the properties of /// the call being lowered. /// /// This is a helper for lowering intrinsics that follow a target calling /// convention or require stack pointer adjustment. Only a subset of the /// intrinsic's operands need to participate in the calling convention. void SelectionDAGBuilder::populateCallLoweringInfo( TargetLowering::CallLoweringInfo &CLI, ImmutableCallSite CS, unsigned ArgIdx, unsigned NumArgs, SDValue Callee, Type *ReturnTy, bool IsPatchPoint) { TargetLowering::ArgListTy Args; Args.reserve(NumArgs); // Populate the argument list. // Attributes for args start at offset 1, after the return attribute. for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs, AttrI = ArgIdx + 1; ArgI != ArgE; ++ArgI) { const Value *V = CS->getOperand(ArgI); assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic."); TargetLowering::ArgListEntry Entry; Entry.Node = getValue(V); Entry.Ty = V->getType(); Entry.setAttributes(&CS, AttrI); Args.push_back(Entry); } CLI.setDebugLoc(getCurSDLoc()) .setChain(getRoot()) .setCallee(CS.getCallingConv(), ReturnTy, Callee, std::move(Args)) .setDiscardResult(CS->use_empty()) .setIsPatchPoint(IsPatchPoint); } /// \brief Add a stack map intrinsic call's live variable operands to a stackmap /// or patchpoint target node's operand list. /// /// Constants are converted to TargetConstants purely as an optimization to /// avoid constant materialization and register allocation. /// /// FrameIndex operands are converted to TargetFrameIndex so that ISEL does not /// generate addess computation nodes, and so ExpandISelPseudo can convert the /// TargetFrameIndex into a DirectMemRefOp StackMap location. This avoids /// address materialization and register allocation, but may also be required /// for correctness. If a StackMap (or PatchPoint) intrinsic directly uses an /// alloca in the entry block, then the runtime may assume that the alloca's /// StackMap location can be read immediately after compilation and that the /// location is valid at any point during execution (this is similar to the /// assumption made by the llvm.gcroot intrinsic). If the alloca's location were /// only available in a register, then the runtime would need to trap when /// execution reaches the StackMap in order to read the alloca's location. static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx, const SDLoc &DL, SmallVectorImpl &Ops, SelectionDAGBuilder &Builder) { for (unsigned i = StartIdx, e = CS.arg_size(); i != e; ++i) { SDValue OpVal = Builder.getValue(CS.getArgument(i)); if (ConstantSDNode *C = dyn_cast(OpVal)) { Ops.push_back( Builder.DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64)); Ops.push_back( Builder.DAG.getTargetConstant(C->getSExtValue(), DL, MVT::i64)); } else if (FrameIndexSDNode *FI = dyn_cast(OpVal)) { const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo(); Ops.push_back(Builder.DAG.getTargetFrameIndex( FI->getIndex(), TLI.getPointerTy(Builder.DAG.getDataLayout()))); } else Ops.push_back(OpVal); } } /// \brief Lower llvm.experimental.stackmap directly to its target opcode. void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { // void @llvm.experimental.stackmap(i32 , i32 , // [live variables...]) assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value."); SDValue Chain, InFlag, Callee, NullPtr; SmallVector Ops; SDLoc DL = getCurSDLoc(); Callee = getValue(CI.getCalledValue()); NullPtr = DAG.getIntPtrConstant(0, DL, true); // The stackmap intrinsic only records the live variables (the arguemnts // passed to it) and emits NOPS (if requested). Unlike the patchpoint // intrinsic, this won't be lowered to a function call. This means we don't // have to worry about calling conventions and target specific lowering code. // Instead we perform the call lowering right here. // // chain, flag = CALLSEQ_START(chain, 0) // chain, flag = STACKMAP(id, nbytes, ..., chain, flag) // chain, flag = CALLSEQ_END(chain, 0, 0, flag) // Chain = DAG.getCALLSEQ_START(getRoot(), NullPtr, DL); InFlag = Chain.getValue(1); // Add the and constants. SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos)); Ops.push_back(DAG.getTargetConstant( cast(IDVal)->getZExtValue(), DL, MVT::i64)); SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos)); Ops.push_back(DAG.getTargetConstant( cast(NBytesVal)->getZExtValue(), DL, MVT::i32)); // Push live variables for the stack map. addStackMapLiveVars(&CI, 2, DL, Ops, *this); // We are not pushing any register mask info here on the operands list, // because the stackmap doesn't clobber anything. // Push the chain and the glue flag. Ops.push_back(Chain); Ops.push_back(InFlag); // Create the STACKMAP node. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SDNode *SM = DAG.getMachineNode(TargetOpcode::STACKMAP, DL, NodeTys, Ops); Chain = SDValue(SM, 0); InFlag = Chain.getValue(1); Chain = DAG.getCALLSEQ_END(Chain, NullPtr, NullPtr, InFlag, DL); // Stackmaps don't generate values, so nothing goes into the NodeMap. // Set the root to the target-lowered call chain. DAG.setRoot(Chain); // Inform the Frame Information that we have a stackmap in this function. FuncInfo.MF->getFrameInfo()->setHasStackMap(); } /// \brief Lower llvm.experimental.patchpoint directly to its target opcode. void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, const BasicBlock *EHPadBB) { // void|i64 @llvm.experimental.patchpoint.void|i64(i64 , // i32 , // i8* , // i32 , // [Args...], // [live variables...]) CallingConv::ID CC = CS.getCallingConv(); bool IsAnyRegCC = CC == CallingConv::AnyReg; bool HasDef = !CS->getType()->isVoidTy(); SDLoc dl = getCurSDLoc(); SDValue Callee = getValue(CS->getOperand(PatchPointOpers::TargetPos)); // Handle immediate and symbolic callees. if (auto* ConstCallee = dyn_cast(Callee)) Callee = DAG.getIntPtrConstant(ConstCallee->getZExtValue(), dl, /*isTarget=*/true); else if (auto* SymbolicCallee = dyn_cast(Callee)) Callee = DAG.getTargetGlobalAddress(SymbolicCallee->getGlobal(), SDLoc(SymbolicCallee), SymbolicCallee->getValueType(0)); // Get the real number of arguments participating in the call SDValue NArgVal = getValue(CS.getArgument(PatchPointOpers::NArgPos)); unsigned NumArgs = cast(NArgVal)->getZExtValue(); // Skip the four meta args: , , , // Intrinsics include all meta-operands up to but not including CC. unsigned NumMetaOpers = PatchPointOpers::CCPos; assert(CS.arg_size() >= NumMetaOpers + NumArgs && "Not enough arguments provided to the patchpoint intrinsic"); // For AnyRegCC the arguments are lowered later on manually. unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs; Type *ReturnTy = IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CS->getType(); TargetLowering::CallLoweringInfo CLI(DAG); populateCallLoweringInfo(CLI, CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy, true); std::pair Result = lowerInvokable(CLI, EHPadBB); SDNode *CallEnd = Result.second.getNode(); if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg)) CallEnd = CallEnd->getOperand(0).getNode(); /// Get a call instruction from the call sequence chain. /// Tail calls are not allowed. assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && "Expected a callseq node."); SDNode *Call = CallEnd->getOperand(0).getNode(); bool HasGlue = Call->getGluedNode(); // Replace the target specific call node with the patchable intrinsic. SmallVector Ops; // Add the and constants. SDValue IDVal = getValue(CS->getOperand(PatchPointOpers::IDPos)); Ops.push_back(DAG.getTargetConstant( cast(IDVal)->getZExtValue(), dl, MVT::i64)); SDValue NBytesVal = getValue(CS->getOperand(PatchPointOpers::NBytesPos)); Ops.push_back(DAG.getTargetConstant( cast(NBytesVal)->getZExtValue(), dl, MVT::i32)); // Add the callee. Ops.push_back(Callee); // Adjust to account for any arguments that have been passed on the // stack instead. // Call Node: Chain, Target, {Args}, RegMask, [Glue] unsigned NumCallRegArgs = Call->getNumOperands() - (HasGlue ? 4 : 3); NumCallRegArgs = IsAnyRegCC ? NumArgs : NumCallRegArgs; Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, dl, MVT::i32)); // Add the calling convention Ops.push_back(DAG.getTargetConstant((unsigned)CC, dl, MVT::i32)); // Add the arguments we omitted previously. The register allocator should // place these in any free register. if (IsAnyRegCC) for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i) Ops.push_back(getValue(CS.getArgument(i))); // Push the arguments from the call instruction up to the register mask. SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1; Ops.append(Call->op_begin() + 2, e); // Push live variables for the stack map. addStackMapLiveVars(CS, NumMetaOpers + NumArgs, dl, Ops, *this); // Push the register mask info. if (HasGlue) Ops.push_back(*(Call->op_end()-2)); else Ops.push_back(*(Call->op_end()-1)); // Push the chain (this is originally the first operand of the call, but // becomes now the last or second to last operand). Ops.push_back(*(Call->op_begin())); // Push the glue flag (last operand). if (HasGlue) Ops.push_back(*(Call->op_end()-1)); SDVTList NodeTys; if (IsAnyRegCC && HasDef) { // Create the return types based on the intrinsic definition const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector ValueVTs; ComputeValueVTs(TLI, DAG.getDataLayout(), CS->getType(), ValueVTs); assert(ValueVTs.size() == 1 && "Expected only one return value type."); // There is always a chain and a glue type at the end ValueVTs.push_back(MVT::Other); ValueVTs.push_back(MVT::Glue); NodeTys = DAG.getVTList(ValueVTs); } else NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); // Replace the target specific call node with a PATCHPOINT node. MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT, dl, NodeTys, Ops); // Update the NodeMap. if (HasDef) { if (IsAnyRegCC) setValue(CS.getInstruction(), SDValue(MN, 0)); else setValue(CS.getInstruction(), Result.first); } // Fixup the consumers of the intrinsic. The chain and glue may be used in the // call sequence. Furthermore the location of the chain and glue can change // when the AnyReg calling convention is used and the intrinsic returns a // value. if (IsAnyRegCC && HasDef) { SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)}; SDValue To[] = {SDValue(MN, 1), SDValue(MN, 2)}; DAG.ReplaceAllUsesOfValuesWith(From, To, 2); } else DAG.ReplaceAllUsesWith(Call, MN); DAG.DeleteNode(Call); // Inform the Frame Information that we have a patchpoint in this function. FuncInfo.MF->getFrameInfo()->setHasPatchPoint(); } /// Returns an AttributeSet representing the attributes applied to the return /// value of the given call. static AttributeSet getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) { SmallVector Attrs; if (CLI.RetSExt) Attrs.push_back(Attribute::SExt); if (CLI.RetZExt) Attrs.push_back(Attribute::ZExt); if (CLI.IsInReg) Attrs.push_back(Attribute::InReg); return AttributeSet::get(CLI.RetTy->getContext(), AttributeSet::ReturnIndex, Attrs); } /// TargetLowering::LowerCallTo - This is the default LowerCallTo /// implementation, which just calls LowerCall. /// FIXME: When all targets are /// migrated to using LowerCall, this hook should be integrated into SDISel. std::pair TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // Handle the incoming return values from the call. CLI.Ins.clear(); Type *OrigRetTy = CLI.RetTy; SmallVector RetTys; SmallVector Offsets; auto &DL = CLI.DAG.getDataLayout(); ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets); SmallVector Outs; GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, *this, DL); bool CanLowerReturn = this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(), CLI.IsVarArg, Outs, CLI.RetTy->getContext()); SDValue DemoteStackSlot; int DemoteStackIdx = -100; if (!CanLowerReturn) { // FIXME: equivalent assert? // assert(!CS.hasInAllocaArgument() && // "sret demotion is incompatible with inalloca"); uint64_t TySize = DL.getTypeAllocSize(CLI.RetTy); unsigned Align = DL.getPrefTypeAlignment(CLI.RetTy); MachineFunction &MF = CLI.DAG.getMachineFunction(); DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy); DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy(DL)); ArgListEntry Entry; Entry.Node = DemoteStackSlot; Entry.Ty = StackSlotPtrType; Entry.isSExt = false; Entry.isZExt = false; Entry.isInReg = false; Entry.isSRet = true; Entry.isNest = false; Entry.isByVal = false; Entry.isReturned = false; Entry.isSwiftSelf = false; Entry.isSwiftError = false; Entry.Alignment = Align; CLI.getArgs().insert(CLI.getArgs().begin(), Entry); CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext()); // sret demotion isn't compatible with tail-calls, since the sret argument // points into the callers stack frame. CLI.IsTailCall = false; } else { for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { EVT VT = RetTys[I]; MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); for (unsigned i = 0; i != NumRegs; ++i) { ISD::InputArg MyFlags; MyFlags.VT = RegisterVT; MyFlags.ArgVT = VT; MyFlags.Used = CLI.IsReturnValueUsed; if (CLI.RetSExt) MyFlags.Flags.setSExt(); if (CLI.RetZExt) MyFlags.Flags.setZExt(); if (CLI.IsInReg) MyFlags.Flags.setInReg(); CLI.Ins.push_back(MyFlags); } } } // We push in swifterror return as the last element of CLI.Ins. ArgListTy &Args = CLI.getArgs(); if (supportSwiftError()) { for (unsigned i = 0, e = Args.size(); i != e; ++i) { if (Args[i].isSwiftError) { ISD::InputArg MyFlags; MyFlags.VT = getPointerTy(DL); MyFlags.ArgVT = EVT(getPointerTy(DL)); MyFlags.Flags.setSwiftError(); CLI.Ins.push_back(MyFlags); } } } // Handle all of the outgoing arguments. CLI.Outs.clear(); CLI.OutVals.clear(); for (unsigned i = 0, e = Args.size(); i != e; ++i) { SmallVector ValueVTs; ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs); Type *FinalType = Args[i].Ty; if (Args[i].isByVal) FinalType = cast(Args[i].Ty)->getElementType(); bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters( FinalType, CLI.CallConv, CLI.IsVarArg); for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; ++Value) { EVT VT = ValueVTs[Value]; Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext()); SDValue Op = SDValue(Args[i].Node.getNode(), Args[i].Node.getResNo() + Value); ISD::ArgFlagsTy Flags; unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); if (Args[i].isZExt) Flags.setZExt(); if (Args[i].isSExt) Flags.setSExt(); if (Args[i].isInReg) Flags.setInReg(); if (Args[i].isSRet) Flags.setSRet(); if (Args[i].isSwiftSelf) Flags.setSwiftSelf(); if (Args[i].isSwiftError) Flags.setSwiftError(); if (Args[i].isByVal) Flags.setByVal(); if (Args[i].isInAlloca) { Flags.setInAlloca(); // Set the byval flag for CCAssignFn callbacks that don't know about // inalloca. This way we can know how many bytes we should've allocated // and how many bytes a callee cleanup function will pop. If we port // inalloca to more targets, we'll have to add custom inalloca handling // in the various CC lowering callbacks. Flags.setByVal(); } if (Args[i].isByVal || Args[i].isInAlloca) { PointerType *Ty = cast(Args[i].Ty); Type *ElementTy = Ty->getElementType(); Flags.setByValSize(DL.getTypeAllocSize(ElementTy)); // For ByVal, alignment should come from FE. BE will guess if this // info is not there but there are cases it cannot get right. unsigned FrameAlign; if (Args[i].Alignment) FrameAlign = Args[i].Alignment; else FrameAlign = getByValTypeAlignment(ElementTy, DL); Flags.setByValAlign(FrameAlign); } if (Args[i].isNest) Flags.setNest(); if (NeedsRegBlock) Flags.setInConsecutiveRegs(); Flags.setOrigAlign(OriginalAlignment); MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT); unsigned NumParts = getNumRegisters(CLI.RetTy->getContext(), VT); SmallVector Parts(NumParts); ISD::NodeType ExtendKind = ISD::ANY_EXTEND; if (Args[i].isSExt) ExtendKind = ISD::SIGN_EXTEND; else if (Args[i].isZExt) ExtendKind = ISD::ZERO_EXTEND; // Conservatively only handle 'returned' on non-vectors for now if (Args[i].isReturned && !Op.getValueType().isVector()) { assert(CLI.RetTy == Args[i].Ty && RetTys.size() == NumValues && "unexpected use of 'returned'"); // Before passing 'returned' to the target lowering code, ensure that // either the register MVT and the actual EVT are the same size or that // the return value and argument are extended in the same way; in these // cases it's safe to pass the argument register value unchanged as the // return register value (although it's at the target's option whether // to do so) // TODO: allow code generation to take advantage of partially preserved // registers rather than clobbering the entire register when the // parameter extension method is not compatible with the return // extension method if ((NumParts * PartVT.getSizeInBits() == VT.getSizeInBits()) || (ExtendKind != ISD::ANY_EXTEND && CLI.RetSExt == Args[i].isSExt && CLI.RetZExt == Args[i].isZExt)) Flags.setReturned(); } getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT, CLI.CS ? CLI.CS->getInstruction() : nullptr, ExtendKind); for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), VT, i < CLI.NumFixedArgs, i, j*Parts[j].getValueType().getStoreSize()); if (NumParts > 1 && j == 0) MyFlags.Flags.setSplit(); else if (j != 0) { MyFlags.Flags.setOrigAlign(1); if (j == NumParts - 1) MyFlags.Flags.setSplitEnd(); } CLI.Outs.push_back(MyFlags); CLI.OutVals.push_back(Parts[j]); } if (NeedsRegBlock && Value == NumValues - 1) CLI.Outs[CLI.Outs.size() - 1].Flags.setInConsecutiveRegsLast(); } } SmallVector InVals; CLI.Chain = LowerCall(CLI, InVals); // Update CLI.InVals to use outside of this function. CLI.InVals = InVals; // Verify that the target's LowerCall behaved as expected. assert(CLI.Chain.getNode() && CLI.Chain.getValueType() == MVT::Other && "LowerCall didn't return a valid chain!"); assert((!CLI.IsTailCall || InVals.empty()) && "LowerCall emitted a return value for a tail call!"); assert((CLI.IsTailCall || InVals.size() == CLI.Ins.size()) && "LowerCall didn't emit the correct number of values!"); // For a tail call, the return value is merely live-out and there aren't // any nodes in the DAG representing it. Return a special value to // indicate that a tail call has been emitted and no more Instructions // should be processed in the current block. if (CLI.IsTailCall) { CLI.DAG.setRoot(CLI.Chain); return std::make_pair(SDValue(), SDValue()); } #ifndef NDEBUG for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) { assert(InVals[i].getNode() && "LowerCall emitted a null value!"); assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() && "LowerCall emitted a value with the wrong type!"); } #endif SmallVector ReturnValues; if (!CanLowerReturn) { // The instruction result is the result of loading from the // hidden sret parameter. SmallVector PVTs; Type *PtrRetTy = PointerType::getUnqual(OrigRetTy); ComputeValueVTs(*this, DL, PtrRetTy, PVTs); assert(PVTs.size() == 1 && "Pointers should fit in one register"); EVT PtrVT = PVTs[0]; unsigned NumValues = RetTys.size(); ReturnValues.resize(NumValues); SmallVector Chains(NumValues); // An aggregate return value cannot wrap around the address space, so // offsets to its parts don't wrap either. SDNodeFlags Flags; Flags.setNoUnsignedWrap(true); for (unsigned i = 0; i < NumValues; ++i) { SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot, CLI.DAG.getConstant(Offsets[i], CLI.DL, PtrVT), &Flags); SDValue L = CLI.DAG.getLoad( RetTys[i], CLI.DL, CLI.Chain, Add, MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(), DemoteStackIdx, Offsets[i]), /* Alignment = */ 1); ReturnValues[i] = L; Chains[i] = L.getValue(1); } CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor, CLI.DL, MVT::Other, Chains); } else { // Collect the legal value parts into potentially illegal values // that correspond to the original function's return values. Optional AssertOp; if (CLI.RetSExt) AssertOp = ISD::AssertSext; else if (CLI.RetZExt) AssertOp = ISD::AssertZext; unsigned CurReg = 0; for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { EVT VT = RetTys[I]; MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg], NumRegs, RegisterVT, VT, nullptr, AssertOp)); CurReg += NumRegs; } // For a function returning void, there is no return value. We can't create // such a node, so we just return a null return value in that case. In // that case, nothing will actually look at the value. if (ReturnValues.empty()) return std::make_pair(SDValue(), CLI.Chain); } SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL, CLI.DAG.getVTList(RetTys), ReturnValues); return std::make_pair(Res, CLI.Chain); } void TargetLowering::LowerOperationWrapper(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { if (SDValue Res = LowerOperation(SDValue(N, 0), DAG)) Results.push_back(Res); } SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("LowerOperation not implemented for this target!"); } void SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { SDValue Op = getNonRegisterValue(V); assert((Op.getOpcode() != ISD::CopyFromReg || cast(Op.getOperand(1))->getReg() != Reg) && "Copy from a reg to the same reg!"); assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, V->getType()); SDValue Chain = DAG.getEntryNode(); ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) == FuncInfo.PreferredExtendType.end()) ? ISD::ANY_EXTEND : FuncInfo.PreferredExtendType[V]; RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V, ExtendType); PendingExports.push_back(Chain); } #include "llvm/CodeGen/SelectionDAGISel.h" /// isOnlyUsedInEntryBlock - If the specified argument is only used in the /// entry block, return true. This includes arguments used by switches, since /// the switch may expand into multiple basic blocks. static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { // With FastISel active, we may be splitting blocks, so force creation // of virtual registers for all non-dead arguments. if (FastISel) return A->use_empty(); const BasicBlock &Entry = A->getParent()->front(); for (const User *U : A->users()) if (cast(U)->getParent() != &Entry || isa(U)) return false; // Use not in entry block. return true; } void SelectionDAGISel::LowerArguments(const Function &F) { SelectionDAG &DAG = SDB->DAG; SDLoc dl = SDB->getCurSDLoc(); const DataLayout &DL = DAG.getDataLayout(); SmallVector Ins; if (!FuncInfo->CanLowerReturn) { // Put in an sret pointer parameter before all the other parameters. SmallVector ValueVTs; ComputeValueVTs(*TLI, DAG.getDataLayout(), PointerType::getUnqual(F.getReturnType()), ValueVTs); // NOTE: Assuming that a pointer will never break down to more than one VT // or one register. ISD::ArgFlagsTy Flags; Flags.setSRet(); MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVTs[0]); ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true, ISD::InputArg::NoArgIndex, 0); Ins.push_back(RetArg); } // Set up the incoming argument description vector. unsigned Idx = 1; for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I, ++Idx) { SmallVector ValueVTs; ComputeValueVTs(*TLI, DAG.getDataLayout(), I->getType(), ValueVTs); bool isArgValueUsed = !I->use_empty(); unsigned PartBase = 0; Type *FinalType = I->getType(); if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) FinalType = cast(FinalType)->getElementType(); bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters( FinalType, F.getCallingConv(), F.isVarArg()); for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; ++Value) { EVT VT = ValueVTs[Value]; Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); ISD::ArgFlagsTy Flags; unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) Flags.setZExt(); if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) Flags.setSExt(); if (F.getAttributes().hasAttribute(Idx, Attribute::InReg)) Flags.setInReg(); if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet)) Flags.setSRet(); if (F.getAttributes().hasAttribute(Idx, Attribute::SwiftSelf)) Flags.setSwiftSelf(); if (F.getAttributes().hasAttribute(Idx, Attribute::SwiftError)) Flags.setSwiftError(); if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) Flags.setByVal(); if (F.getAttributes().hasAttribute(Idx, Attribute::InAlloca)) { Flags.setInAlloca(); // Set the byval flag for CCAssignFn callbacks that don't know about // inalloca. This way we can know how many bytes we should've allocated // and how many bytes a callee cleanup function will pop. If we port // inalloca to more targets, we'll have to add custom inalloca handling // in the various CC lowering callbacks. Flags.setByVal(); } if (F.getCallingConv() == CallingConv::X86_INTR) { // IA Interrupt passes frame (1st parameter) by value in the stack. if (Idx == 1) Flags.setByVal(); } if (Flags.isByVal() || Flags.isInAlloca()) { PointerType *Ty = cast(I->getType()); Type *ElementTy = Ty->getElementType(); Flags.setByValSize(DL.getTypeAllocSize(ElementTy)); // For ByVal, alignment should be passed from FE. BE will guess if // this info is not there but there are cases it cannot get right. unsigned FrameAlign; if (F.getParamAlignment(Idx)) FrameAlign = F.getParamAlignment(Idx); else FrameAlign = TLI->getByValTypeAlignment(ElementTy, DL); Flags.setByValAlign(FrameAlign); } if (F.getAttributes().hasAttribute(Idx, Attribute::Nest)) Flags.setNest(); if (NeedsRegBlock) Flags.setInConsecutiveRegs(); Flags.setOrigAlign(OriginalAlignment); MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT); unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT); for (unsigned i = 0; i != NumRegs; ++i) { ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed, Idx-1, PartBase+i*RegisterVT.getStoreSize()); if (NumRegs > 1 && i == 0) MyFlags.Flags.setSplit(); // if it isn't first piece, alignment must be 1 else if (i > 0) { MyFlags.Flags.setOrigAlign(1); if (i == NumRegs - 1) MyFlags.Flags.setSplitEnd(); } Ins.push_back(MyFlags); } if (NeedsRegBlock && Value == NumValues - 1) Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast(); PartBase += VT.getStoreSize(); } } // Call the target to set up the argument values. SmallVector InVals; SDValue NewRoot = TLI->LowerFormalArguments( DAG.getRoot(), F.getCallingConv(), F.isVarArg(), Ins, dl, DAG, InVals); // Verify that the target's LowerFormalArguments behaved as expected. assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other && "LowerFormalArguments didn't return a valid chain!"); assert(InVals.size() == Ins.size() && "LowerFormalArguments didn't emit the correct number of values!"); DEBUG({ for (unsigned i = 0, e = Ins.size(); i != e; ++i) { assert(InVals[i].getNode() && "LowerFormalArguments emitted a null value!"); assert(EVT(Ins[i].VT) == InVals[i].getValueType() && "LowerFormalArguments emitted a value with the wrong type!"); } }); // Update the DAG with the new chain value resulting from argument lowering. DAG.setRoot(NewRoot); // Set up the argument values. unsigned i = 0; Idx = 1; if (!FuncInfo->CanLowerReturn) { // Create a virtual register for the sret pointer, and put in a copy // from the sret argument into it. SmallVector ValueVTs; ComputeValueVTs(*TLI, DAG.getDataLayout(), PointerType::getUnqual(F.getReturnType()), ValueVTs); MVT VT = ValueVTs[0].getSimpleVT(); MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT); Optional AssertOp = None; SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT, nullptr, AssertOp); MachineFunction& MF = SDB->DAG.getMachineFunction(); MachineRegisterInfo& RegInfo = MF.getRegInfo(); unsigned SRetReg = RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT)); FuncInfo->DemoteRegister = SRetReg; NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(), SRetReg, ArgValue); DAG.setRoot(NewRoot); // i indexes lowered arguments. Bump it past the hidden sret argument. // Idx indexes LLVM arguments. Don't touch it. ++i; } for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I, ++Idx) { SmallVector ArgValues; SmallVector ValueVTs; ComputeValueVTs(*TLI, DAG.getDataLayout(), I->getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); // If this argument is unused then remember its value. It is used to generate // debugging information. if (I->use_empty() && NumValues) { SDB->setUnusedArgValue(&*I, InVals[i]); // Also remember any frame index for use in FastISel. if (FrameIndexSDNode *FI = dyn_cast(InVals[i].getNode())) FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex()); } for (unsigned Val = 0; Val != NumValues; ++Val) { EVT VT = ValueVTs[Val]; MVT PartVT = TLI->getRegisterType(*CurDAG->getContext(), VT); unsigned NumParts = TLI->getNumRegisters(*CurDAG->getContext(), VT); if (!I->use_empty()) { Optional AssertOp; if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) AssertOp = ISD::AssertSext; else if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) AssertOp = ISD::AssertZext; ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts, PartVT, VT, nullptr, AssertOp)); } i += NumParts; } // We don't need to do anything else for unused arguments. if (ArgValues.empty()) continue; // Note down frame index. if (FrameIndexSDNode *FI = dyn_cast(ArgValues[0].getNode())) FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex()); SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues), SDB->getCurSDLoc()); SDB->setValue(&*I, Res); if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) { if (LoadSDNode *LNode = dyn_cast(Res.getOperand(0).getNode())) if (FrameIndexSDNode *FI = dyn_cast(LNode->getBasePtr().getNode())) FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex()); } // Update SwiftErrorMap. if (Res.getOpcode() == ISD::CopyFromReg && TLI->supportSwiftError() && F.getAttributes().hasAttribute(Idx, Attribute::SwiftError)) { unsigned Reg = cast(Res.getOperand(1))->getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) FuncInfo->SwiftErrorMap[FuncInfo->MBB][0] = Reg; } // If this argument is live outside of the entry block, insert a copy from // wherever we got it to the vreg that other BB's will reference it as. if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) { // If we can, though, try to skip creating an unnecessary vreg. // FIXME: This isn't very clean... it would be nice to make this more // general. It's also subtly incompatible with the hacks FastISel // uses with vregs. unsigned Reg = cast(Res.getOperand(1))->getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) { FuncInfo->ValueMap[&*I] = Reg; continue; } } if (!isOnlyUsedInEntryBlock(&*I, TM.Options.EnableFastISel)) { FuncInfo->InitializeRegForValue(&*I); SDB->CopyToExportRegsIfNeeded(&*I); } } assert(i == InVals.size() && "Argument register count mismatch!"); // Finally, if the target has anything special to do, allow it to do so. EmitFunctionEntryCode(); } /// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to /// ensure constants are generated when needed. Remember the virtual registers /// that need to be added to the Machine PHI nodes as input. We cannot just /// directly add them, because expansion might result in multiple MBB's for one /// BB. As such, the start of the BB might correspond to a different MBB than /// the end. /// void SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { const TerminatorInst *TI = LLVMBB->getTerminator(); SmallPtrSet SuccsHandled; // Check PHI nodes in successors that expect a value to be available from this // block. for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { const BasicBlock *SuccBB = TI->getSuccessor(succ); if (!isa(SuccBB->begin())) continue; MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB]; // If this terminator has multiple identical successors (common for // switches), only handle each succ once. if (!SuccsHandled.insert(SuccMBB).second) continue; MachineBasicBlock::iterator MBBI = SuccMBB->begin(); // At this point we know that there is a 1-1 correspondence between LLVM PHI // nodes and Machine PHI nodes, but the incoming operands have not been // emitted yet. for (BasicBlock::const_iterator I = SuccBB->begin(); const PHINode *PN = dyn_cast(I); ++I) { // Ignore dead phi's. if (PN->use_empty()) continue; // Skip empty types if (PN->getType()->isEmptyTy()) continue; unsigned Reg; const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB); if (const Constant *C = dyn_cast(PHIOp)) { unsigned &RegOut = ConstantsOut[C]; if (RegOut == 0) { RegOut = FuncInfo.CreateRegs(C->getType()); CopyValueToVirtualRegister(C, RegOut); } Reg = RegOut; } else { DenseMap::iterator I = FuncInfo.ValueMap.find(PHIOp); if (I != FuncInfo.ValueMap.end()) Reg = I->second; else { assert(isa(PHIOp) && FuncInfo.StaticAllocaMap.count(cast(PHIOp)) && "Didn't codegen value into a register!??"); Reg = FuncInfo.CreateRegs(PHIOp->getType()); CopyValueToVirtualRegister(PHIOp, Reg); } } // Remember that this register needs to added to the machine PHI node as // the input for this MBB. SmallVector ValueVTs; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); ComputeValueVTs(TLI, DAG.getDataLayout(), PN->getType(), ValueVTs); for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { EVT VT = ValueVTs[vti]; unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT); for (unsigned i = 0, e = NumRegisters; i != e; ++i) FuncInfo.PHINodesToUpdate.push_back( std::make_pair(&*MBBI++, Reg + i)); Reg += NumRegisters; } } } ConstantsOut.clear(); } /// Add a successor MBB to ParentMBB< creating a new MachineBB for BB if SuccMBB /// is 0. MachineBasicBlock * SelectionDAGBuilder::StackProtectorDescriptor:: AddSuccessorMBB(const BasicBlock *BB, MachineBasicBlock *ParentMBB, bool IsLikely, MachineBasicBlock *SuccMBB) { // If SuccBB has not been created yet, create it. if (!SuccMBB) { MachineFunction *MF = ParentMBB->getParent(); MachineFunction::iterator BBI(ParentMBB); SuccMBB = MF->CreateMachineBasicBlock(BB); MF->insert(++BBI, SuccMBB); } // Add it as a successor of ParentMBB. ParentMBB->addSuccessor( SuccMBB, BranchProbabilityInfo::getBranchProbStackProtector(IsLikely)); return SuccMBB; } MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) { MachineFunction::iterator I(MBB); if (++I == FuncInfo.MF->end()) return nullptr; return &*I; } /// During lowering new call nodes can be created (such as memset, etc.). /// Those will become new roots of the current DAG, but complications arise /// when they are tail calls. In such cases, the call lowering will update /// the root, but the builder still needs to know that a tail call has been /// lowered in order to avoid generating an additional return. void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) { // If the node is null, we do have a tail call. if (MaybeTC.getNode() != nullptr) DAG.setRoot(MaybeTC); else HasTailCall = true; } bool SelectionDAGBuilder::isDense(const CaseClusterVector &Clusters, unsigned *TotalCases, unsigned First, unsigned Last, unsigned Density) { assert(Last >= First); assert(TotalCases[Last] >= TotalCases[First]); APInt LowCase = Clusters[First].Low->getValue(); APInt HighCase = Clusters[Last].High->getValue(); assert(LowCase.getBitWidth() == HighCase.getBitWidth()); // FIXME: A range of consecutive cases has 100% density, but only requires one // comparison to lower. We should discriminate against such consecutive ranges // in jump tables. uint64_t Diff = (HighCase - LowCase).getLimitedValue((UINT64_MAX - 1) / 100); uint64_t Range = Diff + 1; uint64_t NumCases = TotalCases[Last] - (First == 0 ? 0 : TotalCases[First - 1]); assert(NumCases < UINT64_MAX / 100); assert(Range >= NumCases); return NumCases * 100 >= Range * Density; } static inline bool areJTsAllowed(const TargetLowering &TLI, const SwitchInst *SI) { const Function *Fn = SI->getParent()->getParent(); if (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true") return false; return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other); } bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters, unsigned First, unsigned Last, const SwitchInst *SI, MachineBasicBlock *DefaultMBB, CaseCluster &JTCluster) { assert(First <= Last); auto Prob = BranchProbability::getZero(); unsigned NumCmps = 0; std::vector Table; DenseMap JTProbs; // Initialize probabilities in JTProbs. for (unsigned I = First; I <= Last; ++I) JTProbs[Clusters[I].MBB] = BranchProbability::getZero(); for (unsigned I = First; I <= Last; ++I) { assert(Clusters[I].Kind == CC_Range); Prob += Clusters[I].Prob; APInt Low = Clusters[I].Low->getValue(); APInt High = Clusters[I].High->getValue(); NumCmps += (Low == High) ? 1 : 2; if (I != First) { // Fill the gap between this and the previous cluster. APInt PreviousHigh = Clusters[I - 1].High->getValue(); assert(PreviousHigh.slt(Low)); uint64_t Gap = (Low - PreviousHigh).getLimitedValue() - 1; for (uint64_t J = 0; J < Gap; J++) Table.push_back(DefaultMBB); } uint64_t ClusterSize = (High - Low).getLimitedValue() + 1; for (uint64_t J = 0; J < ClusterSize; ++J) Table.push_back(Clusters[I].MBB); JTProbs[Clusters[I].MBB] += Clusters[I].Prob; } unsigned NumDests = JTProbs.size(); if (isSuitableForBitTests(NumDests, NumCmps, Clusters[First].Low->getValue(), Clusters[Last].High->getValue())) { // Clusters[First..Last] should be lowered as bit tests instead. return false; } // Create the MBB that will load from and jump through the table. // Note: We create it here, but it's not inserted into the function yet. MachineFunction *CurMF = FuncInfo.MF; MachineBasicBlock *JumpTableMBB = CurMF->CreateMachineBasicBlock(SI->getParent()); // Add successors. Note: use table order for determinism. SmallPtrSet Done; for (MachineBasicBlock *Succ : Table) { if (Done.count(Succ)) continue; addSuccessorWithProb(JumpTableMBB, Succ, JTProbs[Succ]); Done.insert(Succ); } JumpTableMBB->normalizeSuccProbs(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned JTI = CurMF->getOrCreateJumpTableInfo(TLI.getJumpTableEncoding()) ->createJumpTableIndex(Table); // Set up the jump table info. JumpTable JT(-1U, JTI, JumpTableMBB, nullptr); JumpTableHeader JTH(Clusters[First].Low->getValue(), Clusters[Last].High->getValue(), SI->getCondition(), nullptr, false); JTCases.emplace_back(std::move(JTH), std::move(JT)); JTCluster = CaseCluster::jumpTable(Clusters[First].Low, Clusters[Last].High, JTCases.size() - 1, Prob); return true; } void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, const SwitchInst *SI, MachineBasicBlock *DefaultMBB) { #ifndef NDEBUG // Clusters must be non-empty, sorted, and only contain Range clusters. assert(!Clusters.empty()); for (CaseCluster &C : Clusters) assert(C.Kind == CC_Range); for (unsigned i = 1, e = Clusters.size(); i < e; ++i) assert(Clusters[i - 1].High->getValue().slt(Clusters[i].Low->getValue())); #endif const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!areJTsAllowed(TLI, SI)) return; const int64_t N = Clusters.size(); const unsigned MinJumpTableSize = TLI.getMinimumJumpTableEntries(); // TotalCases[i]: Total nbr of cases in Clusters[0..i]. SmallVector TotalCases(N); for (unsigned i = 0; i < N; ++i) { APInt Hi = Clusters[i].High->getValue(); APInt Lo = Clusters[i].Low->getValue(); TotalCases[i] = (Hi - Lo).getLimitedValue() + 1; if (i != 0) TotalCases[i] += TotalCases[i - 1]; } unsigned MinDensity = JumpTableDensity; if (DefaultMBB->getParent()->getFunction()->optForSize()) MinDensity = OptsizeJumpTableDensity; if (N >= MinJumpTableSize && isDense(Clusters, &TotalCases[0], 0, N - 1, MinDensity)) { // Cheap case: the whole range might be suitable for jump table. CaseCluster JTCluster; if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) { Clusters[0] = JTCluster; Clusters.resize(1); return; } } // The algorithm below is not suitable for -O0. if (TM.getOptLevel() == CodeGenOpt::None) return; // Split Clusters into minimum number of dense partitions. The algorithm uses // the same idea as Kannan & Proebsting "Correction to 'Producing Good Code // for the Case Statement'" (1994), but builds the MinPartitions array in // reverse order to make it easier to reconstruct the partitions in ascending // order. In the choice between two optimal partitionings, it picks the one // which yields more jump tables. // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1]. SmallVector MinPartitions(N); // LastElement[i] is the last element of the partition starting at i. SmallVector LastElement(N); // NumTables[i]: nbr of >= MinJumpTableSize partitions from Clusters[i..N-1]. SmallVector NumTables(N); // Base case: There is only one way to partition Clusters[N-1]. MinPartitions[N - 1] = 1; LastElement[N - 1] = N - 1; assert(MinJumpTableSize > 1); NumTables[N - 1] = 0; // Note: loop indexes are signed to avoid underflow. for (int64_t i = N - 2; i >= 0; i--) { // Find optimal partitioning of Clusters[i..N-1]. // Baseline: Put Clusters[i] into a partition on its own. MinPartitions[i] = MinPartitions[i + 1] + 1; LastElement[i] = i; NumTables[i] = NumTables[i + 1]; // Search for a solution that results in fewer partitions. for (int64_t j = N - 1; j > i; j--) { // Try building a partition from Clusters[i..j]. if (isDense(Clusters, &TotalCases[0], i, j, MinDensity)) { unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]); bool IsTable = j - i + 1 >= MinJumpTableSize; unsigned Tables = IsTable + (j == N - 1 ? 0 : NumTables[j + 1]); // If this j leads to fewer partitions, or same number of partitions // with more lookup tables, it is a better partitioning. if (NumPartitions < MinPartitions[i] || (NumPartitions == MinPartitions[i] && Tables > NumTables[i])) { MinPartitions[i] = NumPartitions; LastElement[i] = j; NumTables[i] = Tables; } } } } // Iterate over the partitions, replacing some with jump tables in-place. unsigned DstIndex = 0; for (unsigned First = 0, Last; First < N; First = Last + 1) { Last = LastElement[First]; assert(Last >= First); assert(DstIndex <= First); unsigned NumClusters = Last - First + 1; CaseCluster JTCluster; if (NumClusters >= MinJumpTableSize && buildJumpTable(Clusters, First, Last, SI, DefaultMBB, JTCluster)) { Clusters[DstIndex++] = JTCluster; } else { for (unsigned I = First; I <= Last; ++I) std::memmove(&Clusters[DstIndex++], &Clusters[I], sizeof(Clusters[I])); } } Clusters.resize(DstIndex); } bool SelectionDAGBuilder::rangeFitsInWord(const APInt &Low, const APInt &High) { // FIXME: Using the pointer type doesn't seem ideal. uint64_t BW = DAG.getDataLayout().getPointerSizeInBits(); uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1; return Range <= BW; } bool SelectionDAGBuilder::isSuitableForBitTests(unsigned NumDests, unsigned NumCmps, const APInt &Low, const APInt &High) { // FIXME: I don't think NumCmps is the correct metric: a single case and a // range of cases both require only one branch to lower. Just looking at the // number of clusters and destinations should be enough to decide whether to // build bit tests. // To lower a range with bit tests, the range must fit the bitwidth of a // machine word. if (!rangeFitsInWord(Low, High)) return false; // Decide whether it's profitable to lower this range with bit tests. Each // destination requires a bit test and branch, and there is an overall range // check branch. For a small number of clusters, separate comparisons might be // cheaper, and for many destinations, splitting the range might be better. return (NumDests == 1 && NumCmps >= 3) || (NumDests == 2 && NumCmps >= 5) || (NumDests == 3 && NumCmps >= 6); } bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, unsigned First, unsigned Last, const SwitchInst *SI, CaseCluster &BTCluster) { assert(First <= Last); if (First == Last) return false; BitVector Dests(FuncInfo.MF->getNumBlockIDs()); unsigned NumCmps = 0; for (int64_t I = First; I <= Last; ++I) { assert(Clusters[I].Kind == CC_Range); Dests.set(Clusters[I].MBB->getNumber()); NumCmps += (Clusters[I].Low == Clusters[I].High) ? 1 : 2; } unsigned NumDests = Dests.count(); APInt Low = Clusters[First].Low->getValue(); APInt High = Clusters[Last].High->getValue(); assert(Low.slt(High)); if (!isSuitableForBitTests(NumDests, NumCmps, Low, High)) return false; APInt LowBound; APInt CmpRange; const int BitWidth = DAG.getTargetLoweringInfo() .getPointerTy(DAG.getDataLayout()) .getSizeInBits(); assert(rangeFitsInWord(Low, High) && "Case range must fit in bit mask!"); // Check if the clusters cover a contiguous range such that no value in the // range will jump to the default statement. bool ContiguousRange = true; for (int64_t I = First + 1; I <= Last; ++I) { if (Clusters[I].Low->getValue() != Clusters[I - 1].High->getValue() + 1) { ContiguousRange = false; break; } } if (Low.isStrictlyPositive() && High.slt(BitWidth)) { // Optimize the case where all the case values fit in a word without having // to subtract minValue. In this case, we can optimize away the subtraction. LowBound = APInt::getNullValue(Low.getBitWidth()); CmpRange = High; ContiguousRange = false; } else { LowBound = Low; CmpRange = High - Low; } CaseBitsVector CBV; auto TotalProb = BranchProbability::getZero(); for (unsigned i = First; i <= Last; ++i) { // Find the CaseBits for this destination. unsigned j; for (j = 0; j < CBV.size(); ++j) if (CBV[j].BB == Clusters[i].MBB) break; if (j == CBV.size()) CBV.push_back( CaseBits(0, Clusters[i].MBB, 0, BranchProbability::getZero())); CaseBits *CB = &CBV[j]; // Update Mask, Bits and ExtraProb. uint64_t Lo = (Clusters[i].Low->getValue() - LowBound).getZExtValue(); uint64_t Hi = (Clusters[i].High->getValue() - LowBound).getZExtValue(); assert(Hi >= Lo && Hi < 64 && "Invalid bit case!"); CB->Mask |= (-1ULL >> (63 - (Hi - Lo))) << Lo; CB->Bits += Hi - Lo + 1; CB->ExtraProb += Clusters[i].Prob; TotalProb += Clusters[i].Prob; } BitTestInfo BTI; std::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) { // Sort by probability first, number of bits second. if (a.ExtraProb != b.ExtraProb) return a.ExtraProb > b.ExtraProb; return a.Bits > b.Bits; }); for (auto &CB : CBV) { MachineBasicBlock *BitTestBB = FuncInfo.MF->CreateMachineBasicBlock(SI->getParent()); BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraProb)); } BitTestCases.emplace_back(std::move(LowBound), std::move(CmpRange), SI->getCondition(), -1U, MVT::Other, false, ContiguousRange, nullptr, nullptr, std::move(BTI), TotalProb); BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High, BitTestCases.size() - 1, TotalProb); return true; } void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters, const SwitchInst *SI) { // Partition Clusters into as few subsets as possible, where each subset has a // range that fits in a machine word and has <= 3 unique destinations. #ifndef NDEBUG // Clusters must be sorted and contain Range or JumpTable clusters. assert(!Clusters.empty()); assert(Clusters[0].Kind == CC_Range || Clusters[0].Kind == CC_JumpTable); for (const CaseCluster &C : Clusters) assert(C.Kind == CC_Range || C.Kind == CC_JumpTable); for (unsigned i = 1; i < Clusters.size(); ++i) assert(Clusters[i-1].High->getValue().slt(Clusters[i].Low->getValue())); #endif // The algorithm below is not suitable for -O0. if (TM.getOptLevel() == CodeGenOpt::None) return; // If target does not have legal shift left, do not emit bit tests at all. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT PTy = TLI.getPointerTy(DAG.getDataLayout()); if (!TLI.isOperationLegal(ISD::SHL, PTy)) return; int BitWidth = PTy.getSizeInBits(); const int64_t N = Clusters.size(); // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1]. SmallVector MinPartitions(N); // LastElement[i] is the last element of the partition starting at i. SmallVector LastElement(N); // FIXME: This might not be the best algorithm for finding bit test clusters. // Base case: There is only one way to partition Clusters[N-1]. MinPartitions[N - 1] = 1; LastElement[N - 1] = N - 1; // Note: loop indexes are signed to avoid underflow. for (int64_t i = N - 2; i >= 0; --i) { // Find optimal partitioning of Clusters[i..N-1]. // Baseline: Put Clusters[i] into a partition on its own. MinPartitions[i] = MinPartitions[i + 1] + 1; LastElement[i] = i; // Search for a solution that results in fewer partitions. // Note: the search is limited by BitWidth, reducing time complexity. for (int64_t j = std::min(N - 1, i + BitWidth - 1); j > i; --j) { // Try building a partition from Clusters[i..j]. // Check the range. if (!rangeFitsInWord(Clusters[i].Low->getValue(), Clusters[j].High->getValue())) continue; // Check nbr of destinations and cluster types. // FIXME: This works, but doesn't seem very efficient. bool RangesOnly = true; BitVector Dests(FuncInfo.MF->getNumBlockIDs()); for (int64_t k = i; k <= j; k++) { if (Clusters[k].Kind != CC_Range) { RangesOnly = false; break; } Dests.set(Clusters[k].MBB->getNumber()); } if (!RangesOnly || Dests.count() > 3) break; // Check if it's a better partition. unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]); if (NumPartitions < MinPartitions[i]) { // Found a better partition. MinPartitions[i] = NumPartitions; LastElement[i] = j; } } } // Iterate over the partitions, replacing with bit-test clusters in-place. unsigned DstIndex = 0; for (unsigned First = 0, Last; First < N; First = Last + 1) { Last = LastElement[First]; assert(First <= Last); assert(DstIndex <= First); CaseCluster BitTestCluster; if (buildBitTests(Clusters, First, Last, SI, BitTestCluster)) { Clusters[DstIndex++] = BitTestCluster; } else { size_t NumClusters = Last - First + 1; std::memmove(&Clusters[DstIndex], &Clusters[First], sizeof(Clusters[0]) * NumClusters); DstIndex += NumClusters; } } Clusters.resize(DstIndex); } void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, MachineBasicBlock *SwitchMBB, MachineBasicBlock *DefaultMBB) { MachineFunction *CurMF = FuncInfo.MF; MachineBasicBlock *NextMBB = nullptr; MachineFunction::iterator BBI(W.MBB); if (++BBI != FuncInfo.MF->end()) NextMBB = &*BBI; unsigned Size = W.LastCluster - W.FirstCluster + 1; BranchProbabilityInfo *BPI = FuncInfo.BPI; if (Size == 2 && W.MBB == SwitchMBB) { // If any two of the cases has the same destination, and if one value // is the same as the other, but has one bit unset that the other has set, // use bit manipulation to do two compares at once. For example: // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)" // TODO: This could be extended to merge any 2 cases in switches with 3 // cases. // TODO: Handle cases where W.CaseBB != SwitchBB. CaseCluster &Small = *W.FirstCluster; CaseCluster &Big = *W.LastCluster; if (Small.Low == Small.High && Big.Low == Big.High && Small.MBB == Big.MBB) { const APInt &SmallValue = Small.Low->getValue(); const APInt &BigValue = Big.Low->getValue(); // Check that there is only one bit different. APInt CommonBit = BigValue ^ SmallValue; if (CommonBit.isPowerOf2()) { SDValue CondLHS = getValue(Cond); EVT VT = CondLHS.getValueType(); SDLoc DL = getCurSDLoc(); SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS, DAG.getConstant(CommonBit, DL, VT)); SDValue Cond = DAG.getSetCC( DL, MVT::i1, Or, DAG.getConstant(BigValue | SmallValue, DL, VT), ISD::SETEQ); // Update successor info. // Both Small and Big will jump to Small.BB, so we sum up the // probabilities. addSuccessorWithProb(SwitchMBB, Small.MBB, Small.Prob + Big.Prob); if (BPI) addSuccessorWithProb( SwitchMBB, DefaultMBB, // The default destination is the first successor in IR. BPI->getEdgeProbability(SwitchMBB->getBasicBlock(), (unsigned)0)); else addSuccessorWithProb(SwitchMBB, DefaultMBB); // Insert the true branch. SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other, getControlRoot(), Cond, DAG.getBasicBlock(Small.MBB)); // Insert the false branch. BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond, DAG.getBasicBlock(DefaultMBB)); DAG.setRoot(BrCond); return; } } } if (TM.getOptLevel() != CodeGenOpt::None) { // Order cases by probability so the most likely case will be checked first. std::sort(W.FirstCluster, W.LastCluster + 1, [](const CaseCluster &a, const CaseCluster &b) { return a.Prob > b.Prob; }); // Rearrange the case blocks so that the last one falls through if possible // without without changing the order of probabilities. for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster; ) { --I; if (I->Prob > W.LastCluster->Prob) break; if (I->Kind == CC_Range && I->MBB == NextMBB) { std::swap(*I, *W.LastCluster); break; } } } // Compute total probability. BranchProbability DefaultProb = W.DefaultProb; BranchProbability UnhandledProbs = DefaultProb; for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I) UnhandledProbs += I->Prob; MachineBasicBlock *CurMBB = W.MBB; for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) { MachineBasicBlock *Fallthrough; if (I == W.LastCluster) { // For the last cluster, fall through to the default destination. Fallthrough = DefaultMBB; } else { Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock()); CurMF->insert(BBI, Fallthrough); // Put Cond in a virtual register to make it available from the new blocks. ExportFromCurrentBlock(Cond); } UnhandledProbs -= I->Prob; switch (I->Kind) { case CC_JumpTable: { // FIXME: Optimize away range check based on pivot comparisons. JumpTableHeader *JTH = &JTCases[I->JTCasesIndex].first; JumpTable *JT = &JTCases[I->JTCasesIndex].second; // The jump block hasn't been inserted yet; insert it here. MachineBasicBlock *JumpMBB = JT->MBB; CurMF->insert(BBI, JumpMBB); auto JumpProb = I->Prob; auto FallthroughProb = UnhandledProbs; // If the default statement is a target of the jump table, we evenly // distribute the default probability to successors of CurMBB. Also // update the probability on the edge from JumpMBB to Fallthrough. for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(), SE = JumpMBB->succ_end(); SI != SE; ++SI) { if (*SI == DefaultMBB) { JumpProb += DefaultProb / 2; FallthroughProb -= DefaultProb / 2; JumpMBB->setSuccProbability(SI, DefaultProb / 2); JumpMBB->normalizeSuccProbs(); break; } } addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb); addSuccessorWithProb(CurMBB, JumpMBB, JumpProb); CurMBB->normalizeSuccProbs(); // The jump table header will be inserted in our current block, do the // range check, and fall through to our fallthrough block. JTH->HeaderBB = CurMBB; JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader. // If we're in the right place, emit the jump table header right now. if (CurMBB == SwitchMBB) { visitJumpTableHeader(*JT, *JTH, SwitchMBB); JTH->Emitted = true; } break; } case CC_BitTests: { // FIXME: Optimize away range check based on pivot comparisons. BitTestBlock *BTB = &BitTestCases[I->BTCasesIndex]; // The bit test blocks haven't been inserted yet; insert them here. for (BitTestCase &BTC : BTB->Cases) CurMF->insert(BBI, BTC.ThisBB); // Fill in fields of the BitTestBlock. BTB->Parent = CurMBB; BTB->Default = Fallthrough; BTB->DefaultProb = UnhandledProbs; // If the cases in bit test don't form a contiguous range, we evenly // distribute the probability on the edge to Fallthrough to two // successors of CurMBB. if (!BTB->ContiguousRange) { BTB->Prob += DefaultProb / 2; BTB->DefaultProb -= DefaultProb / 2; } // If we're in the right place, emit the bit test header right now. if (CurMBB == SwitchMBB) { visitBitTestHeader(*BTB, SwitchMBB); BTB->Emitted = true; } break; } case CC_Range: { const Value *RHS, *LHS, *MHS; ISD::CondCode CC; if (I->Low == I->High) { // Check Cond == I->Low. CC = ISD::SETEQ; LHS = Cond; RHS=I->Low; MHS = nullptr; } else { // Check I->Low <= Cond <= I->High. CC = ISD::SETLE; LHS = I->Low; MHS = Cond; RHS = I->High; } // The false probability is the sum of all unhandled cases. CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, I->Prob, UnhandledProbs); if (CurMBB == SwitchMBB) visitSwitchCase(CB, SwitchMBB); else SwitchCases.push_back(CB); break; } } CurMBB = Fallthrough; } } unsigned SelectionDAGBuilder::caseClusterRank(const CaseCluster &CC, CaseClusterIt First, CaseClusterIt Last) { return std::count_if(First, Last + 1, [&](const CaseCluster &X) { if (X.Prob != CC.Prob) return X.Prob > CC.Prob; // Ties are broken by comparing the case value. return X.Low->getValue().slt(CC.Low->getValue()); }); } void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, const SwitchWorkListItem &W, Value *Cond, MachineBasicBlock *SwitchMBB) { assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) && "Clusters not sorted?"); assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!"); // Balance the tree based on branch probabilities to create a near-optimal (in // terms of search time given key frequency) binary search tree. See e.g. Kurt // Mehlhorn "Nearly Optimal Binary Search Trees" (1975). CaseClusterIt LastLeft = W.FirstCluster; CaseClusterIt FirstRight = W.LastCluster; auto LeftProb = LastLeft->Prob + W.DefaultProb / 2; auto RightProb = FirstRight->Prob + W.DefaultProb / 2; // Move LastLeft and FirstRight towards each other from opposite directions to // find a partitioning of the clusters which balances the probability on both // sides. If LeftProb and RightProb are equal, alternate which side is // taken to ensure 0-probability nodes are distributed evenly. unsigned I = 0; while (LastLeft + 1 < FirstRight) { if (LeftProb < RightProb || (LeftProb == RightProb && (I & 1))) LeftProb += (++LastLeft)->Prob; else RightProb += (--FirstRight)->Prob; I++; } for (;;) { // Our binary search tree differs from a typical BST in that ours can have up // to three values in each leaf. The pivot selection above doesn't take that // into account, which means the tree might require more nodes and be less // efficient. We compensate for this here. unsigned NumLeft = LastLeft - W.FirstCluster + 1; unsigned NumRight = W.LastCluster - FirstRight + 1; if (std::min(NumLeft, NumRight) < 3 && std::max(NumLeft, NumRight) > 3) { // If one side has less than 3 clusters, and the other has more than 3, // consider taking a cluster from the other side. if (NumLeft < NumRight) { // Consider moving the first cluster on the right to the left side. CaseCluster &CC = *FirstRight; unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster); unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft); if (LeftSideRank <= RightSideRank) { // Moving the cluster to the left does not demote it. ++LastLeft; ++FirstRight; continue; } } else { assert(NumRight < NumLeft); // Consider moving the last element on the left to the right side. CaseCluster &CC = *LastLeft; unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft); unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster); if (RightSideRank <= LeftSideRank) { // Moving the cluster to the right does not demot it. --LastLeft; --FirstRight; continue; } } } break; } assert(LastLeft + 1 == FirstRight); assert(LastLeft >= W.FirstCluster); assert(FirstRight <= W.LastCluster); // Use the first element on the right as pivot since we will make less-than // comparisons against it. CaseClusterIt PivotCluster = FirstRight; assert(PivotCluster > W.FirstCluster); assert(PivotCluster <= W.LastCluster); CaseClusterIt FirstLeft = W.FirstCluster; CaseClusterIt LastRight = W.LastCluster; const ConstantInt *Pivot = PivotCluster->Low; // New blocks will be inserted immediately after the current one. MachineFunction::iterator BBI(W.MBB); ++BBI; // We will branch to the LHS if Value < Pivot. If LHS is a single cluster, // we can branch to its destination directly if it's squeezed exactly in // between the known lower bound and Pivot - 1. MachineBasicBlock *LeftMBB; if (FirstLeft == LastLeft && FirstLeft->Kind == CC_Range && FirstLeft->Low == W.GE && (FirstLeft->High->getValue() + 1LL) == Pivot->getValue()) { LeftMBB = FirstLeft->MBB; } else { LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock()); FuncInfo.MF->insert(BBI, LeftMBB); WorkList.push_back( {LeftMBB, FirstLeft, LastLeft, W.GE, Pivot, W.DefaultProb / 2}); // Put Cond in a virtual register to make it available from the new blocks. ExportFromCurrentBlock(Cond); } // Similarly, we will branch to the RHS if Value >= Pivot. If RHS is a // single cluster, RHS.Low == Pivot, and we can branch to its destination // directly if RHS.High equals the current upper bound. MachineBasicBlock *RightMBB; if (FirstRight == LastRight && FirstRight->Kind == CC_Range && W.LT && (FirstRight->High->getValue() + 1ULL) == W.LT->getValue()) { RightMBB = FirstRight->MBB; } else { RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock()); FuncInfo.MF->insert(BBI, RightMBB); WorkList.push_back( {RightMBB, FirstRight, LastRight, Pivot, W.LT, W.DefaultProb / 2}); // Put Cond in a virtual register to make it available from the new blocks. ExportFromCurrentBlock(Cond); } // Create the CaseBlock record that will be used to lower the branch. CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB, LeftProb, RightProb); if (W.MBB == SwitchMBB) visitSwitchCase(CB, SwitchMBB); else SwitchCases.push_back(CB); } void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { // Extract cases from the switch. BranchProbabilityInfo *BPI = FuncInfo.BPI; CaseClusterVector Clusters; Clusters.reserve(SI.getNumCases()); for (auto I : SI.cases()) { MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()]; const ConstantInt *CaseVal = I.getCaseValue(); BranchProbability Prob = BPI ? BPI->getEdgeProbability(SI.getParent(), I.getSuccessorIndex()) : BranchProbability(1, SI.getNumCases() + 1); Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Prob)); } MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()]; // Cluster adjacent cases with the same destination. We do this at all // optimization levels because it's cheap to do and will make codegen faster // if there are many clusters. sortAndRangeify(Clusters); if (TM.getOptLevel() != CodeGenOpt::None) { // Replace an unreachable default with the most popular destination. // FIXME: Exploit unreachable default more aggressively. bool UnreachableDefault = isa(SI.getDefaultDest()->getFirstNonPHIOrDbg()); if (UnreachableDefault && !Clusters.empty()) { DenseMap Popularity; unsigned MaxPop = 0; const BasicBlock *MaxBB = nullptr; for (auto I : SI.cases()) { const BasicBlock *BB = I.getCaseSuccessor(); if (++Popularity[BB] > MaxPop) { MaxPop = Popularity[BB]; MaxBB = BB; } } // Set new default. assert(MaxPop > 0 && MaxBB); DefaultMBB = FuncInfo.MBBMap[MaxBB]; // Remove cases that were pointing to the destination that is now the // default. CaseClusterVector New; New.reserve(Clusters.size()); for (CaseCluster &CC : Clusters) { if (CC.MBB != DefaultMBB) New.push_back(CC); } Clusters = std::move(New); } } // If there is only the default destination, jump there directly. MachineBasicBlock *SwitchMBB = FuncInfo.MBB; if (Clusters.empty()) { SwitchMBB->addSuccessor(DefaultMBB); if (DefaultMBB != NextBlock(SwitchMBB)) { DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(DefaultMBB))); } return; } findJumpTables(Clusters, &SI, DefaultMBB); findBitTestClusters(Clusters, &SI); DEBUG({ dbgs() << "Case clusters: "; for (const CaseCluster &C : Clusters) { if (C.Kind == CC_JumpTable) dbgs() << "JT:"; if (C.Kind == CC_BitTests) dbgs() << "BT:"; C.Low->getValue().print(dbgs(), true); if (C.Low != C.High) { dbgs() << '-'; C.High->getValue().print(dbgs(), true); } dbgs() << ' '; } dbgs() << '\n'; }); assert(!Clusters.empty()); SwitchWorkList WorkList; CaseClusterIt First = Clusters.begin(); CaseClusterIt Last = Clusters.end() - 1; auto DefaultProb = getEdgeProbability(SwitchMBB, DefaultMBB); WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultProb}); while (!WorkList.empty()) { SwitchWorkListItem W = WorkList.back(); WorkList.pop_back(); unsigned NumClusters = W.LastCluster - W.FirstCluster + 1; if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None) { // For optimized builds, lower large range as a balanced binary tree. splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB); continue; } lowerWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB); } } Index: projects/clang390-import/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp =================================================================== --- projects/clang390-import/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp (revision 305682) +++ projects/clang390-import/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp (revision 305683) @@ -1,719 +1,720 @@ //===-- SelectionDAGDumper.cpp - Implement SelectionDAG::dump() -----------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This implements the SelectionDAG::dump method and friends. // //===----------------------------------------------------------------------===// #include "llvm/CodeGen/SelectionDAG.h" #include "ScheduleDAGSDNodes.h" #include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/Intrinsics.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/Printable.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; static cl::opt VerboseDAGDumping("dag-dump-verbose", cl::Hidden, cl::desc("Display more information when dumping selection " "DAG nodes.")); std::string SDNode::getOperationName(const SelectionDAG *G) const { switch (getOpcode()) { default: if (getOpcode() < ISD::BUILTIN_OP_END) return "<>"; if (isMachineOpcode()) { if (G) if (const TargetInstrInfo *TII = G->getSubtarget().getInstrInfo()) if (getMachineOpcode() < TII->getNumOpcodes()) return TII->getName(getMachineOpcode()); return "<>"; } if (G) { const TargetLowering &TLI = G->getTargetLoweringInfo(); const char *Name = TLI.getTargetNodeName(getOpcode()); if (Name) return Name; return "<>"; } return "<>"; #ifndef NDEBUG case ISD::DELETED_NODE: return "<>"; #endif case ISD::PREFETCH: return "Prefetch"; case ISD::ATOMIC_FENCE: return "AtomicFence"; case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap"; case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: return "AtomicCmpSwapWithSuccess"; case ISD::ATOMIC_SWAP: return "AtomicSwap"; case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd"; case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub"; case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd"; case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr"; case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor"; case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand"; case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin"; case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax"; case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin"; case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax"; case ISD::ATOMIC_LOAD: return "AtomicLoad"; case ISD::ATOMIC_STORE: return "AtomicStore"; case ISD::PCMARKER: return "PCMarker"; case ISD::READCYCLECOUNTER: return "ReadCycleCounter"; case ISD::SRCVALUE: return "SrcValue"; case ISD::MDNODE_SDNODE: return "MDNode"; case ISD::EntryToken: return "EntryToken"; case ISD::TokenFactor: return "TokenFactor"; case ISD::AssertSext: return "AssertSext"; case ISD::AssertZext: return "AssertZext"; case ISD::BasicBlock: return "BasicBlock"; case ISD::VALUETYPE: return "ValueType"; case ISD::Register: return "Register"; case ISD::RegisterMask: return "RegisterMask"; case ISD::Constant: if (cast(this)->isOpaque()) return "OpaqueConstant"; return "Constant"; case ISD::ConstantFP: return "ConstantFP"; case ISD::GlobalAddress: return "GlobalAddress"; case ISD::GlobalTLSAddress: return "GlobalTLSAddress"; case ISD::FrameIndex: return "FrameIndex"; case ISD::JumpTable: return "JumpTable"; case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE"; case ISD::RETURNADDR: return "RETURNADDR"; case ISD::FRAMEADDR: return "FRAMEADDR"; case ISD::LOCAL_RECOVER: return "LOCAL_RECOVER"; case ISD::READ_REGISTER: return "READ_REGISTER"; case ISD::WRITE_REGISTER: return "WRITE_REGISTER"; case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; + case ISD::EH_DWARF_CFA: return "EH_DWARF_CFA"; case ISD::EH_RETURN: return "EH_RETURN"; case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP"; case ISD::EH_SJLJ_SETUP_DISPATCH: return "EH_SJLJ_SETUP_DISPATCH"; case ISD::ConstantPool: return "ConstantPool"; case ISD::TargetIndex: return "TargetIndex"; case ISD::ExternalSymbol: return "ExternalSymbol"; case ISD::BlockAddress: return "BlockAddress"; case ISD::INTRINSIC_WO_CHAIN: case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: { unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1; unsigned IID = cast(getOperand(OpNo))->getZExtValue(); if (IID < Intrinsic::num_intrinsics) return Intrinsic::getName((Intrinsic::ID)IID); else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo()) return TII->getName(IID); llvm_unreachable("Invalid intrinsic ID"); } case ISD::BUILD_VECTOR: return "BUILD_VECTOR"; case ISD::TargetConstant: if (cast(this)->isOpaque()) return "OpaqueTargetConstant"; return "TargetConstant"; case ISD::TargetConstantFP: return "TargetConstantFP"; case ISD::TargetGlobalAddress: return "TargetGlobalAddress"; case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress"; case ISD::TargetFrameIndex: return "TargetFrameIndex"; case ISD::TargetJumpTable: return "TargetJumpTable"; case ISD::TargetConstantPool: return "TargetConstantPool"; case ISD::TargetExternalSymbol: return "TargetExternalSymbol"; case ISD::MCSymbol: return "MCSymbol"; case ISD::TargetBlockAddress: return "TargetBlockAddress"; case ISD::CopyToReg: return "CopyToReg"; case ISD::CopyFromReg: return "CopyFromReg"; case ISD::UNDEF: return "undef"; case ISD::MERGE_VALUES: return "merge_values"; case ISD::INLINEASM: return "inlineasm"; case ISD::EH_LABEL: return "eh_label"; case ISD::HANDLENODE: return "handlenode"; // Unary operators case ISD::FABS: return "fabs"; case ISD::FMINNUM: return "fminnum"; case ISD::FMAXNUM: return "fmaxnum"; case ISD::FMINNAN: return "fminnan"; case ISD::FMAXNAN: return "fmaxnan"; case ISD::FNEG: return "fneg"; case ISD::FSQRT: return "fsqrt"; case ISD::FSIN: return "fsin"; case ISD::FCOS: return "fcos"; case ISD::FSINCOS: return "fsincos"; case ISD::FTRUNC: return "ftrunc"; case ISD::FFLOOR: return "ffloor"; case ISD::FCEIL: return "fceil"; case ISD::FRINT: return "frint"; case ISD::FNEARBYINT: return "fnearbyint"; case ISD::FROUND: return "fround"; case ISD::FEXP: return "fexp"; case ISD::FEXP2: return "fexp2"; case ISD::FLOG: return "flog"; case ISD::FLOG2: return "flog2"; case ISD::FLOG10: return "flog10"; // Binary operators case ISD::ADD: return "add"; case ISD::SUB: return "sub"; case ISD::MUL: return "mul"; case ISD::MULHU: return "mulhu"; case ISD::MULHS: return "mulhs"; case ISD::SDIV: return "sdiv"; case ISD::UDIV: return "udiv"; case ISD::SREM: return "srem"; case ISD::UREM: return "urem"; case ISD::SMUL_LOHI: return "smul_lohi"; case ISD::UMUL_LOHI: return "umul_lohi"; case ISD::SDIVREM: return "sdivrem"; case ISD::UDIVREM: return "udivrem"; case ISD::AND: return "and"; case ISD::OR: return "or"; case ISD::XOR: return "xor"; case ISD::SHL: return "shl"; case ISD::SRA: return "sra"; case ISD::SRL: return "srl"; case ISD::ROTL: return "rotl"; case ISD::ROTR: return "rotr"; case ISD::FADD: return "fadd"; case ISD::FSUB: return "fsub"; case ISD::FMUL: return "fmul"; case ISD::FDIV: return "fdiv"; case ISD::FMA: return "fma"; case ISD::FMAD: return "fmad"; case ISD::FREM: return "frem"; case ISD::FCOPYSIGN: return "fcopysign"; case ISD::FGETSIGN: return "fgetsign"; case ISD::FCANONICALIZE: return "fcanonicalize"; case ISD::FPOW: return "fpow"; case ISD::SMIN: return "smin"; case ISD::SMAX: return "smax"; case ISD::UMIN: return "umin"; case ISD::UMAX: return "umax"; case ISD::FPOWI: return "fpowi"; case ISD::SETCC: return "setcc"; case ISD::SETCCE: return "setcce"; case ISD::SELECT: return "select"; case ISD::VSELECT: return "vselect"; case ISD::SELECT_CC: return "select_cc"; case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt"; case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt"; case ISD::CONCAT_VECTORS: return "concat_vectors"; case ISD::INSERT_SUBVECTOR: return "insert_subvector"; case ISD::EXTRACT_SUBVECTOR: return "extract_subvector"; case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector"; case ISD::VECTOR_SHUFFLE: return "vector_shuffle"; case ISD::CARRY_FALSE: return "carry_false"; case ISD::ADDC: return "addc"; case ISD::ADDE: return "adde"; case ISD::SADDO: return "saddo"; case ISD::UADDO: return "uaddo"; case ISD::SSUBO: return "ssubo"; case ISD::USUBO: return "usubo"; case ISD::SMULO: return "smulo"; case ISD::UMULO: return "umulo"; case ISD::SUBC: return "subc"; case ISD::SUBE: return "sube"; case ISD::SHL_PARTS: return "shl_parts"; case ISD::SRA_PARTS: return "sra_parts"; case ISD::SRL_PARTS: return "srl_parts"; // Conversion operators. case ISD::SIGN_EXTEND: return "sign_extend"; case ISD::ZERO_EXTEND: return "zero_extend"; case ISD::ANY_EXTEND: return "any_extend"; case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg"; case ISD::ANY_EXTEND_VECTOR_INREG: return "any_extend_vector_inreg"; case ISD::SIGN_EXTEND_VECTOR_INREG: return "sign_extend_vector_inreg"; case ISD::ZERO_EXTEND_VECTOR_INREG: return "zero_extend_vector_inreg"; case ISD::TRUNCATE: return "truncate"; case ISD::FP_ROUND: return "fp_round"; case ISD::FLT_ROUNDS_: return "flt_rounds"; case ISD::FP_ROUND_INREG: return "fp_round_inreg"; case ISD::FP_EXTEND: return "fp_extend"; case ISD::SINT_TO_FP: return "sint_to_fp"; case ISD::UINT_TO_FP: return "uint_to_fp"; case ISD::FP_TO_SINT: return "fp_to_sint"; case ISD::FP_TO_UINT: return "fp_to_uint"; case ISD::BITCAST: return "bitcast"; case ISD::ADDRSPACECAST: return "addrspacecast"; case ISD::FP16_TO_FP: return "fp16_to_fp"; case ISD::FP_TO_FP16: return "fp_to_fp16"; case ISD::CONVERT_RNDSAT: { switch (cast(this)->getCvtCode()) { default: llvm_unreachable("Unknown cvt code!"); case ISD::CVT_FF: return "cvt_ff"; case ISD::CVT_FS: return "cvt_fs"; case ISD::CVT_FU: return "cvt_fu"; case ISD::CVT_SF: return "cvt_sf"; case ISD::CVT_UF: return "cvt_uf"; case ISD::CVT_SS: return "cvt_ss"; case ISD::CVT_SU: return "cvt_su"; case ISD::CVT_US: return "cvt_us"; case ISD::CVT_UU: return "cvt_uu"; } } // Control flow instructions case ISD::BR: return "br"; case ISD::BRIND: return "brind"; case ISD::BR_JT: return "br_jt"; case ISD::BRCOND: return "brcond"; case ISD::BR_CC: return "br_cc"; case ISD::CALLSEQ_START: return "callseq_start"; case ISD::CALLSEQ_END: return "callseq_end"; // EH instructions case ISD::CATCHRET: return "catchret"; case ISD::CLEANUPRET: return "cleanupret"; // Other operators case ISD::LOAD: return "load"; case ISD::STORE: return "store"; case ISD::MLOAD: return "masked_load"; case ISD::MSTORE: return "masked_store"; case ISD::MGATHER: return "masked_gather"; case ISD::MSCATTER: return "masked_scatter"; case ISD::VAARG: return "vaarg"; case ISD::VACOPY: return "vacopy"; case ISD::VAEND: return "vaend"; case ISD::VASTART: return "vastart"; case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc"; case ISD::EXTRACT_ELEMENT: return "extract_element"; case ISD::BUILD_PAIR: return "build_pair"; case ISD::STACKSAVE: return "stacksave"; case ISD::STACKRESTORE: return "stackrestore"; case ISD::TRAP: return "trap"; case ISD::DEBUGTRAP: return "debugtrap"; case ISD::LIFETIME_START: return "lifetime.start"; case ISD::LIFETIME_END: return "lifetime.end"; case ISD::GC_TRANSITION_START: return "gc_transition.start"; case ISD::GC_TRANSITION_END: return "gc_transition.end"; case ISD::GET_DYNAMIC_AREA_OFFSET: return "get.dynamic.area.offset"; // Bit manipulation case ISD::BITREVERSE: return "bitreverse"; case ISD::BSWAP: return "bswap"; case ISD::CTPOP: return "ctpop"; case ISD::CTTZ: return "cttz"; case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef"; case ISD::CTLZ: return "ctlz"; case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef"; // Trampolines case ISD::INIT_TRAMPOLINE: return "init_trampoline"; case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline"; case ISD::CONDCODE: switch (cast(this)->get()) { default: llvm_unreachable("Unknown setcc condition!"); case ISD::SETOEQ: return "setoeq"; case ISD::SETOGT: return "setogt"; case ISD::SETOGE: return "setoge"; case ISD::SETOLT: return "setolt"; case ISD::SETOLE: return "setole"; case ISD::SETONE: return "setone"; case ISD::SETO: return "seto"; case ISD::SETUO: return "setuo"; case ISD::SETUEQ: return "setueq"; case ISD::SETUGT: return "setugt"; case ISD::SETUGE: return "setuge"; case ISD::SETULT: return "setult"; case ISD::SETULE: return "setule"; case ISD::SETUNE: return "setune"; case ISD::SETEQ: return "seteq"; case ISD::SETGT: return "setgt"; case ISD::SETGE: return "setge"; case ISD::SETLT: return "setlt"; case ISD::SETLE: return "setle"; case ISD::SETNE: return "setne"; case ISD::SETTRUE: return "settrue"; case ISD::SETTRUE2: return "settrue2"; case ISD::SETFALSE: return "setfalse"; case ISD::SETFALSE2: return "setfalse2"; } } } const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) { switch (AM) { default: return ""; case ISD::PRE_INC: return ""; case ISD::PRE_DEC: return ""; case ISD::POST_INC: return ""; case ISD::POST_DEC: return ""; } } static Printable PrintNodeId(const SDNode &Node) { return Printable([&Node](raw_ostream &OS) { #ifndef NDEBUG OS << 't' << Node.PersistentId; #else OS << (const void*)&Node; #endif }); } LLVM_DUMP_METHOD void SDNode::dump() const { dump(nullptr); } void SDNode::dump(const SelectionDAG *G) const { print(dbgs(), G); dbgs() << '\n'; } void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { for (unsigned i = 0, e = getNumValues(); i != e; ++i) { if (i) OS << ","; if (getValueType(i) == MVT::Other) OS << "ch"; else OS << getValueType(i).getEVTString(); } } void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (const MachineSDNode *MN = dyn_cast(this)) { if (!MN->memoperands_empty()) { OS << "<"; OS << "Mem:"; for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(), e = MN->memoperands_end(); i != e; ++i) { OS << **i; if (std::next(i) != e) OS << " "; } OS << ">"; } } else if (const ShuffleVectorSDNode *SVN = dyn_cast(this)) { OS << "<"; for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) { int Idx = SVN->getMaskElt(i); if (i) OS << ","; if (Idx < 0) OS << "u"; else OS << Idx; } OS << ">"; } else if (const ConstantSDNode *CSDN = dyn_cast(this)) { OS << '<' << CSDN->getAPIntValue() << '>'; } else if (const ConstantFPSDNode *CSDN = dyn_cast(this)) { if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle) OS << '<' << CSDN->getValueAPF().convertToFloat() << '>'; else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble) OS << '<' << CSDN->getValueAPF().convertToDouble() << '>'; else { OS << "getValueAPF().bitcastToAPInt().dump(); OS << ")>"; } } else if (const GlobalAddressSDNode *GADN = dyn_cast(this)) { int64_t offset = GADN->getOffset(); OS << '<'; GADN->getGlobal()->printAsOperand(OS); OS << '>'; if (offset > 0) OS << " + " << offset; else OS << " " << offset; if (unsigned int TF = GADN->getTargetFlags()) OS << " [TF=" << TF << ']'; } else if (const FrameIndexSDNode *FIDN = dyn_cast(this)) { OS << "<" << FIDN->getIndex() << ">"; } else if (const JumpTableSDNode *JTDN = dyn_cast(this)) { OS << "<" << JTDN->getIndex() << ">"; if (unsigned int TF = JTDN->getTargetFlags()) OS << " [TF=" << TF << ']'; } else if (const ConstantPoolSDNode *CP = dyn_cast(this)){ int offset = CP->getOffset(); if (CP->isMachineConstantPoolEntry()) OS << "<" << *CP->getMachineCPVal() << ">"; else OS << "<" << *CP->getConstVal() << ">"; if (offset > 0) OS << " + " << offset; else OS << " " << offset; if (unsigned int TF = CP->getTargetFlags()) OS << " [TF=" << TF << ']'; } else if (const TargetIndexSDNode *TI = dyn_cast(this)) { OS << "<" << TI->getIndex() << '+' << TI->getOffset() << ">"; if (unsigned TF = TI->getTargetFlags()) OS << " [TF=" << TF << ']'; } else if (const BasicBlockSDNode *BBDN = dyn_cast(this)) { OS << "<"; const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock(); if (LBB) OS << LBB->getName() << " "; OS << (const void*)BBDN->getBasicBlock() << ">"; } else if (const RegisterSDNode *R = dyn_cast(this)) { OS << ' ' << PrintReg(R->getReg(), G ? G->getSubtarget().getRegisterInfo() : nullptr); } else if (const ExternalSymbolSDNode *ES = dyn_cast(this)) { OS << "'" << ES->getSymbol() << "'"; if (unsigned int TF = ES->getTargetFlags()) OS << " [TF=" << TF << ']'; } else if (const SrcValueSDNode *M = dyn_cast(this)) { if (M->getValue()) OS << "<" << M->getValue() << ">"; else OS << ""; } else if (const MDNodeSDNode *MD = dyn_cast(this)) { if (MD->getMD()) OS << "<" << MD->getMD() << ">"; else OS << ""; } else if (const VTSDNode *N = dyn_cast(this)) { OS << ":" << N->getVT().getEVTString(); } else if (const LoadSDNode *LD = dyn_cast(this)) { OS << "<" << *LD->getMemOperand(); bool doExt = true; switch (LD->getExtensionType()) { default: doExt = false; break; case ISD::EXTLOAD: OS << ", anyext"; break; case ISD::SEXTLOAD: OS << ", sext"; break; case ISD::ZEXTLOAD: OS << ", zext"; break; } if (doExt) OS << " from " << LD->getMemoryVT().getEVTString(); const char *AM = getIndexedModeName(LD->getAddressingMode()); if (*AM) OS << ", " << AM; OS << ">"; } else if (const StoreSDNode *ST = dyn_cast(this)) { OS << "<" << *ST->getMemOperand(); if (ST->isTruncatingStore()) OS << ", trunc to " << ST->getMemoryVT().getEVTString(); const char *AM = getIndexedModeName(ST->getAddressingMode()); if (*AM) OS << ", " << AM; OS << ">"; } else if (const MemSDNode* M = dyn_cast(this)) { OS << "<" << *M->getMemOperand() << ">"; } else if (const BlockAddressSDNode *BA = dyn_cast(this)) { int64_t offset = BA->getOffset(); OS << "<"; BA->getBlockAddress()->getFunction()->printAsOperand(OS, false); OS << ", "; BA->getBlockAddress()->getBasicBlock()->printAsOperand(OS, false); OS << ">"; if (offset > 0) OS << " + " << offset; else OS << " " << offset; if (unsigned int TF = BA->getTargetFlags()) OS << " [TF=" << TF << ']'; } else if (const AddrSpaceCastSDNode *ASC = dyn_cast(this)) { OS << '[' << ASC->getSrcAddressSpace() << " -> " << ASC->getDestAddressSpace() << ']'; } if (VerboseDAGDumping) { if (unsigned Order = getIROrder()) OS << " [ORD=" << Order << ']'; if (getNodeId() != -1) OS << " [ID=" << getNodeId() << ']'; if (!G) return; DILocation *L = getDebugLoc(); if (!L) return; if (auto *Scope = L->getScope()) OS << Scope->getFilename(); else OS << ""; OS << ':' << L->getLine(); if (unsigned C = L->getColumn()) OS << ':' << C; } } /// Return true if this node is so simple that we should just print it inline /// if it appears as an operand. static bool shouldPrintInline(const SDNode &Node) { if (Node.getOpcode() == ISD::EntryToken) return false; return Node.getNumOperands() == 0; } static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { for (const SDValue &Op : N->op_values()) { if (shouldPrintInline(*Op.getNode())) continue; if (Op.getNode()->hasOneUse()) DumpNodes(Op.getNode(), indent+2, G); } dbgs().indent(indent); N->dump(G); } LLVM_DUMP_METHOD void SelectionDAG::dump() const { dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:\n"; for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I) { const SDNode *N = &*I; if (!N->hasOneUse() && N != getRoot().getNode() && (!shouldPrintInline(*N) || N->use_empty())) DumpNodes(N, 2, this); } if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this); dbgs() << "\n\n"; } void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { OS << PrintNodeId(*this) << ": "; print_types(OS, G); OS << " = " << getOperationName(G); print_details(OS, G); } static bool printOperand(raw_ostream &OS, const SelectionDAG *G, const SDValue Value) { if (!Value.getNode()) { OS << ""; return false; } else if (shouldPrintInline(*Value.getNode())) { OS << Value->getOperationName(G) << ':'; Value->print_types(OS, G); Value->print_details(OS, G); return true; } else { OS << PrintNodeId(*Value.getNode()); if (unsigned RN = Value.getResNo()) OS << ':' << RN; return false; } } typedef SmallPtrSet VisitedSDNodeSet; static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, const SelectionDAG *G, VisitedSDNodeSet &once) { if (!once.insert(N).second) // If we've been here before, return now. return; // Dump the current SDNode, but don't end the line yet. OS.indent(indent); N->printr(OS, G); // Having printed this SDNode, walk the children: for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { if (i) OS << ","; OS << " "; const SDValue Op = N->getOperand(i); bool printedInline = printOperand(OS, G, Op); if (printedInline) once.insert(Op.getNode()); } OS << "\n"; // Dump children that have grandchildren on their own line(s). for (const SDValue &Op : N->op_values()) DumpNodesr(OS, Op.getNode(), indent+2, G, once); } void SDNode::dumpr() const { VisitedSDNodeSet once; DumpNodesr(dbgs(), this, 0, nullptr, once); } void SDNode::dumpr(const SelectionDAG *G) const { VisitedSDNodeSet once; DumpNodesr(dbgs(), this, 0, G, once); } static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N, const SelectionDAG *G, unsigned depth, unsigned indent) { if (depth == 0) return; OS.indent(indent); N->print(OS, G); if (depth < 1) return; for (const SDValue &Op : N->op_values()) { // Don't follow chain operands. if (Op.getValueType() == MVT::Other) continue; OS << '\n'; printrWithDepthHelper(OS, Op.getNode(), G, depth-1, indent+2); } } void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G, unsigned depth) const { printrWithDepthHelper(OS, this, G, depth, 0); } void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const { // Don't print impossibly deep things. printrWithDepth(OS, G, 10); } void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const { printrWithDepth(dbgs(), G, depth); } void SDNode::dumprFull(const SelectionDAG *G) const { // Don't print impossibly deep things. dumprWithDepth(G, 10); } void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const { printr(OS, G); for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { if (i) OS << ", "; else OS << " "; printOperand(OS, G, getOperand(i)); } } Index: projects/clang390-import/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp =================================================================== --- projects/clang390-import/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp (revision 305682) +++ projects/clang390-import/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp (revision 305683) @@ -1,4047 +1,4036 @@ //===-- MipsISelLowering.cpp - Mips DAG Lowering Implementation -----------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file defines the interfaces that Mips uses to lower LLVM code into a // selection DAG. // //===----------------------------------------------------------------------===// #include "MipsISelLowering.h" #include "InstPrinter/MipsInstPrinter.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "MipsCCState.h" #include "MipsMachineFunction.h" #include "MipsSubtarget.h" #include "MipsTargetMachine.h" #include "MipsTargetObjectFile.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include using namespace llvm; #define DEBUG_TYPE "mips-lower" STATISTIC(NumTailCalls, "Number of tail calls"); static cl::opt LargeGOT("mxgot", cl::Hidden, cl::desc("MIPS: Enable GOT larger than 64k."), cl::init(false)); static cl::opt NoZeroDivCheck("mno-check-zero-division", cl::Hidden, cl::desc("MIPS: Don't trap on integer division by zero."), cl::init(false)); static const MCPhysReg Mips64DPRegs[8] = { Mips::D12_64, Mips::D13_64, Mips::D14_64, Mips::D15_64, Mips::D16_64, Mips::D17_64, Mips::D18_64, Mips::D19_64 }; // If I is a shifted mask, set the size (Size) and the first bit of the // mask (Pos), and return true. // For example, if I is 0x003ff800, (Pos, Size) = (11, 11). static bool isShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) { if (!isShiftedMask_64(I)) return false; Size = countPopulation(I); Pos = countTrailingZeros(I); return true; } SDValue MipsTargetLowering::getGlobalReg(SelectionDAG &DAG, EVT Ty) const { MipsFunctionInfo *FI = DAG.getMachineFunction().getInfo(); return DAG.getRegister(FI->getGlobalBaseReg(), Ty); } SDValue MipsTargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG, unsigned Flag) const { return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty, 0, Flag); } SDValue MipsTargetLowering::getTargetNode(ExternalSymbolSDNode *N, EVT Ty, SelectionDAG &DAG, unsigned Flag) const { return DAG.getTargetExternalSymbol(N->getSymbol(), Ty, Flag); } SDValue MipsTargetLowering::getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG, unsigned Flag) const { return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag); } SDValue MipsTargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG, unsigned Flag) const { return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag); } SDValue MipsTargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG, unsigned Flag) const { return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(), N->getOffset(), Flag); } const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { switch ((MipsISD::NodeType)Opcode) { case MipsISD::FIRST_NUMBER: break; case MipsISD::JmpLink: return "MipsISD::JmpLink"; case MipsISD::TailCall: return "MipsISD::TailCall"; case MipsISD::Hi: return "MipsISD::Hi"; case MipsISD::Lo: return "MipsISD::Lo"; case MipsISD::GPRel: return "MipsISD::GPRel"; case MipsISD::ThreadPointer: return "MipsISD::ThreadPointer"; case MipsISD::Ret: return "MipsISD::Ret"; case MipsISD::ERet: return "MipsISD::ERet"; case MipsISD::EH_RETURN: return "MipsISD::EH_RETURN"; case MipsISD::FPBrcond: return "MipsISD::FPBrcond"; case MipsISD::FPCmp: return "MipsISD::FPCmp"; case MipsISD::CMovFP_T: return "MipsISD::CMovFP_T"; case MipsISD::CMovFP_F: return "MipsISD::CMovFP_F"; case MipsISD::TruncIntFP: return "MipsISD::TruncIntFP"; case MipsISD::MFHI: return "MipsISD::MFHI"; case MipsISD::MFLO: return "MipsISD::MFLO"; case MipsISD::MTLOHI: return "MipsISD::MTLOHI"; case MipsISD::Mult: return "MipsISD::Mult"; case MipsISD::Multu: return "MipsISD::Multu"; case MipsISD::MAdd: return "MipsISD::MAdd"; case MipsISD::MAddu: return "MipsISD::MAddu"; case MipsISD::MSub: return "MipsISD::MSub"; case MipsISD::MSubu: return "MipsISD::MSubu"; case MipsISD::DivRem: return "MipsISD::DivRem"; case MipsISD::DivRemU: return "MipsISD::DivRemU"; case MipsISD::DivRem16: return "MipsISD::DivRem16"; case MipsISD::DivRemU16: return "MipsISD::DivRemU16"; case MipsISD::BuildPairF64: return "MipsISD::BuildPairF64"; case MipsISD::ExtractElementF64: return "MipsISD::ExtractElementF64"; case MipsISD::Wrapper: return "MipsISD::Wrapper"; case MipsISD::DynAlloc: return "MipsISD::DynAlloc"; case MipsISD::Sync: return "MipsISD::Sync"; case MipsISD::Ext: return "MipsISD::Ext"; case MipsISD::Ins: return "MipsISD::Ins"; case MipsISD::LWL: return "MipsISD::LWL"; case MipsISD::LWR: return "MipsISD::LWR"; case MipsISD::SWL: return "MipsISD::SWL"; case MipsISD::SWR: return "MipsISD::SWR"; case MipsISD::LDL: return "MipsISD::LDL"; case MipsISD::LDR: return "MipsISD::LDR"; case MipsISD::SDL: return "MipsISD::SDL"; case MipsISD::SDR: return "MipsISD::SDR"; case MipsISD::EXTP: return "MipsISD::EXTP"; case MipsISD::EXTPDP: return "MipsISD::EXTPDP"; case MipsISD::EXTR_S_H: return "MipsISD::EXTR_S_H"; case MipsISD::EXTR_W: return "MipsISD::EXTR_W"; case MipsISD::EXTR_R_W: return "MipsISD::EXTR_R_W"; case MipsISD::EXTR_RS_W: return "MipsISD::EXTR_RS_W"; case MipsISD::SHILO: return "MipsISD::SHILO"; case MipsISD::MTHLIP: return "MipsISD::MTHLIP"; case MipsISD::MULSAQ_S_W_PH: return "MipsISD::MULSAQ_S_W_PH"; case MipsISD::MAQ_S_W_PHL: return "MipsISD::MAQ_S_W_PHL"; case MipsISD::MAQ_S_W_PHR: return "MipsISD::MAQ_S_W_PHR"; case MipsISD::MAQ_SA_W_PHL: return "MipsISD::MAQ_SA_W_PHL"; case MipsISD::MAQ_SA_W_PHR: return "MipsISD::MAQ_SA_W_PHR"; case MipsISD::DPAU_H_QBL: return "MipsISD::DPAU_H_QBL"; case MipsISD::DPAU_H_QBR: return "MipsISD::DPAU_H_QBR"; case MipsISD::DPSU_H_QBL: return "MipsISD::DPSU_H_QBL"; case MipsISD::DPSU_H_QBR: return "MipsISD::DPSU_H_QBR"; case MipsISD::DPAQ_S_W_PH: return "MipsISD::DPAQ_S_W_PH"; case MipsISD::DPSQ_S_W_PH: return "MipsISD::DPSQ_S_W_PH"; case MipsISD::DPAQ_SA_L_W: return "MipsISD::DPAQ_SA_L_W"; case MipsISD::DPSQ_SA_L_W: return "MipsISD::DPSQ_SA_L_W"; case MipsISD::DPA_W_PH: return "MipsISD::DPA_W_PH"; case MipsISD::DPS_W_PH: return "MipsISD::DPS_W_PH"; case MipsISD::DPAQX_S_W_PH: return "MipsISD::DPAQX_S_W_PH"; case MipsISD::DPAQX_SA_W_PH: return "MipsISD::DPAQX_SA_W_PH"; case MipsISD::DPAX_W_PH: return "MipsISD::DPAX_W_PH"; case MipsISD::DPSX_W_PH: return "MipsISD::DPSX_W_PH"; case MipsISD::DPSQX_S_W_PH: return "MipsISD::DPSQX_S_W_PH"; case MipsISD::DPSQX_SA_W_PH: return "MipsISD::DPSQX_SA_W_PH"; case MipsISD::MULSA_W_PH: return "MipsISD::MULSA_W_PH"; case MipsISD::MULT: return "MipsISD::MULT"; case MipsISD::MULTU: return "MipsISD::MULTU"; case MipsISD::MADD_DSP: return "MipsISD::MADD_DSP"; case MipsISD::MADDU_DSP: return "MipsISD::MADDU_DSP"; case MipsISD::MSUB_DSP: return "MipsISD::MSUB_DSP"; case MipsISD::MSUBU_DSP: return "MipsISD::MSUBU_DSP"; case MipsISD::SHLL_DSP: return "MipsISD::SHLL_DSP"; case MipsISD::SHRA_DSP: return "MipsISD::SHRA_DSP"; case MipsISD::SHRL_DSP: return "MipsISD::SHRL_DSP"; case MipsISD::SETCC_DSP: return "MipsISD::SETCC_DSP"; case MipsISD::SELECT_CC_DSP: return "MipsISD::SELECT_CC_DSP"; case MipsISD::VALL_ZERO: return "MipsISD::VALL_ZERO"; case MipsISD::VANY_ZERO: return "MipsISD::VANY_ZERO"; case MipsISD::VALL_NONZERO: return "MipsISD::VALL_NONZERO"; case MipsISD::VANY_NONZERO: return "MipsISD::VANY_NONZERO"; case MipsISD::VCEQ: return "MipsISD::VCEQ"; case MipsISD::VCLE_S: return "MipsISD::VCLE_S"; case MipsISD::VCLE_U: return "MipsISD::VCLE_U"; case MipsISD::VCLT_S: return "MipsISD::VCLT_S"; case MipsISD::VCLT_U: return "MipsISD::VCLT_U"; case MipsISD::VSMAX: return "MipsISD::VSMAX"; case MipsISD::VSMIN: return "MipsISD::VSMIN"; case MipsISD::VUMAX: return "MipsISD::VUMAX"; case MipsISD::VUMIN: return "MipsISD::VUMIN"; case MipsISD::VEXTRACT_SEXT_ELT: return "MipsISD::VEXTRACT_SEXT_ELT"; case MipsISD::VEXTRACT_ZEXT_ELT: return "MipsISD::VEXTRACT_ZEXT_ELT"; case MipsISD::VNOR: return "MipsISD::VNOR"; case MipsISD::VSHF: return "MipsISD::VSHF"; case MipsISD::SHF: return "MipsISD::SHF"; case MipsISD::ILVEV: return "MipsISD::ILVEV"; case MipsISD::ILVOD: return "MipsISD::ILVOD"; case MipsISD::ILVL: return "MipsISD::ILVL"; case MipsISD::ILVR: return "MipsISD::ILVR"; case MipsISD::PCKEV: return "MipsISD::PCKEV"; case MipsISD::PCKOD: return "MipsISD::PCKOD"; case MipsISD::INSVE: return "MipsISD::INSVE"; } return nullptr; } MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM, const MipsSubtarget &STI) : TargetLowering(TM), Subtarget(STI), ABI(TM.getABI()) { // Mips does not have i1 type, so use i32 for // setcc operations results (slt, sgt, ...). setBooleanContents(ZeroOrOneBooleanContent); setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); // The cmp.cond.fmt instruction in MIPS32r6/MIPS64r6 uses 0 and -1 like MSA // does. Integer booleans still use 0 and 1. if (Subtarget.hasMips32r6()) setBooleanContents(ZeroOrOneBooleanContent, ZeroOrNegativeOneBooleanContent); // Load extented operations for i1 types must be promoted for (MVT VT : MVT::integer_valuetypes()) { setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); } // MIPS doesn't have extending float->double load/store. Set LoadExtAction // for f32, f16 for (MVT VT : MVT::fp_valuetypes()) { setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand); } // Set LoadExtAction for f16 vectors to Expand for (MVT VT : MVT::fp_vector_valuetypes()) { MVT F16VT = MVT::getVectorVT(MVT::f16, VT.getVectorNumElements()); if (F16VT.isValid()) setLoadExtAction(ISD::EXTLOAD, VT, F16VT, Expand); } setTruncStoreAction(MVT::f32, MVT::f16, Expand); setTruncStoreAction(MVT::f64, MVT::f16, Expand); setTruncStoreAction(MVT::f64, MVT::f32, Expand); // Used by legalize types to correctly generate the setcc result. // Without this, every float setcc comes with a AND/OR with the result, // we don't want this, since the fpcmp result goes to a flag register, // which is used implicitly by brcond and select operations. AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32); // Mips Custom Operations setOperationAction(ISD::BR_JT, MVT::Other, Custom); setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::BlockAddress, MVT::i32, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); setOperationAction(ISD::JumpTable, MVT::i32, Custom); setOperationAction(ISD::ConstantPool, MVT::i32, Custom); setOperationAction(ISD::SELECT, MVT::f32, Custom); setOperationAction(ISD::SELECT, MVT::f64, Custom); setOperationAction(ISD::SELECT, MVT::i32, Custom); setOperationAction(ISD::SETCC, MVT::f32, Custom); setOperationAction(ISD::SETCC, MVT::f64, Custom); setOperationAction(ISD::BRCOND, MVT::Other, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); if (Subtarget.isGP64bit()) { setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); setOperationAction(ISD::BlockAddress, MVT::i64, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); setOperationAction(ISD::JumpTable, MVT::i64, Custom); setOperationAction(ISD::ConstantPool, MVT::i64, Custom); setOperationAction(ISD::SELECT, MVT::i64, Custom); setOperationAction(ISD::LOAD, MVT::i64, Custom); setOperationAction(ISD::STORE, MVT::i64, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom); setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); } if (!Subtarget.isGP64bit()) { setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); } - setOperationAction(ISD::ADD, MVT::i32, Custom); + setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom); if (Subtarget.isGP64bit()) - setOperationAction(ISD::ADD, MVT::i64, Custom); + setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom); setOperationAction(ISD::SDIV, MVT::i32, Expand); setOperationAction(ISD::SREM, MVT::i32, Expand); setOperationAction(ISD::UDIV, MVT::i32, Expand); setOperationAction(ISD::UREM, MVT::i32, Expand); setOperationAction(ISD::SDIV, MVT::i64, Expand); setOperationAction(ISD::SREM, MVT::i64, Expand); setOperationAction(ISD::UDIV, MVT::i64, Expand); setOperationAction(ISD::UREM, MVT::i64, Expand); // Operations not directly supported by Mips. setOperationAction(ISD::BR_CC, MVT::f32, Expand); setOperationAction(ISD::BR_CC, MVT::f64, Expand); setOperationAction(ISD::BR_CC, MVT::i32, Expand); setOperationAction(ISD::BR_CC, MVT::i64, Expand); setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); if (Subtarget.hasCnMips()) { setOperationAction(ISD::CTPOP, MVT::i32, Legal); setOperationAction(ISD::CTPOP, MVT::i64, Legal); } else { setOperationAction(ISD::CTPOP, MVT::i32, Expand); setOperationAction(ISD::CTPOP, MVT::i64, Expand); } setOperationAction(ISD::CTTZ, MVT::i32, Expand); setOperationAction(ISD::CTTZ, MVT::i64, Expand); setOperationAction(ISD::ROTL, MVT::i32, Expand); setOperationAction(ISD::ROTL, MVT::i64, Expand); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); if (!Subtarget.hasMips32r2()) setOperationAction(ISD::ROTR, MVT::i32, Expand); if (!Subtarget.hasMips64r2()) setOperationAction(ISD::ROTR, MVT::i64, Expand); setOperationAction(ISD::FSIN, MVT::f32, Expand); setOperationAction(ISD::FSIN, MVT::f64, Expand); setOperationAction(ISD::FCOS, MVT::f32, Expand); setOperationAction(ISD::FCOS, MVT::f64, Expand); setOperationAction(ISD::FSINCOS, MVT::f32, Expand); setOperationAction(ISD::FSINCOS, MVT::f64, Expand); setOperationAction(ISD::FPOWI, MVT::f32, Expand); setOperationAction(ISD::FPOW, MVT::f32, Expand); setOperationAction(ISD::FPOW, MVT::f64, Expand); setOperationAction(ISD::FLOG, MVT::f32, Expand); setOperationAction(ISD::FLOG2, MVT::f32, Expand); setOperationAction(ISD::FLOG10, MVT::f32, Expand); setOperationAction(ISD::FEXP, MVT::f32, Expand); setOperationAction(ISD::FMA, MVT::f32, Expand); setOperationAction(ISD::FMA, MVT::f64, Expand); setOperationAction(ISD::FREM, MVT::f32, Expand); setOperationAction(ISD::FREM, MVT::f64, Expand); // Lower f16 conversion operations into library calls setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); setOperationAction(ISD::EH_RETURN, MVT::Other, Custom); setOperationAction(ISD::VASTART, MVT::Other, Custom); setOperationAction(ISD::VAARG, MVT::Other, Custom); setOperationAction(ISD::VACOPY, MVT::Other, Expand); setOperationAction(ISD::VAEND, MVT::Other, Expand); // Use the default for now setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); if (!Subtarget.isGP64bit()) { setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand); setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); } if (!Subtarget.hasMips32r2()) { setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); } // MIPS16 lacks MIPS32's clz and clo instructions. if (!Subtarget.hasMips32() || Subtarget.inMips16Mode()) setOperationAction(ISD::CTLZ, MVT::i32, Expand); if (!Subtarget.hasMips64()) setOperationAction(ISD::CTLZ, MVT::i64, Expand); if (!Subtarget.hasMips32r2()) setOperationAction(ISD::BSWAP, MVT::i32, Expand); if (!Subtarget.hasMips64r2()) setOperationAction(ISD::BSWAP, MVT::i64, Expand); if (Subtarget.isGP64bit()) { setLoadExtAction(ISD::SEXTLOAD, MVT::i64, MVT::i32, Custom); setLoadExtAction(ISD::ZEXTLOAD, MVT::i64, MVT::i32, Custom); setLoadExtAction(ISD::EXTLOAD, MVT::i64, MVT::i32, Custom); setTruncStoreAction(MVT::i64, MVT::i32, Custom); } setOperationAction(ISD::TRAP, MVT::Other, Legal); setTargetDAGCombine(ISD::SDIVREM); setTargetDAGCombine(ISD::UDIVREM); setTargetDAGCombine(ISD::SELECT); setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::OR); setTargetDAGCombine(ISD::ADD); setTargetDAGCombine(ISD::AssertZext); setMinFunctionAlignment(Subtarget.isGP64bit() ? 3 : 2); // The arguments on the stack are defined in terms of 4-byte slots on O32 // and 8-byte slots on N32/N64. setMinStackArgumentAlignment((ABI.IsN32() || ABI.IsN64()) ? 8 : 4); setStackPointerRegisterToSaveRestore(ABI.IsN64() ? Mips::SP_64 : Mips::SP); MaxStoresPerMemcpy = 16; isMicroMips = Subtarget.inMicroMipsMode(); } const MipsTargetLowering *MipsTargetLowering::create(const MipsTargetMachine &TM, const MipsSubtarget &STI) { if (STI.inMips16Mode()) return llvm::createMips16TargetLowering(TM, STI); return llvm::createMipsSETargetLowering(TM, STI); } // Create a fast isel object. FastISel * MipsTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const { if (!funcInfo.MF->getTarget().Options.EnableFastISel) return TargetLowering::createFastISel(funcInfo, libInfo); return Mips::createFastISel(funcInfo, libInfo); } EVT MipsTargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &, EVT VT) const { if (!VT.isVector()) return MVT::i32; return VT.changeVectorElementTypeToInteger(); } static SDValue performDivRemCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget) { if (DCI.isBeforeLegalizeOps()) return SDValue(); EVT Ty = N->getValueType(0); unsigned LO = (Ty == MVT::i32) ? Mips::LO0 : Mips::LO0_64; unsigned HI = (Ty == MVT::i32) ? Mips::HI0 : Mips::HI0_64; unsigned Opc = N->getOpcode() == ISD::SDIVREM ? MipsISD::DivRem16 : MipsISD::DivRemU16; SDLoc DL(N); SDValue DivRem = DAG.getNode(Opc, DL, MVT::Glue, N->getOperand(0), N->getOperand(1)); SDValue InChain = DAG.getEntryNode(); SDValue InGlue = DivRem; // insert MFLO if (N->hasAnyUseOfValue(0)) { SDValue CopyFromLo = DAG.getCopyFromReg(InChain, DL, LO, Ty, InGlue); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), CopyFromLo); InChain = CopyFromLo.getValue(1); InGlue = CopyFromLo.getValue(2); } // insert MFHI if (N->hasAnyUseOfValue(1)) { SDValue CopyFromHi = DAG.getCopyFromReg(InChain, DL, HI, Ty, InGlue); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), CopyFromHi); } return SDValue(); } static Mips::CondCode condCodeToFCC(ISD::CondCode CC) { switch (CC) { default: llvm_unreachable("Unknown fp condition code!"); case ISD::SETEQ: case ISD::SETOEQ: return Mips::FCOND_OEQ; case ISD::SETUNE: return Mips::FCOND_UNE; case ISD::SETLT: case ISD::SETOLT: return Mips::FCOND_OLT; case ISD::SETGT: case ISD::SETOGT: return Mips::FCOND_OGT; case ISD::SETLE: case ISD::SETOLE: return Mips::FCOND_OLE; case ISD::SETGE: case ISD::SETOGE: return Mips::FCOND_OGE; case ISD::SETULT: return Mips::FCOND_ULT; case ISD::SETULE: return Mips::FCOND_ULE; case ISD::SETUGT: return Mips::FCOND_UGT; case ISD::SETUGE: return Mips::FCOND_UGE; case ISD::SETUO: return Mips::FCOND_UN; case ISD::SETO: return Mips::FCOND_OR; case ISD::SETNE: case ISD::SETONE: return Mips::FCOND_ONE; case ISD::SETUEQ: return Mips::FCOND_UEQ; } } /// This function returns true if the floating point conditional branches and /// conditional moves which use condition code CC should be inverted. static bool invertFPCondCodeUser(Mips::CondCode CC) { if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT) return false; assert((CC >= Mips::FCOND_T && CC <= Mips::FCOND_GT) && "Illegal Condition Code"); return true; } // Creates and returns an FPCmp node from a setcc node. // Returns Op if setcc is not a floating point comparison. static SDValue createFPCmp(SelectionDAG &DAG, const SDValue &Op) { // must be a SETCC node if (Op.getOpcode() != ISD::SETCC) return Op; SDValue LHS = Op.getOperand(0); if (!LHS.getValueType().isFloatingPoint()) return Op; SDValue RHS = Op.getOperand(1); SDLoc DL(Op); // Assume the 3rd operand is a CondCodeSDNode. Add code to check the type of // node if necessary. ISD::CondCode CC = cast(Op.getOperand(2))->get(); return DAG.getNode(MipsISD::FPCmp, DL, MVT::Glue, LHS, RHS, DAG.getConstant(condCodeToFCC(CC), DL, MVT::i32)); } // Creates and returns a CMovFPT/F node. static SDValue createCMovFP(SelectionDAG &DAG, SDValue Cond, SDValue True, SDValue False, const SDLoc &DL) { ConstantSDNode *CC = cast(Cond.getOperand(2)); bool invert = invertFPCondCodeUser((Mips::CondCode)CC->getSExtValue()); SDValue FCC0 = DAG.getRegister(Mips::FCC0, MVT::i32); return DAG.getNode((invert ? MipsISD::CMovFP_F : MipsISD::CMovFP_T), DL, True.getValueType(), True, FCC0, False, Cond); } static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget) { if (DCI.isBeforeLegalizeOps()) return SDValue(); SDValue SetCC = N->getOperand(0); if ((SetCC.getOpcode() != ISD::SETCC) || !SetCC.getOperand(0).getValueType().isInteger()) return SDValue(); SDValue False = N->getOperand(2); EVT FalseTy = False.getValueType(); if (!FalseTy.isInteger()) return SDValue(); ConstantSDNode *FalseC = dyn_cast(False); // If the RHS (False) is 0, we swap the order of the operands // of ISD::SELECT (obviously also inverting the condition) so that we can // take advantage of conditional moves using the $0 register. // Example: // return (a != 0) ? x : 0; // load $reg, x // movz $reg, $0, a if (!FalseC) return SDValue(); const SDLoc DL(N); if (!FalseC->getZExtValue()) { ISD::CondCode CC = cast(SetCC.getOperand(2))->get(); SDValue True = N->getOperand(1); SetCC = DAG.getSetCC(DL, SetCC.getValueType(), SetCC.getOperand(0), SetCC.getOperand(1), ISD::getSetCCInverse(CC, true)); return DAG.getNode(ISD::SELECT, DL, FalseTy, SetCC, False, True); } // If both operands are integer constants there's a possibility that we // can do some interesting optimizations. SDValue True = N->getOperand(1); ConstantSDNode *TrueC = dyn_cast(True); if (!TrueC || !True.getValueType().isInteger()) return SDValue(); // We'll also ignore MVT::i64 operands as this optimizations proves // to be ineffective because of the required sign extensions as the result // of a SETCC operator is always MVT::i32 for non-vector types. if (True.getValueType() == MVT::i64) return SDValue(); int64_t Diff = TrueC->getSExtValue() - FalseC->getSExtValue(); // 1) (a < x) ? y : y-1 // slti $reg1, a, x // addiu $reg2, $reg1, y-1 if (Diff == 1) return DAG.getNode(ISD::ADD, DL, SetCC.getValueType(), SetCC, False); // 2) (a < x) ? y-1 : y // slti $reg1, a, x // xor $reg1, $reg1, 1 // addiu $reg2, $reg1, y-1 if (Diff == -1) { ISD::CondCode CC = cast(SetCC.getOperand(2))->get(); SetCC = DAG.getSetCC(DL, SetCC.getValueType(), SetCC.getOperand(0), SetCC.getOperand(1), ISD::getSetCCInverse(CC, true)); return DAG.getNode(ISD::ADD, DL, SetCC.getValueType(), SetCC, True); } // Couldn't optimize. return SDValue(); } static SDValue performCMovFPCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget) { if (DCI.isBeforeLegalizeOps()) return SDValue(); SDValue ValueIfTrue = N->getOperand(0), ValueIfFalse = N->getOperand(2); ConstantSDNode *FalseC = dyn_cast(ValueIfFalse); if (!FalseC || FalseC->getZExtValue()) return SDValue(); // Since RHS (False) is 0, we swap the order of the True/False operands // (obviously also inverting the condition) so that we can // take advantage of conditional moves using the $0 register. // Example: // return (a != 0) ? x : 0; // load $reg, x // movz $reg, $0, a unsigned Opc = (N->getOpcode() == MipsISD::CMovFP_T) ? MipsISD::CMovFP_F : MipsISD::CMovFP_T; SDValue FCC = N->getOperand(1), Glue = N->getOperand(3); return DAG.getNode(Opc, SDLoc(N), ValueIfFalse.getValueType(), ValueIfFalse, FCC, ValueIfTrue, Glue); } static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget) { // Pattern match EXT. // $dst = and ((sra or srl) $src , pos), (2**size - 1) // => ext $dst, $src, size, pos if (DCI.isBeforeLegalizeOps() || !Subtarget.hasExtractInsert()) return SDValue(); SDValue ShiftRight = N->getOperand(0), Mask = N->getOperand(1); unsigned ShiftRightOpc = ShiftRight.getOpcode(); // Op's first operand must be a shift right. if (ShiftRightOpc != ISD::SRA && ShiftRightOpc != ISD::SRL) return SDValue(); // The second operand of the shift must be an immediate. ConstantSDNode *CN; if (!(CN = dyn_cast(ShiftRight.getOperand(1)))) return SDValue(); uint64_t Pos = CN->getZExtValue(); uint64_t SMPos, SMSize; // Op's second operand must be a shifted mask. if (!(CN = dyn_cast(Mask)) || !isShiftedMask(CN->getZExtValue(), SMPos, SMSize)) return SDValue(); // Return if the shifted mask does not start at bit 0 or the sum of its size // and Pos exceeds the word's size. EVT ValTy = N->getValueType(0); if (SMPos != 0 || Pos + SMSize > ValTy.getSizeInBits()) return SDValue(); SDLoc DL(N); return DAG.getNode(MipsISD::Ext, DL, ValTy, ShiftRight.getOperand(0), DAG.getConstant(Pos, DL, MVT::i32), DAG.getConstant(SMSize, DL, MVT::i32)); } static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget) { // Pattern match INS. // $dst = or (and $src1 , mask0), (and (shl $src, pos), mask1), // where mask1 = (2**size - 1) << pos, mask0 = ~mask1 // => ins $dst, $src, size, pos, $src1 if (DCI.isBeforeLegalizeOps() || !Subtarget.hasExtractInsert()) return SDValue(); SDValue And0 = N->getOperand(0), And1 = N->getOperand(1); uint64_t SMPos0, SMSize0, SMPos1, SMSize1; ConstantSDNode *CN; // See if Op's first operand matches (and $src1 , mask0). if (And0.getOpcode() != ISD::AND) return SDValue(); if (!(CN = dyn_cast(And0.getOperand(1))) || !isShiftedMask(~CN->getSExtValue(), SMPos0, SMSize0)) return SDValue(); // See if Op's second operand matches (and (shl $src, pos), mask1). if (And1.getOpcode() != ISD::AND) return SDValue(); if (!(CN = dyn_cast(And1.getOperand(1))) || !isShiftedMask(CN->getZExtValue(), SMPos1, SMSize1)) return SDValue(); // The shift masks must have the same position and size. if (SMPos0 != SMPos1 || SMSize0 != SMSize1) return SDValue(); SDValue Shl = And1.getOperand(0); if (Shl.getOpcode() != ISD::SHL) return SDValue(); if (!(CN = dyn_cast(Shl.getOperand(1)))) return SDValue(); unsigned Shamt = CN->getZExtValue(); // Return if the shift amount and the first bit position of mask are not the // same. EVT ValTy = N->getValueType(0); if ((Shamt != SMPos0) || (SMPos0 + SMSize0 > ValTy.getSizeInBits())) return SDValue(); SDLoc DL(N); return DAG.getNode(MipsISD::Ins, DL, ValTy, Shl.getOperand(0), DAG.getConstant(SMPos0, DL, MVT::i32), DAG.getConstant(SMSize0, DL, MVT::i32), And0.getOperand(0)); } static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget) { // (add v0, (add v1, abs_lo(tjt))) => (add (add v0, v1), abs_lo(tjt)) if (DCI.isBeforeLegalizeOps()) return SDValue(); SDValue Add = N->getOperand(1); if (Add.getOpcode() != ISD::ADD) return SDValue(); SDValue Lo = Add.getOperand(1); if ((Lo.getOpcode() != MipsISD::Lo) || (Lo.getOperand(0).getOpcode() != ISD::TargetJumpTable)) return SDValue(); EVT ValTy = N->getValueType(0); SDLoc DL(N); SDValue Add1 = DAG.getNode(ISD::ADD, DL, ValTy, N->getOperand(0), Add.getOperand(0)); return DAG.getNode(ISD::ADD, DL, ValTy, Add1, Lo); } static SDValue performAssertZextCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget) { SDValue N0 = N->getOperand(0); EVT NarrowerVT = cast(N->getOperand(1))->getVT(); if (N0.getOpcode() != ISD::TRUNCATE) return SDValue(); if (N0.getOperand(0).getOpcode() != ISD::AssertZext) return SDValue(); // fold (AssertZext (trunc (AssertZext x))) -> (trunc (AssertZext x)) // if the type of the extension of the innermost AssertZext node is // smaller from that of the outermost node, eg: // (AssertZext:i32 (trunc:i32 (AssertZext:i64 X, i32)), i8) // -> (trunc:i32 (AssertZext X, i8)) SDValue WiderAssertZext = N0.getOperand(0); EVT WiderVT = cast(WiderAssertZext->getOperand(1))->getVT(); if (NarrowerVT.bitsLT(WiderVT)) { SDValue NewAssertZext = DAG.getNode( ISD::AssertZext, SDLoc(N), WiderAssertZext.getValueType(), WiderAssertZext.getOperand(0), DAG.getValueType(NarrowerVT)); return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), NewAssertZext); } return SDValue(); } SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; unsigned Opc = N->getOpcode(); switch (Opc) { default: break; case ISD::SDIVREM: case ISD::UDIVREM: return performDivRemCombine(N, DAG, DCI, Subtarget); case ISD::SELECT: return performSELECTCombine(N, DAG, DCI, Subtarget); case MipsISD::CMovFP_F: case MipsISD::CMovFP_T: return performCMovFPCombine(N, DAG, DCI, Subtarget); case ISD::AND: return performANDCombine(N, DAG, DCI, Subtarget); case ISD::OR: return performORCombine(N, DAG, DCI, Subtarget); case ISD::ADD: return performADDCombine(N, DAG, DCI, Subtarget); case ISD::AssertZext: return performAssertZextCombine(N, DAG, DCI, Subtarget); } return SDValue(); } bool MipsTargetLowering::isCheapToSpeculateCttz() const { return Subtarget.hasMips32(); } bool MipsTargetLowering::isCheapToSpeculateCtlz() const { return Subtarget.hasMips32(); } void MipsTargetLowering::LowerOperationWrapper(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { SDValue Res = LowerOperation(SDValue(N, 0), DAG); for (unsigned I = 0, E = Res->getNumValues(); I != E; ++I) Results.push_back(Res.getValue(I)); } void MipsTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { return LowerOperationWrapper(N, Results, DAG); } SDValue MipsTargetLowering:: LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { case ISD::BR_JT: return lowerBR_JT(Op, DAG); case ISD::BRCOND: return lowerBRCOND(Op, DAG); case ISD::ConstantPool: return lowerConstantPool(Op, DAG); case ISD::GlobalAddress: return lowerGlobalAddress(Op, DAG); case ISD::BlockAddress: return lowerBlockAddress(Op, DAG); case ISD::GlobalTLSAddress: return lowerGlobalTLSAddress(Op, DAG); case ISD::JumpTable: return lowerJumpTable(Op, DAG); case ISD::SELECT: return lowerSELECT(Op, DAG); case ISD::SETCC: return lowerSETCC(Op, DAG); case ISD::VASTART: return lowerVASTART(Op, DAG); case ISD::VAARG: return lowerVAARG(Op, DAG); case ISD::FCOPYSIGN: return lowerFCOPYSIGN(Op, DAG); case ISD::FRAMEADDR: return lowerFRAMEADDR(Op, DAG); case ISD::RETURNADDR: return lowerRETURNADDR(Op, DAG); case ISD::EH_RETURN: return lowerEH_RETURN(Op, DAG); case ISD::ATOMIC_FENCE: return lowerATOMIC_FENCE(Op, DAG); case ISD::SHL_PARTS: return lowerShiftLeftParts(Op, DAG); case ISD::SRA_PARTS: return lowerShiftRightParts(Op, DAG, true); case ISD::SRL_PARTS: return lowerShiftRightParts(Op, DAG, false); case ISD::LOAD: return lowerLOAD(Op, DAG); case ISD::STORE: return lowerSTORE(Op, DAG); - case ISD::ADD: return lowerADD(Op, DAG); + case ISD::EH_DWARF_CFA: return lowerEH_DWARF_CFA(Op, DAG); case ISD::FP_TO_SINT: return lowerFP_TO_SINT(Op, DAG); } return SDValue(); } //===----------------------------------------------------------------------===// // Lower helper functions //===----------------------------------------------------------------------===// // addLiveIn - This helper function adds the specified physical register to the // MachineFunction as a live in value. It also creates a corresponding // virtual register for it. static unsigned addLiveIn(MachineFunction &MF, unsigned PReg, const TargetRegisterClass *RC) { unsigned VReg = MF.getRegInfo().createVirtualRegister(RC); MF.getRegInfo().addLiveIn(PReg, VReg); return VReg; } static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI, MachineBasicBlock &MBB, const TargetInstrInfo &TII, bool Is64Bit, bool IsMicroMips) { if (NoZeroDivCheck) return &MBB; // Insert instruction "teq $divisor_reg, $zero, 7". MachineBasicBlock::iterator I(MI); MachineInstrBuilder MIB; MachineOperand &Divisor = MI.getOperand(2); MIB = BuildMI(MBB, std::next(I), MI.getDebugLoc(), TII.get(IsMicroMips ? Mips::TEQ_MM : Mips::TEQ)) .addReg(Divisor.getReg(), getKillRegState(Divisor.isKill())) .addReg(Mips::ZERO) .addImm(7); // Use the 32-bit sub-register if this is a 64-bit division. if (Is64Bit) MIB->getOperand(0).setSubReg(Mips::sub_32); // Clear Divisor's kill flag. Divisor.setIsKill(false); // We would normally delete the original instruction here but in this case // we only needed to inject an additional instruction rather than replace it. return &MBB; } MachineBasicBlock * MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const { switch (MI.getOpcode()) { default: llvm_unreachable("Unexpected instr type to insert"); case Mips::ATOMIC_LOAD_ADD_I8: return emitAtomicBinaryPartword(MI, BB, 1, Mips::ADDu); case Mips::ATOMIC_LOAD_ADD_I16: return emitAtomicBinaryPartword(MI, BB, 2, Mips::ADDu); case Mips::ATOMIC_LOAD_ADD_I32: return emitAtomicBinary(MI, BB, 4, Mips::ADDu); case Mips::ATOMIC_LOAD_ADD_I64: return emitAtomicBinary(MI, BB, 8, Mips::DADDu); case Mips::ATOMIC_LOAD_AND_I8: return emitAtomicBinaryPartword(MI, BB, 1, Mips::AND); case Mips::ATOMIC_LOAD_AND_I16: return emitAtomicBinaryPartword(MI, BB, 2, Mips::AND); case Mips::ATOMIC_LOAD_AND_I32: return emitAtomicBinary(MI, BB, 4, Mips::AND); case Mips::ATOMIC_LOAD_AND_I64: return emitAtomicBinary(MI, BB, 8, Mips::AND64); case Mips::ATOMIC_LOAD_OR_I8: return emitAtomicBinaryPartword(MI, BB, 1, Mips::OR); case Mips::ATOMIC_LOAD_OR_I16: return emitAtomicBinaryPartword(MI, BB, 2, Mips::OR); case Mips::ATOMIC_LOAD_OR_I32: return emitAtomicBinary(MI, BB, 4, Mips::OR); case Mips::ATOMIC_LOAD_OR_I64: return emitAtomicBinary(MI, BB, 8, Mips::OR64); case Mips::ATOMIC_LOAD_XOR_I8: return emitAtomicBinaryPartword(MI, BB, 1, Mips::XOR); case Mips::ATOMIC_LOAD_XOR_I16: return emitAtomicBinaryPartword(MI, BB, 2, Mips::XOR); case Mips::ATOMIC_LOAD_XOR_I32: return emitAtomicBinary(MI, BB, 4, Mips::XOR); case Mips::ATOMIC_LOAD_XOR_I64: return emitAtomicBinary(MI, BB, 8, Mips::XOR64); case Mips::ATOMIC_LOAD_NAND_I8: return emitAtomicBinaryPartword(MI, BB, 1, 0, true); case Mips::ATOMIC_LOAD_NAND_I16: return emitAtomicBinaryPartword(MI, BB, 2, 0, true); case Mips::ATOMIC_LOAD_NAND_I32: return emitAtomicBinary(MI, BB, 4, 0, true); case Mips::ATOMIC_LOAD_NAND_I64: return emitAtomicBinary(MI, BB, 8, 0, true); case Mips::ATOMIC_LOAD_SUB_I8: return emitAtomicBinaryPartword(MI, BB, 1, Mips::SUBu); case Mips::ATOMIC_LOAD_SUB_I16: return emitAtomicBinaryPartword(MI, BB, 2, Mips::SUBu); case Mips::ATOMIC_LOAD_SUB_I32: return emitAtomicBinary(MI, BB, 4, Mips::SUBu); case Mips::ATOMIC_LOAD_SUB_I64: return emitAtomicBinary(MI, BB, 8, Mips::DSUBu); case Mips::ATOMIC_SWAP_I8: return emitAtomicBinaryPartword(MI, BB, 1, 0); case Mips::ATOMIC_SWAP_I16: return emitAtomicBinaryPartword(MI, BB, 2, 0); case Mips::ATOMIC_SWAP_I32: return emitAtomicBinary(MI, BB, 4, 0); case Mips::ATOMIC_SWAP_I64: return emitAtomicBinary(MI, BB, 8, 0); case Mips::ATOMIC_CMP_SWAP_I8: return emitAtomicCmpSwapPartword(MI, BB, 1); case Mips::ATOMIC_CMP_SWAP_I16: return emitAtomicCmpSwapPartword(MI, BB, 2); case Mips::ATOMIC_CMP_SWAP_I32: return emitAtomicCmpSwap(MI, BB, 4); case Mips::ATOMIC_CMP_SWAP_I64: return emitAtomicCmpSwap(MI, BB, 8); case Mips::PseudoSDIV: case Mips::PseudoUDIV: case Mips::DIV: case Mips::DIVU: case Mips::MOD: case Mips::MODU: return insertDivByZeroTrap(MI, *BB, *Subtarget.getInstrInfo(), false, false); case Mips::SDIV_MM_Pseudo: case Mips::UDIV_MM_Pseudo: case Mips::SDIV_MM: case Mips::UDIV_MM: case Mips::DIV_MMR6: case Mips::DIVU_MMR6: case Mips::MOD_MMR6: case Mips::MODU_MMR6: return insertDivByZeroTrap(MI, *BB, *Subtarget.getInstrInfo(), false, true); case Mips::PseudoDSDIV: case Mips::PseudoDUDIV: case Mips::DDIV: case Mips::DDIVU: case Mips::DMOD: case Mips::DMODU: return insertDivByZeroTrap(MI, *BB, *Subtarget.getInstrInfo(), true, false); case Mips::DDIV_MM64R6: case Mips::DDIVU_MM64R6: case Mips::DMOD_MM64R6: case Mips::DMODU_MM64R6: return insertDivByZeroTrap(MI, *BB, *Subtarget.getInstrInfo(), true, true); case Mips::SEL_D: case Mips::SEL_D_MMR6: return emitSEL_D(MI, BB); case Mips::PseudoSELECT_I: case Mips::PseudoSELECT_I64: case Mips::PseudoSELECT_S: case Mips::PseudoSELECT_D32: case Mips::PseudoSELECT_D64: return emitPseudoSELECT(MI, BB, false, Mips::BNE); case Mips::PseudoSELECTFP_F_I: case Mips::PseudoSELECTFP_F_I64: case Mips::PseudoSELECTFP_F_S: case Mips::PseudoSELECTFP_F_D32: case Mips::PseudoSELECTFP_F_D64: return emitPseudoSELECT(MI, BB, true, Mips::BC1F); case Mips::PseudoSELECTFP_T_I: case Mips::PseudoSELECTFP_T_I64: case Mips::PseudoSELECTFP_T_S: case Mips::PseudoSELECTFP_T_D32: case Mips::PseudoSELECTFP_T_D64: return emitPseudoSELECT(MI, BB, true, Mips::BC1T); } } // This function also handles Mips::ATOMIC_SWAP_I32 (when BinOpcode == 0), and // Mips::ATOMIC_LOAD_NAND_I32 (when Nand == true) MachineBasicBlock *MipsTargetLowering::emitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode, bool Nand) const { assert((Size == 4 || Size == 8) && "Unsupported size for EmitAtomicBinary."); MachineFunction *MF = BB->getParent(); MachineRegisterInfo &RegInfo = MF->getRegInfo(); const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8)); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); const bool ArePtrs64bit = ABI.ArePtrs64bit(); DebugLoc DL = MI.getDebugLoc(); unsigned LL, SC, AND, NOR, ZERO, BEQ; if (Size == 4) { if (isMicroMips) { LL = Mips::LL_MM; SC = Mips::SC_MM; } else { LL = Subtarget.hasMips32r6() ? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6) : (ArePtrs64bit ? Mips::LL64 : Mips::LL); SC = Subtarget.hasMips32r6() ? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6) : (ArePtrs64bit ? Mips::SC64 : Mips::SC); } AND = Mips::AND; NOR = Mips::NOR; ZERO = Mips::ZERO; BEQ = Mips::BEQ; } else { LL = Subtarget.hasMips64r6() ? Mips::LLD_R6 : Mips::LLD; SC = Subtarget.hasMips64r6() ? Mips::SCD_R6 : Mips::SCD; AND = Mips::AND64; NOR = Mips::NOR64; ZERO = Mips::ZERO_64; BEQ = Mips::BEQ64; } unsigned OldVal = MI.getOperand(0).getReg(); unsigned Ptr = MI.getOperand(1).getReg(); unsigned Incr = MI.getOperand(2).getReg(); unsigned StoreVal = RegInfo.createVirtualRegister(RC); unsigned AndRes = RegInfo.createVirtualRegister(RC); unsigned Success = RegInfo.createVirtualRegister(RC); // insert new blocks after the current block const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineFunction::iterator It = ++BB->getIterator(); MF->insert(It, loopMBB); MF->insert(It, exitMBB); // Transfer the remainder of BB and its successor edges to exitMBB. exitMBB->splice(exitMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); // thisMBB: // ... // fallthrough --> loopMBB BB->addSuccessor(loopMBB); loopMBB->addSuccessor(loopMBB); loopMBB->addSuccessor(exitMBB); // loopMBB: // ll oldval, 0(ptr) // storeval, oldval, incr // sc success, storeval, 0(ptr) // beq success, $0, loopMBB BB = loopMBB; BuildMI(BB, DL, TII->get(LL), OldVal).addReg(Ptr).addImm(0); if (Nand) { // and andres, oldval, incr // nor storeval, $0, andres BuildMI(BB, DL, TII->get(AND), AndRes).addReg(OldVal).addReg(Incr); BuildMI(BB, DL, TII->get(NOR), StoreVal).addReg(ZERO).addReg(AndRes); } else if (BinOpcode) { // storeval, oldval, incr BuildMI(BB, DL, TII->get(BinOpcode), StoreVal).addReg(OldVal).addReg(Incr); } else { StoreVal = Incr; } BuildMI(BB, DL, TII->get(SC), Success).addReg(StoreVal).addReg(Ptr).addImm(0); BuildMI(BB, DL, TII->get(BEQ)).addReg(Success).addReg(ZERO).addMBB(loopMBB); MI.eraseFromParent(); // The instruction is gone now. return exitMBB; } MachineBasicBlock *MipsTargetLowering::emitSignExtendToI32InReg( MachineInstr &MI, MachineBasicBlock *BB, unsigned Size, unsigned DstReg, unsigned SrcReg) const { const TargetInstrInfo *TII = Subtarget.getInstrInfo(); const DebugLoc &DL = MI.getDebugLoc(); if (Subtarget.hasMips32r2() && Size == 1) { BuildMI(BB, DL, TII->get(Mips::SEB), DstReg).addReg(SrcReg); return BB; } if (Subtarget.hasMips32r2() && Size == 2) { BuildMI(BB, DL, TII->get(Mips::SEH), DstReg).addReg(SrcReg); return BB; } MachineFunction *MF = BB->getParent(); MachineRegisterInfo &RegInfo = MF->getRegInfo(); const TargetRegisterClass *RC = getRegClassFor(MVT::i32); unsigned ScrReg = RegInfo.createVirtualRegister(RC); assert(Size < 32); int64_t ShiftImm = 32 - (Size * 8); BuildMI(BB, DL, TII->get(Mips::SLL), ScrReg).addReg(SrcReg).addImm(ShiftImm); BuildMI(BB, DL, TII->get(Mips::SRA), DstReg).addReg(ScrReg).addImm(ShiftImm); return BB; } MachineBasicBlock *MipsTargetLowering::emitAtomicBinaryPartword( MachineInstr &MI, MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode, bool Nand) const { assert((Size == 1 || Size == 2) && "Unsupported size for EmitAtomicBinaryPartial."); MachineFunction *MF = BB->getParent(); MachineRegisterInfo &RegInfo = MF->getRegInfo(); const TargetRegisterClass *RC = getRegClassFor(MVT::i32); const bool ArePtrs64bit = ABI.ArePtrs64bit(); const TargetRegisterClass *RCp = getRegClassFor(ArePtrs64bit ? MVT::i64 : MVT::i32); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); unsigned Dest = MI.getOperand(0).getReg(); unsigned Ptr = MI.getOperand(1).getReg(); unsigned Incr = MI.getOperand(2).getReg(); unsigned AlignedAddr = RegInfo.createVirtualRegister(RCp); unsigned ShiftAmt = RegInfo.createVirtualRegister(RC); unsigned Mask = RegInfo.createVirtualRegister(RC); unsigned Mask2 = RegInfo.createVirtualRegister(RC); unsigned NewVal = RegInfo.createVirtualRegister(RC); unsigned OldVal = RegInfo.createVirtualRegister(RC); unsigned Incr2 = RegInfo.createVirtualRegister(RC); unsigned MaskLSB2 = RegInfo.createVirtualRegister(RCp); unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC); unsigned MaskUpper = RegInfo.createVirtualRegister(RC); unsigned AndRes = RegInfo.createVirtualRegister(RC); unsigned BinOpRes = RegInfo.createVirtualRegister(RC); unsigned MaskedOldVal0 = RegInfo.createVirtualRegister(RC); unsigned StoreVal = RegInfo.createVirtualRegister(RC); unsigned MaskedOldVal1 = RegInfo.createVirtualRegister(RC); unsigned SrlRes = RegInfo.createVirtualRegister(RC); unsigned Success = RegInfo.createVirtualRegister(RC); unsigned LL, SC; if (isMicroMips) { LL = Mips::LL_MM; SC = Mips::SC_MM; } else { LL = Subtarget.hasMips32r6() ? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6) : (ArePtrs64bit ? Mips::LL64 : Mips::LL); SC = Subtarget.hasMips32r6() ? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6) : (ArePtrs64bit ? Mips::SC64 : Mips::SC); } // insert new blocks after the current block const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineFunction::iterator It = ++BB->getIterator(); MF->insert(It, loopMBB); MF->insert(It, sinkMBB); MF->insert(It, exitMBB); // Transfer the remainder of BB and its successor edges to exitMBB. exitMBB->splice(exitMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); BB->addSuccessor(loopMBB); loopMBB->addSuccessor(loopMBB); loopMBB->addSuccessor(sinkMBB); sinkMBB->addSuccessor(exitMBB); // thisMBB: // addiu masklsb2,$0,-4 # 0xfffffffc // and alignedaddr,ptr,masklsb2 // andi ptrlsb2,ptr,3 // sll shiftamt,ptrlsb2,3 // ori maskupper,$0,255 # 0xff // sll mask,maskupper,shiftamt // nor mask2,$0,mask // sll incr2,incr,shiftamt int64_t MaskImm = (Size == 1) ? 255 : 65535; BuildMI(BB, DL, TII->get(ABI.GetPtrAddiuOp()), MaskLSB2) .addReg(ABI.GetNullPtr()).addImm(-4); BuildMI(BB, DL, TII->get(ABI.GetPtrAndOp()), AlignedAddr) .addReg(Ptr).addReg(MaskLSB2); BuildMI(BB, DL, TII->get(Mips::ANDi), PtrLSB2) .addReg(Ptr, 0, ArePtrs64bit ? Mips::sub_32 : 0).addImm(3); if (Subtarget.isLittle()) { BuildMI(BB, DL, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3); } else { unsigned Off = RegInfo.createVirtualRegister(RC); BuildMI(BB, DL, TII->get(Mips::XORi), Off) .addReg(PtrLSB2).addImm((Size == 1) ? 3 : 2); BuildMI(BB, DL, TII->get(Mips::SLL), ShiftAmt).addReg(Off).addImm(3); } BuildMI(BB, DL, TII->get(Mips::ORi), MaskUpper) .addReg(Mips::ZERO).addImm(MaskImm); BuildMI(BB, DL, TII->get(Mips::SLLV), Mask) .addReg(MaskUpper).addReg(ShiftAmt); BuildMI(BB, DL, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask); BuildMI(BB, DL, TII->get(Mips::SLLV), Incr2).addReg(Incr).addReg(ShiftAmt); // atomic.load.binop // loopMBB: // ll oldval,0(alignedaddr) // binop binopres,oldval,incr2 // and newval,binopres,mask // and maskedoldval0,oldval,mask2 // or storeval,maskedoldval0,newval // sc success,storeval,0(alignedaddr) // beq success,$0,loopMBB // atomic.swap // loopMBB: // ll oldval,0(alignedaddr) // and newval,incr2,mask // and maskedoldval0,oldval,mask2 // or storeval,maskedoldval0,newval // sc success,storeval,0(alignedaddr) // beq success,$0,loopMBB BB = loopMBB; BuildMI(BB, DL, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0); if (Nand) { // and andres, oldval, incr2 // nor binopres, $0, andres // and newval, binopres, mask BuildMI(BB, DL, TII->get(Mips::AND), AndRes).addReg(OldVal).addReg(Incr2); BuildMI(BB, DL, TII->get(Mips::NOR), BinOpRes) .addReg(Mips::ZERO).addReg(AndRes); BuildMI(BB, DL, TII->get(Mips::AND), NewVal).addReg(BinOpRes).addReg(Mask); } else if (BinOpcode) { // binopres, oldval, incr2 // and newval, binopres, mask BuildMI(BB, DL, TII->get(BinOpcode), BinOpRes).addReg(OldVal).addReg(Incr2); BuildMI(BB, DL, TII->get(Mips::AND), NewVal).addReg(BinOpRes).addReg(Mask); } else { // atomic.swap // and newval, incr2, mask BuildMI(BB, DL, TII->get(Mips::AND), NewVal).addReg(Incr2).addReg(Mask); } BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal0) .addReg(OldVal).addReg(Mask2); BuildMI(BB, DL, TII->get(Mips::OR), StoreVal) .addReg(MaskedOldVal0).addReg(NewVal); BuildMI(BB, DL, TII->get(SC), Success) .addReg(StoreVal).addReg(AlignedAddr).addImm(0); BuildMI(BB, DL, TII->get(Mips::BEQ)) .addReg(Success).addReg(Mips::ZERO).addMBB(loopMBB); // sinkMBB: // and maskedoldval1,oldval,mask // srl srlres,maskedoldval1,shiftamt // sign_extend dest,srlres BB = sinkMBB; BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal1) .addReg(OldVal).addReg(Mask); BuildMI(BB, DL, TII->get(Mips::SRLV), SrlRes) .addReg(MaskedOldVal1).addReg(ShiftAmt); BB = emitSignExtendToI32InReg(MI, BB, Size, Dest, SrlRes); MI.eraseFromParent(); // The instruction is gone now. return exitMBB; } MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwap(MachineInstr &MI, MachineBasicBlock *BB, unsigned Size) const { assert((Size == 4 || Size == 8) && "Unsupported size for EmitAtomicCmpSwap."); MachineFunction *MF = BB->getParent(); MachineRegisterInfo &RegInfo = MF->getRegInfo(); const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8)); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); const bool ArePtrs64bit = ABI.ArePtrs64bit(); DebugLoc DL = MI.getDebugLoc(); unsigned LL, SC, ZERO, BNE, BEQ; if (Size == 4) { if (isMicroMips) { LL = Mips::LL_MM; SC = Mips::SC_MM; } else { LL = Subtarget.hasMips32r6() ? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6) : (ArePtrs64bit ? Mips::LL64 : Mips::LL); SC = Subtarget.hasMips32r6() ? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6) : (ArePtrs64bit ? Mips::SC64 : Mips::SC); } ZERO = Mips::ZERO; BNE = Mips::BNE; BEQ = Mips::BEQ; } else { LL = Subtarget.hasMips64r6() ? Mips::LLD_R6 : Mips::LLD; SC = Subtarget.hasMips64r6() ? Mips::SCD_R6 : Mips::SCD; ZERO = Mips::ZERO_64; BNE = Mips::BNE64; BEQ = Mips::BEQ64; } unsigned Dest = MI.getOperand(0).getReg(); unsigned Ptr = MI.getOperand(1).getReg(); unsigned OldVal = MI.getOperand(2).getReg(); unsigned NewVal = MI.getOperand(3).getReg(); unsigned Success = RegInfo.createVirtualRegister(RC); // insert new blocks after the current block const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineFunction::iterator It = ++BB->getIterator(); MF->insert(It, loop1MBB); MF->insert(It, loop2MBB); MF->insert(It, exitMBB); // Transfer the remainder of BB and its successor edges to exitMBB. exitMBB->splice(exitMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); // thisMBB: // ... // fallthrough --> loop1MBB BB->addSuccessor(loop1MBB); loop1MBB->addSuccessor(exitMBB); loop1MBB->addSuccessor(loop2MBB); loop2MBB->addSuccessor(loop1MBB); loop2MBB->addSuccessor(exitMBB); // loop1MBB: // ll dest, 0(ptr) // bne dest, oldval, exitMBB BB = loop1MBB; BuildMI(BB, DL, TII->get(LL), Dest).addReg(Ptr).addImm(0); BuildMI(BB, DL, TII->get(BNE)) .addReg(Dest).addReg(OldVal).addMBB(exitMBB); // loop2MBB: // sc success, newval, 0(ptr) // beq success, $0, loop1MBB BB = loop2MBB; BuildMI(BB, DL, TII->get(SC), Success) .addReg(NewVal).addReg(Ptr).addImm(0); BuildMI(BB, DL, TII->get(BEQ)) .addReg(Success).addReg(ZERO).addMBB(loop1MBB); MI.eraseFromParent(); // The instruction is gone now. return exitMBB; } MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword( MachineInstr &MI, MachineBasicBlock *BB, unsigned Size) const { assert((Size == 1 || Size == 2) && "Unsupported size for EmitAtomicCmpSwapPartial."); MachineFunction *MF = BB->getParent(); MachineRegisterInfo &RegInfo = MF->getRegInfo(); const TargetRegisterClass *RC = getRegClassFor(MVT::i32); const bool ArePtrs64bit = ABI.ArePtrs64bit(); const TargetRegisterClass *RCp = getRegClassFor(ArePtrs64bit ? MVT::i64 : MVT::i32); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); unsigned Dest = MI.getOperand(0).getReg(); unsigned Ptr = MI.getOperand(1).getReg(); unsigned CmpVal = MI.getOperand(2).getReg(); unsigned NewVal = MI.getOperand(3).getReg(); unsigned AlignedAddr = RegInfo.createVirtualRegister(RCp); unsigned ShiftAmt = RegInfo.createVirtualRegister(RC); unsigned Mask = RegInfo.createVirtualRegister(RC); unsigned Mask2 = RegInfo.createVirtualRegister(RC); unsigned ShiftedCmpVal = RegInfo.createVirtualRegister(RC); unsigned OldVal = RegInfo.createVirtualRegister(RC); unsigned MaskedOldVal0 = RegInfo.createVirtualRegister(RC); unsigned ShiftedNewVal = RegInfo.createVirtualRegister(RC); unsigned MaskLSB2 = RegInfo.createVirtualRegister(RCp); unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC); unsigned MaskUpper = RegInfo.createVirtualRegister(RC); unsigned MaskedCmpVal = RegInfo.createVirtualRegister(RC); unsigned MaskedNewVal = RegInfo.createVirtualRegister(RC); unsigned MaskedOldVal1 = RegInfo.createVirtualRegister(RC); unsigned StoreVal = RegInfo.createVirtualRegister(RC); unsigned SrlRes = RegInfo.createVirtualRegister(RC); unsigned Success = RegInfo.createVirtualRegister(RC); unsigned LL, SC; if (isMicroMips) { LL = Mips::LL_MM; SC = Mips::SC_MM; } else { LL = Subtarget.hasMips32r6() ? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6) : (ArePtrs64bit ? Mips::LL64 : Mips::LL); SC = Subtarget.hasMips32r6() ? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6) : (ArePtrs64bit ? Mips::SC64 : Mips::SC); } // insert new blocks after the current block const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineFunction::iterator It = ++BB->getIterator(); MF->insert(It, loop1MBB); MF->insert(It, loop2MBB); MF->insert(It, sinkMBB); MF->insert(It, exitMBB); // Transfer the remainder of BB and its successor edges to exitMBB. exitMBB->splice(exitMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); BB->addSuccessor(loop1MBB); loop1MBB->addSuccessor(sinkMBB); loop1MBB->addSuccessor(loop2MBB); loop2MBB->addSuccessor(loop1MBB); loop2MBB->addSuccessor(sinkMBB); sinkMBB->addSuccessor(exitMBB); // FIXME: computation of newval2 can be moved to loop2MBB. // thisMBB: // addiu masklsb2,$0,-4 # 0xfffffffc // and alignedaddr,ptr,masklsb2 // andi ptrlsb2,ptr,3 // xori ptrlsb2,ptrlsb2,3 # Only for BE // sll shiftamt,ptrlsb2,3 // ori maskupper,$0,255 # 0xff // sll mask,maskupper,shiftamt // nor mask2,$0,mask // andi maskedcmpval,cmpval,255 // sll shiftedcmpval,maskedcmpval,shiftamt // andi maskednewval,newval,255 // sll shiftednewval,maskednewval,shiftamt int64_t MaskImm = (Size == 1) ? 255 : 65535; BuildMI(BB, DL, TII->get(ArePtrs64bit ? Mips::DADDiu : Mips::ADDiu), MaskLSB2) .addReg(ABI.GetNullPtr()).addImm(-4); BuildMI(BB, DL, TII->get(ArePtrs64bit ? Mips::AND64 : Mips::AND), AlignedAddr) .addReg(Ptr).addReg(MaskLSB2); BuildMI(BB, DL, TII->get(Mips::ANDi), PtrLSB2) .addReg(Ptr, 0, ArePtrs64bit ? Mips::sub_32 : 0).addImm(3); if (Subtarget.isLittle()) { BuildMI(BB, DL, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3); } else { unsigned Off = RegInfo.createVirtualRegister(RC); BuildMI(BB, DL, TII->get(Mips::XORi), Off) .addReg(PtrLSB2).addImm((Size == 1) ? 3 : 2); BuildMI(BB, DL, TII->get(Mips::SLL), ShiftAmt).addReg(Off).addImm(3); } BuildMI(BB, DL, TII->get(Mips::ORi), MaskUpper) .addReg(Mips::ZERO).addImm(MaskImm); BuildMI(BB, DL, TII->get(Mips::SLLV), Mask) .addReg(MaskUpper).addReg(ShiftAmt); BuildMI(BB, DL, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask); BuildMI(BB, DL, TII->get(Mips::ANDi), MaskedCmpVal) .addReg(CmpVal).addImm(MaskImm); BuildMI(BB, DL, TII->get(Mips::SLLV), ShiftedCmpVal) .addReg(MaskedCmpVal).addReg(ShiftAmt); BuildMI(BB, DL, TII->get(Mips::ANDi), MaskedNewVal) .addReg(NewVal).addImm(MaskImm); BuildMI(BB, DL, TII->get(Mips::SLLV), ShiftedNewVal) .addReg(MaskedNewVal).addReg(ShiftAmt); // loop1MBB: // ll oldval,0(alginedaddr) // and maskedoldval0,oldval,mask // bne maskedoldval0,shiftedcmpval,sinkMBB BB = loop1MBB; BuildMI(BB, DL, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0); BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal0) .addReg(OldVal).addReg(Mask); BuildMI(BB, DL, TII->get(Mips::BNE)) .addReg(MaskedOldVal0).addReg(ShiftedCmpVal).addMBB(sinkMBB); // loop2MBB: // and maskedoldval1,oldval,mask2 // or storeval,maskedoldval1,shiftednewval // sc success,storeval,0(alignedaddr) // beq success,$0,loop1MBB BB = loop2MBB; BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal1) .addReg(OldVal).addReg(Mask2); BuildMI(BB, DL, TII->get(Mips::OR), StoreVal) .addReg(MaskedOldVal1).addReg(ShiftedNewVal); BuildMI(BB, DL, TII->get(SC), Success) .addReg(StoreVal).addReg(AlignedAddr).addImm(0); BuildMI(BB, DL, TII->get(Mips::BEQ)) .addReg(Success).addReg(Mips::ZERO).addMBB(loop1MBB); // sinkMBB: // srl srlres,maskedoldval0,shiftamt // sign_extend dest,srlres BB = sinkMBB; BuildMI(BB, DL, TII->get(Mips::SRLV), SrlRes) .addReg(MaskedOldVal0).addReg(ShiftAmt); BB = emitSignExtendToI32InReg(MI, BB, Size, Dest, SrlRes); MI.eraseFromParent(); // The instruction is gone now. return exitMBB; } MachineBasicBlock *MipsTargetLowering::emitSEL_D(MachineInstr &MI, MachineBasicBlock *BB) const { MachineFunction *MF = BB->getParent(); const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &RegInfo = MF->getRegInfo(); DebugLoc DL = MI.getDebugLoc(); MachineBasicBlock::iterator II(MI); unsigned Fc = MI.getOperand(1).getReg(); const auto &FGR64RegClass = TRI->getRegClass(Mips::FGR64RegClassID); unsigned Fc2 = RegInfo.createVirtualRegister(FGR64RegClass); BuildMI(*BB, II, DL, TII->get(Mips::SUBREG_TO_REG), Fc2) .addImm(0) .addReg(Fc) .addImm(Mips::sub_lo); // We don't erase the original instruction, we just replace the condition // register with the 64-bit super-register. MI.getOperand(1).setReg(Fc2); return BB; } //===----------------------------------------------------------------------===// // Misc Lower Operation implementation //===----------------------------------------------------------------------===// SDValue MipsTargetLowering::lowerBR_JT(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Table = Op.getOperand(1); SDValue Index = Op.getOperand(2); SDLoc DL(Op); auto &TD = DAG.getDataLayout(); EVT PTy = getPointerTy(TD); unsigned EntrySize = DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD); Index = DAG.getNode(ISD::MUL, DL, PTy, Index, DAG.getConstant(EntrySize, DL, PTy)); SDValue Addr = DAG.getNode(ISD::ADD, DL, PTy, Index, Table); EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8); Addr = DAG.getExtLoad( ISD::SEXTLOAD, DL, PTy, Chain, Addr, MachinePointerInfo::getJumpTable(DAG.getMachineFunction()), MemVT); Chain = Addr.getValue(1); if (isPositionIndependent() || ABI.IsN64()) { // For PIC, the sequence is: // BRIND(load(Jumptable + index) + RelocBase) // RelocBase can be JumpTable, GOT or some sort of global base. Addr = DAG.getNode(ISD::ADD, DL, PTy, Addr, getPICJumpTableRelocBase(Table, DAG)); } return DAG.getNode(ISD::BRIND, DL, MVT::Other, Chain, Addr); } SDValue MipsTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const { // The first operand is the chain, the second is the condition, the third is // the block to branch to if the condition is true. SDValue Chain = Op.getOperand(0); SDValue Dest = Op.getOperand(2); SDLoc DL(Op); assert(!Subtarget.hasMips32r6() && !Subtarget.hasMips64r6()); SDValue CondRes = createFPCmp(DAG, Op.getOperand(1)); // Return if flag is not set by a floating point comparison. if (CondRes.getOpcode() != MipsISD::FPCmp) return Op; SDValue CCNode = CondRes.getOperand(2); Mips::CondCode CC = (Mips::CondCode)cast(CCNode)->getZExtValue(); unsigned Opc = invertFPCondCodeUser(CC) ? Mips::BRANCH_F : Mips::BRANCH_T; SDValue BrCode = DAG.getConstant(Opc, DL, MVT::i32); SDValue FCC0 = DAG.getRegister(Mips::FCC0, MVT::i32); return DAG.getNode(MipsISD::FPBrcond, DL, Op.getValueType(), Chain, BrCode, FCC0, Dest, CondRes); } SDValue MipsTargetLowering:: lowerSELECT(SDValue Op, SelectionDAG &DAG) const { assert(!Subtarget.hasMips32r6() && !Subtarget.hasMips64r6()); SDValue Cond = createFPCmp(DAG, Op.getOperand(0)); // Return if flag is not set by a floating point comparison. if (Cond.getOpcode() != MipsISD::FPCmp) return Op; return createCMovFP(DAG, Cond, Op.getOperand(1), Op.getOperand(2), SDLoc(Op)); } SDValue MipsTargetLowering::lowerSETCC(SDValue Op, SelectionDAG &DAG) const { assert(!Subtarget.hasMips32r6() && !Subtarget.hasMips64r6()); SDValue Cond = createFPCmp(DAG, Op); assert(Cond.getOpcode() == MipsISD::FPCmp && "Floating point operand expected."); SDLoc DL(Op); SDValue True = DAG.getConstant(1, DL, MVT::i32); SDValue False = DAG.getConstant(0, DL, MVT::i32); return createCMovFP(DAG, Cond, True, False, DL); } SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { EVT Ty = Op.getValueType(); GlobalAddressSDNode *N = cast(Op); const GlobalValue *GV = N->getGlobal(); if (!isPositionIndependent() && !ABI.IsN64()) { const MipsTargetObjectFile *TLOF = static_cast( getTargetMachine().getObjFileLowering()); if (TLOF->IsGlobalInSmallSection(GV, getTargetMachine())) // %gp_rel relocation return getAddrGPRel(N, SDLoc(N), Ty, DAG); // %hi/%lo relocation return getAddrNonPIC(N, SDLoc(N), Ty, DAG); } // Every other architecture would use shouldAssumeDSOLocal in here, but // mips is special. // * In PIC code mips requires got loads even for local statics! // * To save on got entries, for local statics the got entry contains the // page and an additional add instruction takes care of the low bits. // * It is legal to access a hidden symbol with a non hidden undefined, // so one cannot guarantee that all access to a hidden symbol will know // it is hidden. // * Mips linkers don't support creating a page and a full got entry for // the same symbol. // * Given all that, we have to use a full got entry for hidden symbols :-( if (GV->hasLocalLinkage()) return getAddrLocal(N, SDLoc(N), Ty, DAG, ABI.IsN32() || ABI.IsN64()); if (LargeGOT) return getAddrGlobalLargeGOT( N, SDLoc(N), Ty, DAG, MipsII::MO_GOT_HI16, MipsII::MO_GOT_LO16, DAG.getEntryNode(), MachinePointerInfo::getGOT(DAG.getMachineFunction())); return getAddrGlobal( N, SDLoc(N), Ty, DAG, (ABI.IsN32() || ABI.IsN64()) ? MipsII::MO_GOT_DISP : MipsII::MO_GOT, DAG.getEntryNode(), MachinePointerInfo::getGOT(DAG.getMachineFunction())); } SDValue MipsTargetLowering::lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { BlockAddressSDNode *N = cast(Op); EVT Ty = Op.getValueType(); if (!isPositionIndependent() && !ABI.IsN64()) return getAddrNonPIC(N, SDLoc(N), Ty, DAG); return getAddrLocal(N, SDLoc(N), Ty, DAG, ABI.IsN32() || ABI.IsN64()); } SDValue MipsTargetLowering:: lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { // If the relocation model is PIC, use the General Dynamic TLS Model or // Local Dynamic TLS model, otherwise use the Initial Exec or // Local Exec TLS Model. GlobalAddressSDNode *GA = cast(Op); if (DAG.getTarget().Options.EmulatedTLS) return LowerToTLSEmulatedModel(GA, DAG); SDLoc DL(GA); const GlobalValue *GV = GA->getGlobal(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); TLSModel::Model model = getTargetMachine().getTLSModel(GV); if (model == TLSModel::GeneralDynamic || model == TLSModel::LocalDynamic) { // General Dynamic and Local Dynamic TLS Model. unsigned Flag = (model == TLSModel::LocalDynamic) ? MipsII::MO_TLSLDM : MipsII::MO_TLSGD; SDValue TGA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, Flag); SDValue Argument = DAG.getNode(MipsISD::Wrapper, DL, PtrVT, getGlobalReg(DAG, PtrVT), TGA); unsigned PtrSize = PtrVT.getSizeInBits(); IntegerType *PtrTy = Type::getIntNTy(*DAG.getContext(), PtrSize); SDValue TlsGetAddr = DAG.getExternalSymbol("__tls_get_addr", PtrVT); ArgListTy Args; ArgListEntry Entry; Entry.Node = Argument; Entry.Ty = PtrTy; Args.push_back(Entry); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(DL).setChain(DAG.getEntryNode()) .setCallee(CallingConv::C, PtrTy, TlsGetAddr, std::move(Args)); std::pair CallResult = LowerCallTo(CLI); SDValue Ret = CallResult.first; if (model != TLSModel::LocalDynamic) return Ret; SDValue TGAHi = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, MipsII::MO_DTPREL_HI); SDValue Hi = DAG.getNode(MipsISD::Hi, DL, PtrVT, TGAHi); SDValue TGALo = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, MipsII::MO_DTPREL_LO); SDValue Lo = DAG.getNode(MipsISD::Lo, DL, PtrVT, TGALo); SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Ret); return DAG.getNode(ISD::ADD, DL, PtrVT, Add, Lo); } SDValue Offset; if (model == TLSModel::InitialExec) { // Initial Exec TLS Model SDValue TGA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, MipsII::MO_GOTTPREL); TGA = DAG.getNode(MipsISD::Wrapper, DL, PtrVT, getGlobalReg(DAG, PtrVT), TGA); Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), TGA, MachinePointerInfo()); } else { // Local Exec TLS Model assert(model == TLSModel::LocalExec); SDValue TGAHi = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, MipsII::MO_TPREL_HI); SDValue TGALo = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, MipsII::MO_TPREL_LO); SDValue Hi = DAG.getNode(MipsISD::Hi, DL, PtrVT, TGAHi); SDValue Lo = DAG.getNode(MipsISD::Lo, DL, PtrVT, TGALo); Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo); } SDValue ThreadPointer = DAG.getNode(MipsISD::ThreadPointer, DL, PtrVT); return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadPointer, Offset); } SDValue MipsTargetLowering:: lowerJumpTable(SDValue Op, SelectionDAG &DAG) const { JumpTableSDNode *N = cast(Op); EVT Ty = Op.getValueType(); if (!isPositionIndependent() && !ABI.IsN64()) return getAddrNonPIC(N, SDLoc(N), Ty, DAG); return getAddrLocal(N, SDLoc(N), Ty, DAG, ABI.IsN32() || ABI.IsN64()); } SDValue MipsTargetLowering:: lowerConstantPool(SDValue Op, SelectionDAG &DAG) const { ConstantPoolSDNode *N = cast(Op); EVT Ty = Op.getValueType(); if (!isPositionIndependent() && !ABI.IsN64()) { const MipsTargetObjectFile *TLOF = static_cast( getTargetMachine().getObjFileLowering()); if (TLOF->IsConstantInSmallSection(DAG.getDataLayout(), N->getConstVal(), getTargetMachine())) // %gp_rel relocation return getAddrGPRel(N, SDLoc(N), Ty, DAG); return getAddrNonPIC(N, SDLoc(N), Ty, DAG); } return getAddrLocal(N, SDLoc(N), Ty, DAG, ABI.IsN32() || ABI.IsN64()); } SDValue MipsTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MipsFunctionInfo *FuncInfo = MF.getInfo(); SDLoc DL(Op); SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), getPointerTy(MF.getDataLayout())); // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. const Value *SV = cast(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1), MachinePointerInfo(SV)); } SDValue MipsTargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); EVT VT = Node->getValueType(0); SDValue Chain = Node->getOperand(0); SDValue VAListPtr = Node->getOperand(1); unsigned Align = Node->getConstantOperandVal(3); const Value *SV = cast(Node->getOperand(2))->getValue(); SDLoc DL(Node); unsigned ArgSlotSizeInBytes = (ABI.IsN32() || ABI.IsN64()) ? 8 : 4; SDValue VAListLoad = DAG.getLoad(getPointerTy(DAG.getDataLayout()), DL, Chain, VAListPtr, MachinePointerInfo(SV)); SDValue VAList = VAListLoad; // Re-align the pointer if necessary. // It should only ever be necessary for 64-bit types on O32 since the minimum // argument alignment is the same as the maximum type alignment for N32/N64. // // FIXME: We currently align too often. The code generator doesn't notice // when the pointer is still aligned from the last va_arg (or pair of // va_args for the i64 on O32 case). if (Align > getMinStackArgumentAlignment()) { assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2"); VAList = DAG.getNode(ISD::ADD, DL, VAList.getValueType(), VAList, DAG.getConstant(Align - 1, DL, VAList.getValueType())); VAList = DAG.getNode(ISD::AND, DL, VAList.getValueType(), VAList, DAG.getConstant(-(int64_t)Align, DL, VAList.getValueType())); } // Increment the pointer, VAList, to the next vaarg. auto &TD = DAG.getDataLayout(); unsigned ArgSizeInBytes = TD.getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())); SDValue Tmp3 = DAG.getNode(ISD::ADD, DL, VAList.getValueType(), VAList, DAG.getConstant(alignTo(ArgSizeInBytes, ArgSlotSizeInBytes), DL, VAList.getValueType())); // Store the incremented VAList to the legalized pointer Chain = DAG.getStore(VAListLoad.getValue(1), DL, Tmp3, VAListPtr, MachinePointerInfo(SV)); // In big-endian mode we must adjust the pointer when the load size is smaller // than the argument slot size. We must also reduce the known alignment to // match. For example in the N64 ABI, we must add 4 bytes to the offset to get // the correct half of the slot, and reduce the alignment from 8 (slot // alignment) down to 4 (type alignment). if (!Subtarget.isLittle() && ArgSizeInBytes < ArgSlotSizeInBytes) { unsigned Adjustment = ArgSlotSizeInBytes - ArgSizeInBytes; VAList = DAG.getNode(ISD::ADD, DL, VAListPtr.getValueType(), VAList, DAG.getIntPtrConstant(Adjustment, DL)); } // Load the actual argument out of the pointer VAList return DAG.getLoad(VT, DL, Chain, VAList, MachinePointerInfo()); } static SDValue lowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG, bool HasExtractInsert) { EVT TyX = Op.getOperand(0).getValueType(); EVT TyY = Op.getOperand(1).getValueType(); SDLoc DL(Op); SDValue Const1 = DAG.getConstant(1, DL, MVT::i32); SDValue Const31 = DAG.getConstant(31, DL, MVT::i32); SDValue Res; // If operand is of type f64, extract the upper 32-bit. Otherwise, bitcast it // to i32. SDValue X = (TyX == MVT::f32) ? DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op.getOperand(0)) : DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0), Const1); SDValue Y = (TyY == MVT::f32) ? DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op.getOperand(1)) : DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(1), Const1); if (HasExtractInsert) { // ext E, Y, 31, 1 ; extract bit31 of Y // ins X, E, 31, 1 ; insert extracted bit at bit31 of X SDValue E = DAG.getNode(MipsISD::Ext, DL, MVT::i32, Y, Const31, Const1); Res = DAG.getNode(MipsISD::Ins, DL, MVT::i32, E, Const31, Const1, X); } else { // sll SllX, X, 1 // srl SrlX, SllX, 1 // srl SrlY, Y, 31 // sll SllY, SrlX, 31 // or Or, SrlX, SllY SDValue SllX = DAG.getNode(ISD::SHL, DL, MVT::i32, X, Const1); SDValue SrlX = DAG.getNode(ISD::SRL, DL, MVT::i32, SllX, Const1); SDValue SrlY = DAG.getNode(ISD::SRL, DL, MVT::i32, Y, Const31); SDValue SllY = DAG.getNode(ISD::SHL, DL, MVT::i32, SrlY, Const31); Res = DAG.getNode(ISD::OR, DL, MVT::i32, SrlX, SllY); } if (TyX == MVT::f32) return DAG.getNode(ISD::BITCAST, DL, Op.getOperand(0).getValueType(), Res); SDValue LowX = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32)); return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, LowX, Res); } static SDValue lowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG, bool HasExtractInsert) { unsigned WidthX = Op.getOperand(0).getValueSizeInBits(); unsigned WidthY = Op.getOperand(1).getValueSizeInBits(); EVT TyX = MVT::getIntegerVT(WidthX), TyY = MVT::getIntegerVT(WidthY); SDLoc DL(Op); SDValue Const1 = DAG.getConstant(1, DL, MVT::i32); // Bitcast to integer nodes. SDValue X = DAG.getNode(ISD::BITCAST, DL, TyX, Op.getOperand(0)); SDValue Y = DAG.getNode(ISD::BITCAST, DL, TyY, Op.getOperand(1)); if (HasExtractInsert) { // ext E, Y, width(Y) - 1, 1 ; extract bit width(Y)-1 of Y // ins X, E, width(X) - 1, 1 ; insert extracted bit at bit width(X)-1 of X SDValue E = DAG.getNode(MipsISD::Ext, DL, TyY, Y, DAG.getConstant(WidthY - 1, DL, MVT::i32), Const1); if (WidthX > WidthY) E = DAG.getNode(ISD::ZERO_EXTEND, DL, TyX, E); else if (WidthY > WidthX) E = DAG.getNode(ISD::TRUNCATE, DL, TyX, E); SDValue I = DAG.getNode(MipsISD::Ins, DL, TyX, E, DAG.getConstant(WidthX - 1, DL, MVT::i32), Const1, X); return DAG.getNode(ISD::BITCAST, DL, Op.getOperand(0).getValueType(), I); } // (d)sll SllX, X, 1 // (d)srl SrlX, SllX, 1 // (d)srl SrlY, Y, width(Y)-1 // (d)sll SllY, SrlX, width(Y)-1 // or Or, SrlX, SllY SDValue SllX = DAG.getNode(ISD::SHL, DL, TyX, X, Const1); SDValue SrlX = DAG.getNode(ISD::SRL, DL, TyX, SllX, Const1); SDValue SrlY = DAG.getNode(ISD::SRL, DL, TyY, Y, DAG.getConstant(WidthY - 1, DL, MVT::i32)); if (WidthX > WidthY) SrlY = DAG.getNode(ISD::ZERO_EXTEND, DL, TyX, SrlY); else if (WidthY > WidthX) SrlY = DAG.getNode(ISD::TRUNCATE, DL, TyX, SrlY); SDValue SllY = DAG.getNode(ISD::SHL, DL, TyX, SrlY, DAG.getConstant(WidthX - 1, DL, MVT::i32)); SDValue Or = DAG.getNode(ISD::OR, DL, TyX, SrlX, SllY); return DAG.getNode(ISD::BITCAST, DL, Op.getOperand(0).getValueType(), Or); } SDValue MipsTargetLowering::lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { if (Subtarget.isGP64bit()) return lowerFCOPYSIGN64(Op, DAG, Subtarget.hasExtractInsert()); return lowerFCOPYSIGN32(Op, DAG, Subtarget.hasExtractInsert()); } SDValue MipsTargetLowering:: lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { // check the depth assert((cast(Op.getOperand(0))->getZExtValue() == 0) && "Frame address can only be determined for current frame."); MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); MFI->setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); SDLoc DL(Op); SDValue FrameAddr = DAG.getCopyFromReg( DAG.getEntryNode(), DL, ABI.IsN64() ? Mips::FP_64 : Mips::FP, VT); return FrameAddr; } SDValue MipsTargetLowering::lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { if (verifyReturnAddressArgumentIsConstant(Op, DAG)) return SDValue(); // check the depth assert((cast(Op.getOperand(0))->getZExtValue() == 0) && "Return address can be determined only for current frame."); MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MVT VT = Op.getSimpleValueType(); unsigned RA = ABI.IsN64() ? Mips::RA_64 : Mips::RA; MFI->setReturnAddressIsTaken(true); // Return RA, which contains the return address. Mark it an implicit live-in. unsigned Reg = MF.addLiveIn(RA, getRegClassFor(VT)); return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, VT); } // An EH_RETURN is the result of lowering llvm.eh.return which in turn is // generated from __builtin_eh_return (offset, handler) // The effect of this is to adjust the stack pointer by "offset" // and then branch to "handler". SDValue MipsTargetLowering::lowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MipsFunctionInfo *MipsFI = MF.getInfo(); MipsFI->setCallsEhReturn(); SDValue Chain = Op.getOperand(0); SDValue Offset = Op.getOperand(1); SDValue Handler = Op.getOperand(2); SDLoc DL(Op); EVT Ty = ABI.IsN64() ? MVT::i64 : MVT::i32; // Store stack offset in V1, store jump target in V0. Glue CopyToReg and // EH_RETURN nodes, so that instructions are emitted back-to-back. unsigned OffsetReg = ABI.IsN64() ? Mips::V1_64 : Mips::V1; unsigned AddrReg = ABI.IsN64() ? Mips::V0_64 : Mips::V0; Chain = DAG.getCopyToReg(Chain, DL, OffsetReg, Offset, SDValue()); Chain = DAG.getCopyToReg(Chain, DL, AddrReg, Handler, Chain.getValue(1)); return DAG.getNode(MipsISD::EH_RETURN, DL, MVT::Other, Chain, DAG.getRegister(OffsetReg, Ty), DAG.getRegister(AddrReg, getPointerTy(MF.getDataLayout())), Chain.getValue(1)); } SDValue MipsTargetLowering::lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const { // FIXME: Need pseudo-fence for 'singlethread' fences // FIXME: Set SType for weaker fences where supported/appropriate. unsigned SType = 0; SDLoc DL(Op); return DAG.getNode(MipsISD::Sync, DL, MVT::Other, Op.getOperand(0), DAG.getConstant(SType, DL, MVT::i32)); } SDValue MipsTargetLowering::lowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); MVT VT = Subtarget.isGP64bit() ? MVT::i64 : MVT::i32; SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1); SDValue Shamt = Op.getOperand(2); // if shamt < (VT.bits): // lo = (shl lo, shamt) // hi = (or (shl hi, shamt) (srl (srl lo, 1), ~shamt)) // else: // lo = 0 // hi = (shl lo, shamt[4:0]) SDValue Not = DAG.getNode(ISD::XOR, DL, MVT::i32, Shamt, DAG.getConstant(-1, DL, MVT::i32)); SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, DAG.getConstant(1, DL, VT)); SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, Not); SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); SDValue ShiftLeftLo = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); SDValue Cond = DAG.getNode(ISD::AND, DL, MVT::i32, Shamt, DAG.getConstant(VT.getSizeInBits(), DL, MVT::i32)); Lo = DAG.getNode(ISD::SELECT, DL, VT, Cond, DAG.getConstant(0, DL, VT), ShiftLeftLo); Hi = DAG.getNode(ISD::SELECT, DL, VT, Cond, ShiftLeftLo, Or); SDValue Ops[2] = {Lo, Hi}; return DAG.getMergeValues(Ops, DL); } SDValue MipsTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const { SDLoc DL(Op); SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1); SDValue Shamt = Op.getOperand(2); MVT VT = Subtarget.isGP64bit() ? MVT::i64 : MVT::i32; // if shamt < (VT.bits): // lo = (or (shl (shl hi, 1), ~shamt) (srl lo, shamt)) // if isSRA: // hi = (sra hi, shamt) // else: // hi = (srl hi, shamt) // else: // if isSRA: // lo = (sra hi, shamt[4:0]) // hi = (sra hi, 31) // else: // lo = (srl hi, shamt[4:0]) // hi = 0 SDValue Not = DAG.getNode(ISD::XOR, DL, MVT::i32, Shamt, DAG.getConstant(-1, DL, MVT::i32)); SDValue ShiftLeft1Hi = DAG.getNode(ISD::SHL, DL, VT, Hi, DAG.getConstant(1, DL, VT)); SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, ShiftLeft1Hi, Not); SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); SDValue ShiftRightHi = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shamt); SDValue Cond = DAG.getNode(ISD::AND, DL, MVT::i32, Shamt, DAG.getConstant(VT.getSizeInBits(), DL, MVT::i32)); SDValue Ext = DAG.getNode(ISD::SRA, DL, VT, Hi, DAG.getConstant(VT.getSizeInBits() - 1, DL, VT)); Lo = DAG.getNode(ISD::SELECT, DL, VT, Cond, ShiftRightHi, Or); Hi = DAG.getNode(ISD::SELECT, DL, VT, Cond, IsSRA ? Ext : DAG.getConstant(0, DL, VT), ShiftRightHi); SDValue Ops[2] = {Lo, Hi}; return DAG.getMergeValues(Ops, DL); } static SDValue createLoadLR(unsigned Opc, SelectionDAG &DAG, LoadSDNode *LD, SDValue Chain, SDValue Src, unsigned Offset) { SDValue Ptr = LD->getBasePtr(); EVT VT = LD->getValueType(0), MemVT = LD->getMemoryVT(); EVT BasePtrVT = Ptr.getValueType(); SDLoc DL(LD); SDVTList VTList = DAG.getVTList(VT, MVT::Other); if (Offset) Ptr = DAG.getNode(ISD::ADD, DL, BasePtrVT, Ptr, DAG.getConstant(Offset, DL, BasePtrVT)); SDValue Ops[] = { Chain, Ptr, Src }; return DAG.getMemIntrinsicNode(Opc, DL, VTList, Ops, MemVT, LD->getMemOperand()); } // Expand an unaligned 32 or 64-bit integer load node. SDValue MipsTargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const { LoadSDNode *LD = cast(Op); EVT MemVT = LD->getMemoryVT(); if (Subtarget.systemSupportsUnalignedAccess()) return Op; // Return if load is aligned or if MemVT is neither i32 nor i64. if ((LD->getAlignment() >= MemVT.getSizeInBits() / 8) || ((MemVT != MVT::i32) && (MemVT != MVT::i64))) return SDValue(); bool IsLittle = Subtarget.isLittle(); EVT VT = Op.getValueType(); ISD::LoadExtType ExtType = LD->getExtensionType(); SDValue Chain = LD->getChain(), Undef = DAG.getUNDEF(VT); assert((VT == MVT::i32) || (VT == MVT::i64)); // Expand // (set dst, (i64 (load baseptr))) // to // (set tmp, (ldl (add baseptr, 7), undef)) // (set dst, (ldr baseptr, tmp)) if ((VT == MVT::i64) && (ExtType == ISD::NON_EXTLOAD)) { SDValue LDL = createLoadLR(MipsISD::LDL, DAG, LD, Chain, Undef, IsLittle ? 7 : 0); return createLoadLR(MipsISD::LDR, DAG, LD, LDL.getValue(1), LDL, IsLittle ? 0 : 7); } SDValue LWL = createLoadLR(MipsISD::LWL, DAG, LD, Chain, Undef, IsLittle ? 3 : 0); SDValue LWR = createLoadLR(MipsISD::LWR, DAG, LD, LWL.getValue(1), LWL, IsLittle ? 0 : 3); // Expand // (set dst, (i32 (load baseptr))) or // (set dst, (i64 (sextload baseptr))) or // (set dst, (i64 (extload baseptr))) // to // (set tmp, (lwl (add baseptr, 3), undef)) // (set dst, (lwr baseptr, tmp)) if ((VT == MVT::i32) || (ExtType == ISD::SEXTLOAD) || (ExtType == ISD::EXTLOAD)) return LWR; assert((VT == MVT::i64) && (ExtType == ISD::ZEXTLOAD)); // Expand // (set dst, (i64 (zextload baseptr))) // to // (set tmp0, (lwl (add baseptr, 3), undef)) // (set tmp1, (lwr baseptr, tmp0)) // (set tmp2, (shl tmp1, 32)) // (set dst, (srl tmp2, 32)) SDLoc DL(LD); SDValue Const32 = DAG.getConstant(32, DL, MVT::i32); SDValue SLL = DAG.getNode(ISD::SHL, DL, MVT::i64, LWR, Const32); SDValue SRL = DAG.getNode(ISD::SRL, DL, MVT::i64, SLL, Const32); SDValue Ops[] = { SRL, LWR.getValue(1) }; return DAG.getMergeValues(Ops, DL); } static SDValue createStoreLR(unsigned Opc, SelectionDAG &DAG, StoreSDNode *SD, SDValue Chain, unsigned Offset) { SDValue Ptr = SD->getBasePtr(), Value = SD->getValue(); EVT MemVT = SD->getMemoryVT(), BasePtrVT = Ptr.getValueType(); SDLoc DL(SD); SDVTList VTList = DAG.getVTList(MVT::Other); if (Offset) Ptr = DAG.getNode(ISD::ADD, DL, BasePtrVT, Ptr, DAG.getConstant(Offset, DL, BasePtrVT)); SDValue Ops[] = { Chain, Value, Ptr }; return DAG.getMemIntrinsicNode(Opc, DL, VTList, Ops, MemVT, SD->getMemOperand()); } // Expand an unaligned 32 or 64-bit integer store node. static SDValue lowerUnalignedIntStore(StoreSDNode *SD, SelectionDAG &DAG, bool IsLittle) { SDValue Value = SD->getValue(), Chain = SD->getChain(); EVT VT = Value.getValueType(); // Expand // (store val, baseptr) or // (truncstore val, baseptr) // to // (swl val, (add baseptr, 3)) // (swr val, baseptr) if ((VT == MVT::i32) || SD->isTruncatingStore()) { SDValue SWL = createStoreLR(MipsISD::SWL, DAG, SD, Chain, IsLittle ? 3 : 0); return createStoreLR(MipsISD::SWR, DAG, SD, SWL, IsLittle ? 0 : 3); } assert(VT == MVT::i64); // Expand // (store val, baseptr) // to // (sdl val, (add baseptr, 7)) // (sdr val, baseptr) SDValue SDL = createStoreLR(MipsISD::SDL, DAG, SD, Chain, IsLittle ? 7 : 0); return createStoreLR(MipsISD::SDR, DAG, SD, SDL, IsLittle ? 0 : 7); } // Lower (store (fp_to_sint $fp) $ptr) to (store (TruncIntFP $fp), $ptr). static SDValue lowerFP_TO_SINT_STORE(StoreSDNode *SD, SelectionDAG &DAG) { SDValue Val = SD->getValue(); if (Val.getOpcode() != ISD::FP_TO_SINT) return SDValue(); EVT FPTy = EVT::getFloatingPointVT(Val.getValueSizeInBits()); SDValue Tr = DAG.getNode(MipsISD::TruncIntFP, SDLoc(Val), FPTy, Val.getOperand(0)); return DAG.getStore(SD->getChain(), SDLoc(SD), Tr, SD->getBasePtr(), SD->getPointerInfo(), SD->getAlignment(), SD->getMemOperand()->getFlags()); } SDValue MipsTargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { StoreSDNode *SD = cast(Op); EVT MemVT = SD->getMemoryVT(); // Lower unaligned integer stores. if (!Subtarget.systemSupportsUnalignedAccess() && (SD->getAlignment() < MemVT.getSizeInBits() / 8) && ((MemVT == MVT::i32) || (MemVT == MVT::i64))) return lowerUnalignedIntStore(SD, DAG, Subtarget.isLittle()); return lowerFP_TO_SINT_STORE(SD, DAG); } -SDValue MipsTargetLowering::lowerADD(SDValue Op, SelectionDAG &DAG) const { - if (Op->getOperand(0).getOpcode() != ISD::FRAMEADDR - || cast - (Op->getOperand(0).getOperand(0))->getZExtValue() != 0 - || Op->getOperand(1).getOpcode() != ISD::FRAME_TO_ARGS_OFFSET) - return SDValue(); +SDValue MipsTargetLowering::lowerEH_DWARF_CFA(SDValue Op, + SelectionDAG &DAG) const { - // The pattern - // (add (frameaddr 0), (frame_to_args_offset)) - // results from lowering llvm.eh.dwarf.cfa intrinsic. Transform it to - // (add FrameObject, 0) - // where FrameObject is a fixed StackObject with offset 0 which points to - // the old stack pointer. + // Return a fixed StackObject with offset 0 which points to the old stack + // pointer. MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); EVT ValTy = Op->getValueType(0); int FI = MFI->CreateFixedObject(Op.getValueSizeInBits() / 8, 0, false); - SDValue InArgsAddr = DAG.getFrameIndex(FI, ValTy); - SDLoc DL(Op); - return DAG.getNode(ISD::ADD, DL, ValTy, InArgsAddr, - DAG.getConstant(0, DL, ValTy)); + return DAG.getFrameIndex(FI, ValTy); } SDValue MipsTargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const { EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits()); SDValue Trunc = DAG.getNode(MipsISD::TruncIntFP, SDLoc(Op), FPTy, Op.getOperand(0)); return DAG.getNode(ISD::BITCAST, SDLoc(Op), Op.getValueType(), Trunc); } //===----------------------------------------------------------------------===// // Calling Convention Implementation //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // TODO: Implement a generic logic using tblgen that can support this. // Mips O32 ABI rules: // --- // i32 - Passed in A0, A1, A2, A3 and stack // f32 - Only passed in f32 registers if no int reg has been used yet to hold // an argument. Otherwise, passed in A1, A2, A3 and stack. // f64 - Only passed in two aliased f32 registers if no int reg has been used // yet to hold an argument. Otherwise, use A2, A3 and stack. If A1 is // not used, it must be shadowed. If only A3 is available, shadow it and // go to stack. // // For vararg functions, all arguments are passed in A0, A1, A2, A3 and stack. //===----------------------------------------------------------------------===// static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, ArrayRef F64Regs) { const MipsSubtarget &Subtarget = static_cast( State.getMachineFunction().getSubtarget()); static const MCPhysReg IntRegs[] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 }; static const MCPhysReg F32Regs[] = { Mips::F12, Mips::F14 }; // Do not process byval args here. if (ArgFlags.isByVal()) return true; // Promote i8 and i16 if (ArgFlags.isInReg() && !Subtarget.isLittle()) { if (LocVT == MVT::i8 || LocVT == MVT::i16 || LocVT == MVT::i32) { LocVT = MVT::i32; if (ArgFlags.isSExt()) LocInfo = CCValAssign::SExtUpper; else if (ArgFlags.isZExt()) LocInfo = CCValAssign::ZExtUpper; else LocInfo = CCValAssign::AExtUpper; } } // Promote i8 and i16 if (LocVT == MVT::i8 || LocVT == MVT::i16) { LocVT = MVT::i32; if (ArgFlags.isSExt()) LocInfo = CCValAssign::SExt; else if (ArgFlags.isZExt()) LocInfo = CCValAssign::ZExt; else LocInfo = CCValAssign::AExt; } unsigned Reg; // f32 and f64 are allocated in A0, A1, A2, A3 when either of the following // is true: function is vararg, argument is 3rd or higher, there is previous // argument which is not f32 or f64. bool AllocateFloatsInIntReg = State.isVarArg() || ValNo > 1 || State.getFirstUnallocated(F32Regs) != ValNo; unsigned OrigAlign = ArgFlags.getOrigAlign(); bool isI64 = (ValVT == MVT::i32 && OrigAlign == 8); if (ValVT == MVT::i32 || (ValVT == MVT::f32 && AllocateFloatsInIntReg)) { Reg = State.AllocateReg(IntRegs); // If this is the first part of an i64 arg, // the allocated register must be either A0 or A2. if (isI64 && (Reg == Mips::A1 || Reg == Mips::A3)) Reg = State.AllocateReg(IntRegs); LocVT = MVT::i32; } else if (ValVT == MVT::f64 && AllocateFloatsInIntReg) { // Allocate int register and shadow next int register. If first // available register is Mips::A1 or Mips::A3, shadow it too. Reg = State.AllocateReg(IntRegs); if (Reg == Mips::A1 || Reg == Mips::A3) Reg = State.AllocateReg(IntRegs); State.AllocateReg(IntRegs); LocVT = MVT::i32; } else if (ValVT.isFloatingPoint() && !AllocateFloatsInIntReg) { // we are guaranteed to find an available float register if (ValVT == MVT::f32) { Reg = State.AllocateReg(F32Regs); // Shadow int register State.AllocateReg(IntRegs); } else { Reg = State.AllocateReg(F64Regs); // Shadow int registers unsigned Reg2 = State.AllocateReg(IntRegs); if (Reg2 == Mips::A1 || Reg2 == Mips::A3) State.AllocateReg(IntRegs); State.AllocateReg(IntRegs); } } else llvm_unreachable("Cannot handle this ValVT."); if (!Reg) { unsigned Offset = State.AllocateStack(ValVT.getSizeInBits() >> 3, OrigAlign); State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); } else State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } static bool CC_MipsO32_FP32(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { static const MCPhysReg F64Regs[] = { Mips::D6, Mips::D7 }; return CC_MipsO32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, F64Regs); } static bool CC_MipsO32_FP64(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { static const MCPhysReg F64Regs[] = { Mips::D12_64, Mips::D14_64 }; return CC_MipsO32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, F64Regs); } static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) LLVM_ATTRIBUTE_UNUSED; #include "MipsGenCallingConv.inc" //===----------------------------------------------------------------------===// // Call Calling Convention Implementation //===----------------------------------------------------------------------===// // Return next O32 integer argument register. static unsigned getNextIntArgReg(unsigned Reg) { assert((Reg == Mips::A0) || (Reg == Mips::A2)); return (Reg == Mips::A0) ? Mips::A1 : Mips::A3; } SDValue MipsTargetLowering::passArgOnStack(SDValue StackPtr, unsigned Offset, SDValue Chain, SDValue Arg, const SDLoc &DL, bool IsTailCall, SelectionDAG &DAG) const { if (!IsTailCall) { SDValue PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(DAG.getDataLayout()), StackPtr, DAG.getIntPtrConstant(Offset, DL)); return DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo()); } MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); int FI = MFI->CreateFixedObject(Arg.getValueSizeInBits() / 8, Offset, false); SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); return DAG.getStore(Chain, DL, Arg, FIN, MachinePointerInfo(), /* Alignment = */ 0, MachineMemOperand::MOVolatile); } void MipsTargetLowering:: getOpndList(SmallVectorImpl &Ops, std::deque< std::pair > &RegsToPass, bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const { // Insert node "GP copy globalreg" before call to function. // // R_MIPS_CALL* operators (emitted when non-internal functions are called // in PIC mode) allow symbols to be resolved via lazy binding. // The lazy binding stub requires GP to point to the GOT. // Note that we don't need GP to point to the GOT for indirect calls // (when R_MIPS_CALL* is not used for the call) because Mips linker generates // lazy binding stub for a function only when R_MIPS_CALL* are the only relocs // used for the function (that is, Mips linker doesn't generate lazy binding // stub for a function whose address is taken in the program). if (IsPICCall && !InternalLinkage && IsCallReloc) { unsigned GPReg = ABI.IsN64() ? Mips::GP_64 : Mips::GP; EVT Ty = ABI.IsN64() ? MVT::i64 : MVT::i32; RegsToPass.push_back(std::make_pair(GPReg, getGlobalReg(CLI.DAG, Ty))); } // Build a sequence of copy-to-reg nodes chained together with token // chain and flag operands which copy the outgoing args into registers. // The InFlag in necessary since all emitted instructions must be // stuck together. SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { Chain = CLI.DAG.getCopyToReg(Chain, CLI.DL, RegsToPass[i].first, RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } // Add argument registers to the end of the list so that they are // known live into the call. for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) Ops.push_back(CLI.DAG.getRegister(RegsToPass[i].first, RegsToPass[i].second.getValueType())); // Add a register mask operand representing the call-preserved registers. const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); const uint32_t *Mask = TRI->getCallPreservedMask(CLI.DAG.getMachineFunction(), CLI.CallConv); assert(Mask && "Missing call preserved mask for calling convention"); if (Subtarget.inMips16HardFloat()) { if (GlobalAddressSDNode *G = dyn_cast(CLI.Callee)) { llvm::StringRef Sym = G->getGlobal()->getName(); Function *F = G->getGlobal()->getParent()->getFunction(Sym); if (F && F->hasFnAttribute("__Mips16RetHelper")) { Mask = MipsRegisterInfo::getMips16RetHelperMask(); } } } Ops.push_back(CLI.DAG.getRegisterMask(Mask)); if (InFlag.getNode()) Ops.push_back(InFlag); } /// LowerCall - functions arguments are copied from virtual regs to /// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted. SDValue MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { SelectionDAG &DAG = CLI.DAG; SDLoc DL = CLI.DL; SmallVectorImpl &Outs = CLI.Outs; SmallVectorImpl &OutVals = CLI.OutVals; SmallVectorImpl &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; bool &IsTailCall = CLI.IsTailCall; CallingConv::ID CallConv = CLI.CallConv; bool IsVarArg = CLI.IsVarArg; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); const TargetFrameLowering *TFL = Subtarget.getFrameLowering(); MipsFunctionInfo *FuncInfo = MF.getInfo(); bool IsPIC = isPositionIndependent(); // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; MipsCCState CCInfo( CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext(), MipsCCState::getSpecialCallingConvForCallee(Callee.getNode(), Subtarget)); // Allocate the reserved argument area. It seems strange to do this from the // caller side but removing it breaks the frame size calculation. CCInfo.AllocateStack(ABI.GetCalleeAllocdArgSizeInBytes(CallConv), 1); CCInfo.AnalyzeCallOperands(Outs, CC_Mips, CLI.getArgs(), Callee.getNode()); // Get a count of how many bytes are to be pushed on the stack. unsigned NextStackOffset = CCInfo.getNextStackOffset(); // Check if it's really possible to do a tail call. if (IsTailCall) IsTailCall = isEligibleForTailCallOptimization( CCInfo, NextStackOffset, *MF.getInfo()); if (!IsTailCall && CLI.CS && CLI.CS->isMustTailCall()) report_fatal_error("failed to perform tail call elimination on a call " "site marked musttail"); if (IsTailCall) ++NumTailCalls; // Chain is the output chain of the last Load/Store or CopyToReg node. // ByValChain is the output chain of the last Memcpy node created for copying // byval arguments to the stack. unsigned StackAlignment = TFL->getStackAlignment(); NextStackOffset = alignTo(NextStackOffset, StackAlignment); SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, DL, true); if (!IsTailCall) Chain = DAG.getCALLSEQ_START(Chain, NextStackOffsetVal, DL); SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, ABI.IsN64() ? Mips::SP_64 : Mips::SP, getPointerTy(DAG.getDataLayout())); std::deque< std::pair > RegsToPass; SmallVector MemOpChains; CCInfo.rewindByValRegsInfo(); // Walk the register/memloc assignments, inserting copies/loads. for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { SDValue Arg = OutVals[i]; CCValAssign &VA = ArgLocs[i]; MVT ValVT = VA.getValVT(), LocVT = VA.getLocVT(); ISD::ArgFlagsTy Flags = Outs[i].Flags; bool UseUpperBits = false; // ByVal Arg. if (Flags.isByVal()) { unsigned FirstByValReg, LastByValReg; unsigned ByValIdx = CCInfo.getInRegsParamsProcessed(); CCInfo.getInRegsParamInfo(ByValIdx, FirstByValReg, LastByValReg); assert(Flags.getByValSize() && "ByVal args of size 0 should have been ignored by front-end."); assert(ByValIdx < CCInfo.getInRegsParamsCount()); assert(!IsTailCall && "Do not tail-call optimize if there is a byval argument."); passByValArg(Chain, DL, RegsToPass, MemOpChains, StackPtr, MFI, DAG, Arg, FirstByValReg, LastByValReg, Flags, Subtarget.isLittle(), VA); CCInfo.nextInRegsParam(); continue; } // Promote the value if needed. switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: if (VA.isRegLoc()) { if ((ValVT == MVT::f32 && LocVT == MVT::i32) || (ValVT == MVT::f64 && LocVT == MVT::i64) || (ValVT == MVT::i64 && LocVT == MVT::f64)) Arg = DAG.getNode(ISD::BITCAST, DL, LocVT, Arg); else if (ValVT == MVT::f64 && LocVT == MVT::i32) { SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Arg, DAG.getConstant(0, DL, MVT::i32)); SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Arg, DAG.getConstant(1, DL, MVT::i32)); if (!Subtarget.isLittle()) std::swap(Lo, Hi); unsigned LocRegLo = VA.getLocReg(); unsigned LocRegHigh = getNextIntArgReg(LocRegLo); RegsToPass.push_back(std::make_pair(LocRegLo, Lo)); RegsToPass.push_back(std::make_pair(LocRegHigh, Hi)); continue; } } break; case CCValAssign::BCvt: Arg = DAG.getNode(ISD::BITCAST, DL, LocVT, Arg); break; case CCValAssign::SExtUpper: UseUpperBits = true; // Fallthrough case CCValAssign::SExt: Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, LocVT, Arg); break; case CCValAssign::ZExtUpper: UseUpperBits = true; // Fallthrough case CCValAssign::ZExt: Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, LocVT, Arg); break; case CCValAssign::AExtUpper: UseUpperBits = true; // Fallthrough case CCValAssign::AExt: Arg = DAG.getNode(ISD::ANY_EXTEND, DL, LocVT, Arg); break; } if (UseUpperBits) { unsigned ValSizeInBits = Outs[i].ArgVT.getSizeInBits(); unsigned LocSizeInBits = VA.getLocVT().getSizeInBits(); Arg = DAG.getNode( ISD::SHL, DL, VA.getLocVT(), Arg, DAG.getConstant(LocSizeInBits - ValSizeInBits, DL, VA.getLocVT())); } // Arguments that can be passed on register must be kept at // RegsToPass vector if (VA.isRegLoc()) { RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); continue; } // Register can't get to this point... assert(VA.isMemLoc()); // emit ISD::STORE whichs stores the // parameter value to a stack Location MemOpChains.push_back(passArgOnStack(StackPtr, VA.getLocMemOffset(), Chain, Arg, DL, IsTailCall, DAG)); } // Transform all store nodes into one single node because all store // nodes are independent of each other. if (!MemOpChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol // node so that legalize doesn't hack it. bool IsPICCall = (ABI.IsN64() || IsPIC); // true if calls are translated to // jalr $25 bool GlobalOrExternal = false, InternalLinkage = false, IsCallReloc = false; SDValue CalleeLo; EVT Ty = Callee.getValueType(); if (GlobalAddressSDNode *G = dyn_cast(Callee)) { if (IsPICCall) { const GlobalValue *Val = G->getGlobal(); InternalLinkage = Val->hasInternalLinkage(); if (InternalLinkage) Callee = getAddrLocal(G, DL, Ty, DAG, ABI.IsN32() || ABI.IsN64()); else if (LargeGOT) { Callee = getAddrGlobalLargeGOT(G, DL, Ty, DAG, MipsII::MO_CALL_HI16, MipsII::MO_CALL_LO16, Chain, FuncInfo->callPtrInfo(Val)); IsCallReloc = true; } else { Callee = getAddrGlobal(G, DL, Ty, DAG, MipsII::MO_GOT_CALL, Chain, FuncInfo->callPtrInfo(Val)); IsCallReloc = true; } } else Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, getPointerTy(DAG.getDataLayout()), 0, MipsII::MO_NO_FLAG); GlobalOrExternal = true; } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { const char *Sym = S->getSymbol(); if (!ABI.IsN64() && !IsPIC) // !N64 && static Callee = DAG.getTargetExternalSymbol( Sym, getPointerTy(DAG.getDataLayout()), MipsII::MO_NO_FLAG); else if (LargeGOT) { Callee = getAddrGlobalLargeGOT(S, DL, Ty, DAG, MipsII::MO_CALL_HI16, MipsII::MO_CALL_LO16, Chain, FuncInfo->callPtrInfo(Sym)); IsCallReloc = true; } else { // N64 || PIC Callee = getAddrGlobal(S, DL, Ty, DAG, MipsII::MO_GOT_CALL, Chain, FuncInfo->callPtrInfo(Sym)); IsCallReloc = true; } GlobalOrExternal = true; } SmallVector Ops(1, Chain); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, InternalLinkage, IsCallReloc, CLI, Callee, Chain); if (IsTailCall) return DAG.getNode(MipsISD::TailCall, DL, MVT::Other, Ops); Chain = DAG.getNode(MipsISD::JmpLink, DL, NodeTys, Ops); SDValue InFlag = Chain.getValue(1); // Create the CALLSEQ_END node. Chain = DAG.getCALLSEQ_END(Chain, NextStackOffsetVal, DAG.getIntPtrConstant(0, DL, true), InFlag, DL); InFlag = Chain.getValue(1); // Handle result values, copying them out of physregs into vregs that we // return. return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG, InVals, CLI); } /// LowerCallResult - Lower the result values of a call into the /// appropriate copies out of appropriate physical registers. SDValue MipsTargetLowering::LowerCallResult( SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl &InVals, TargetLowering::CallLoweringInfo &CLI) const { // Assign locations to each value returned by this call. SmallVector RVLocs; MipsCCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext()); CCInfo.AnalyzeCallResult(Ins, RetCC_Mips, CLI); // Copy all of the result registers out of their specified physreg. for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); SDValue Val = DAG.getCopyFromReg(Chain, DL, RVLocs[i].getLocReg(), RVLocs[i].getLocVT(), InFlag); Chain = Val.getValue(1); InFlag = Val.getValue(2); if (VA.isUpperBitsInLoc()) { unsigned ValSizeInBits = Ins[i].ArgVT.getSizeInBits(); unsigned LocSizeInBits = VA.getLocVT().getSizeInBits(); unsigned Shift = VA.getLocInfo() == CCValAssign::ZExtUpper ? ISD::SRL : ISD::SRA; Val = DAG.getNode( Shift, DL, VA.getLocVT(), Val, DAG.getConstant(LocSizeInBits - ValSizeInBits, DL, VA.getLocVT())); } switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::BCvt: Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); break; case CCValAssign::AExt: case CCValAssign::AExtUpper: Val = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Val); break; case CCValAssign::ZExt: case CCValAssign::ZExtUpper: Val = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Val, DAG.getValueType(VA.getValVT())); Val = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Val); break; case CCValAssign::SExt: case CCValAssign::SExtUpper: Val = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Val, DAG.getValueType(VA.getValVT())); Val = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Val); break; } InVals.push_back(Val); } return Chain; } static SDValue UnpackFromArgumentSlot(SDValue Val, const CCValAssign &VA, EVT ArgVT, const SDLoc &DL, SelectionDAG &DAG) { MVT LocVT = VA.getLocVT(); EVT ValVT = VA.getValVT(); // Shift into the upper bits if necessary. switch (VA.getLocInfo()) { default: break; case CCValAssign::AExtUpper: case CCValAssign::SExtUpper: case CCValAssign::ZExtUpper: { unsigned ValSizeInBits = ArgVT.getSizeInBits(); unsigned LocSizeInBits = VA.getLocVT().getSizeInBits(); unsigned Opcode = VA.getLocInfo() == CCValAssign::ZExtUpper ? ISD::SRL : ISD::SRA; Val = DAG.getNode( Opcode, DL, VA.getLocVT(), Val, DAG.getConstant(LocSizeInBits - ValSizeInBits, DL, VA.getLocVT())); break; } } // If this is an value smaller than the argument slot size (32-bit for O32, // 64-bit for N32/N64), it has been promoted in some way to the argument slot // size. Extract the value and insert any appropriate assertions regarding // sign/zero extension. switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::AExtUpper: case CCValAssign::AExt: Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val); break; case CCValAssign::SExtUpper: case CCValAssign::SExt: Val = DAG.getNode(ISD::AssertSext, DL, LocVT, Val, DAG.getValueType(ValVT)); Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val); break; case CCValAssign::ZExtUpper: case CCValAssign::ZExt: Val = DAG.getNode(ISD::AssertZext, DL, LocVT, Val, DAG.getValueType(ValVT)); Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val); break; case CCValAssign::BCvt: Val = DAG.getNode(ISD::BITCAST, DL, ValVT, Val); break; } return Val; } //===----------------------------------------------------------------------===// // Formal Arguments Calling Convention Implementation //===----------------------------------------------------------------------===// /// LowerFormalArguments - transform physical registers into virtual registers /// and generate load operations for arguments places on the stack. SDValue MipsTargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MipsFunctionInfo *MipsFI = MF.getInfo(); MipsFI->setVarArgsFrameIndex(0); // Used with vargs to acumulate store chains. std::vector OutChains; // Assign locations to all of the incoming arguments. SmallVector ArgLocs; MipsCCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext()); CCInfo.AllocateStack(ABI.GetCalleeAllocdArgSizeInBytes(CallConv), 1); const Function *Func = DAG.getMachineFunction().getFunction(); Function::const_arg_iterator FuncArg = Func->arg_begin(); if (Func->hasFnAttribute("interrupt") && !Func->arg_empty()) report_fatal_error( "Functions with the interrupt attribute cannot have arguments!"); CCInfo.AnalyzeFormalArguments(Ins, CC_Mips_FixedArg); MipsFI->setFormalArgInfo(CCInfo.getNextStackOffset(), CCInfo.getInRegsParamsCount() > 0); unsigned CurArgIdx = 0; CCInfo.rewindByValRegsInfo(); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; if (Ins[i].isOrigArg()) { std::advance(FuncArg, Ins[i].getOrigArgIndex() - CurArgIdx); CurArgIdx = Ins[i].getOrigArgIndex(); } EVT ValVT = VA.getValVT(); ISD::ArgFlagsTy Flags = Ins[i].Flags; bool IsRegLoc = VA.isRegLoc(); if (Flags.isByVal()) { assert(Ins[i].isOrigArg() && "Byval arguments cannot be implicit"); unsigned FirstByValReg, LastByValReg; unsigned ByValIdx = CCInfo.getInRegsParamsProcessed(); CCInfo.getInRegsParamInfo(ByValIdx, FirstByValReg, LastByValReg); assert(Flags.getByValSize() && "ByVal args of size 0 should have been ignored by front-end."); assert(ByValIdx < CCInfo.getInRegsParamsCount()); copyByValRegs(Chain, DL, OutChains, DAG, Flags, InVals, &*FuncArg, FirstByValReg, LastByValReg, VA, CCInfo); CCInfo.nextInRegsParam(); continue; } // Arguments stored on registers if (IsRegLoc) { MVT RegVT = VA.getLocVT(); unsigned ArgReg = VA.getLocReg(); const TargetRegisterClass *RC = getRegClassFor(RegVT); // Transform the arguments stored on // physical registers into virtual ones unsigned Reg = addLiveIn(DAG.getMachineFunction(), ArgReg, RC); SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT); ArgValue = UnpackFromArgumentSlot(ArgValue, VA, Ins[i].ArgVT, DL, DAG); // Handle floating point arguments passed in integer registers and // long double arguments passed in floating point registers. if ((RegVT == MVT::i32 && ValVT == MVT::f32) || (RegVT == MVT::i64 && ValVT == MVT::f64) || (RegVT == MVT::f64 && ValVT == MVT::i64)) ArgValue = DAG.getNode(ISD::BITCAST, DL, ValVT, ArgValue); else if (ABI.IsO32() && RegVT == MVT::i32 && ValVT == MVT::f64) { unsigned Reg2 = addLiveIn(DAG.getMachineFunction(), getNextIntArgReg(ArgReg), RC); SDValue ArgValue2 = DAG.getCopyFromReg(Chain, DL, Reg2, RegVT); if (!Subtarget.isLittle()) std::swap(ArgValue, ArgValue2); ArgValue = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, ArgValue, ArgValue2); } InVals.push_back(ArgValue); } else { // VA.isRegLoc() MVT LocVT = VA.getLocVT(); if (ABI.IsO32()) { // We ought to be able to use LocVT directly but O32 sets it to i32 // when allocating floating point values to integer registers. // This shouldn't influence how we load the value into registers unless // we are targeting softfloat. if (VA.getValVT().isFloatingPoint() && !Subtarget.useSoftFloat()) LocVT = VA.getValVT(); } // sanity check assert(VA.isMemLoc()); // The stack pointer offset is relative to the caller stack frame. int FI = MFI->CreateFixedObject(LocVT.getSizeInBits() / 8, VA.getLocMemOffset(), true); // Create load nodes to retrieve arguments from the stack SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); SDValue ArgValue = DAG.getLoad( LocVT, DL, Chain, FIN, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); OutChains.push_back(ArgValue.getValue(1)); ArgValue = UnpackFromArgumentSlot(ArgValue, VA, Ins[i].ArgVT, DL, DAG); InVals.push_back(ArgValue); } } for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { // The mips ABIs for returning structs by value requires that we copy // the sret argument into $v0 for the return. Save the argument into // a virtual register so that we can access it from the return points. if (Ins[i].Flags.isSRet()) { unsigned Reg = MipsFI->getSRetReturnReg(); if (!Reg) { Reg = MF.getRegInfo().createVirtualRegister( getRegClassFor(ABI.IsN64() ? MVT::i64 : MVT::i32)); MipsFI->setSRetReturnReg(Reg); } SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[i]); Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain); break; } } if (IsVarArg) writeVarArgRegs(OutChains, Chain, DL, DAG, CCInfo); // All stores are grouped in one node to allow the matching between // the size of Ins and InVals. This only happens when on varg functions if (!OutChains.empty()) { OutChains.push_back(Chain); Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); } return Chain; } //===----------------------------------------------------------------------===// // Return Value Calling Convention Implementation //===----------------------------------------------------------------------===// bool MipsTargetLowering::CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl &Outs, LLVMContext &Context) const { SmallVector RVLocs; MipsCCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); return CCInfo.CheckReturn(Outs, RetCC_Mips); } bool MipsTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const { if (Subtarget.hasMips3() && Subtarget.useSoftFloat()) { if (Type == MVT::i32) return true; } return IsSigned; } SDValue MipsTargetLowering::LowerInterruptReturn(SmallVectorImpl &RetOps, const SDLoc &DL, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MipsFunctionInfo *MipsFI = MF.getInfo(); MipsFI->setISR(); return DAG.getNode(MipsISD::ERet, DL, MVT::Other, RetOps); } SDValue MipsTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SDLoc &DL, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of // the return value to a location SmallVector RVLocs; MachineFunction &MF = DAG.getMachineFunction(); // CCState - Info about the registers and stack slot. MipsCCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); // Analyze return values. CCInfo.AnalyzeReturn(Outs, RetCC_Mips); SDValue Flag; SmallVector RetOps(1, Chain); // Copy the result values into the output registers. for (unsigned i = 0; i != RVLocs.size(); ++i) { SDValue Val = OutVals[i]; CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); bool UseUpperBits = false; switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::BCvt: Val = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Val); break; case CCValAssign::AExtUpper: UseUpperBits = true; // Fallthrough case CCValAssign::AExt: Val = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Val); break; case CCValAssign::ZExtUpper: UseUpperBits = true; // Fallthrough case CCValAssign::ZExt: Val = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Val); break; case CCValAssign::SExtUpper: UseUpperBits = true; // Fallthrough case CCValAssign::SExt: Val = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Val); break; } if (UseUpperBits) { unsigned ValSizeInBits = Outs[i].ArgVT.getSizeInBits(); unsigned LocSizeInBits = VA.getLocVT().getSizeInBits(); Val = DAG.getNode( ISD::SHL, DL, VA.getLocVT(), Val, DAG.getConstant(LocSizeInBits - ValSizeInBits, DL, VA.getLocVT())); } Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Flag); // Guarantee that all emitted copies are stuck together with flags. Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } // The mips ABIs for returning structs by value requires that we copy // the sret argument into $v0 for the return. We saved the argument into // a virtual register in the entry block, so now we copy the value out // and into $v0. if (MF.getFunction()->hasStructRetAttr()) { MipsFunctionInfo *MipsFI = MF.getInfo(); unsigned Reg = MipsFI->getSRetReturnReg(); if (!Reg) llvm_unreachable("sret virtual register not created in the entry block"); SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy(DAG.getDataLayout())); unsigned V0 = ABI.IsN64() ? Mips::V0_64 : Mips::V0; Chain = DAG.getCopyToReg(Chain, DL, V0, Val, Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(V0, getPointerTy(DAG.getDataLayout()))); } RetOps[0] = Chain; // Update chain. // Add the flag if we have it. if (Flag.getNode()) RetOps.push_back(Flag); // ISRs must use "eret". if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt")) return LowerInterruptReturn(RetOps, DL, DAG); // Standard return on Mips is a "jr $ra" return DAG.getNode(MipsISD::Ret, DL, MVT::Other, RetOps); } //===----------------------------------------------------------------------===// // Mips Inline Assembly Support //===----------------------------------------------------------------------===// /// getConstraintType - Given a constraint letter, return the type of /// constraint it is for this target. MipsTargetLowering::ConstraintType MipsTargetLowering::getConstraintType(StringRef Constraint) const { // Mips specific constraints // GCC config/mips/constraints.md // // 'd' : An address register. Equivalent to r // unless generating MIPS16 code. // 'y' : Equivalent to r; retained for // backwards compatibility. // 'c' : A register suitable for use in an indirect // jump. This will always be $25 for -mabicalls. // 'l' : The lo register. 1 word storage. // 'x' : The hilo register pair. Double word storage. if (Constraint.size() == 1) { switch (Constraint[0]) { default : break; case 'd': case 'y': case 'f': case 'c': case 'l': case 'x': return C_RegisterClass; case 'R': return C_Memory; } } if (Constraint == "ZC") return C_Memory; return TargetLowering::getConstraintType(Constraint); } /// Examine constraint type and operand type and determine a weight value. /// This object must already have been set up with the operand type /// and the current alternative constraint selected. TargetLowering::ConstraintWeight MipsTargetLowering::getSingleConstraintMatchWeight( AsmOperandInfo &info, const char *constraint) const { ConstraintWeight weight = CW_Invalid; Value *CallOperandVal = info.CallOperandVal; // If we don't have a value, we can't do a match, // but allow it at the lowest weight. if (!CallOperandVal) return CW_Default; Type *type = CallOperandVal->getType(); // Look at the constraint type. switch (*constraint) { default: weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); break; case 'd': case 'y': if (type->isIntegerTy()) weight = CW_Register; break; case 'f': // FPU or MSA register if (Subtarget.hasMSA() && type->isVectorTy() && cast(type)->getBitWidth() == 128) weight = CW_Register; else if (type->isFloatTy()) weight = CW_Register; break; case 'c': // $25 for indirect jumps case 'l': // lo register case 'x': // hilo register pair if (type->isIntegerTy()) weight = CW_SpecificReg; break; case 'I': // signed 16 bit immediate case 'J': // integer zero case 'K': // unsigned 16 bit immediate case 'L': // signed 32 bit immediate where lower 16 bits are 0 case 'N': // immediate in the range of -65535 to -1 (inclusive) case 'O': // signed 15 bit immediate (+- 16383) case 'P': // immediate in the range of 65535 to 1 (inclusive) if (isa(CallOperandVal)) weight = CW_Constant; break; case 'R': weight = CW_Memory; break; } return weight; } /// This is a helper function to parse a physical register string and split it /// into non-numeric and numeric parts (Prefix and Reg). The first boolean flag /// that is returned indicates whether parsing was successful. The second flag /// is true if the numeric part exists. static std::pair parsePhysicalReg(StringRef C, StringRef &Prefix, unsigned long long &Reg) { if (C.front() != '{' || C.back() != '}') return std::make_pair(false, false); // Search for the first numeric character. StringRef::const_iterator I, B = C.begin() + 1, E = C.end() - 1; I = std::find_if(B, E, isdigit); Prefix = StringRef(B, I - B); // The second flag is set to false if no numeric characters were found. if (I == E) return std::make_pair(true, false); // Parse the numeric characters. return std::make_pair(!getAsUnsignedInteger(StringRef(I, E - I), 10, Reg), true); } std::pair MipsTargetLowering:: parseRegForInlineAsmConstraint(StringRef C, MVT VT) const { const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); const TargetRegisterClass *RC; StringRef Prefix; unsigned long long Reg; std::pair R = parsePhysicalReg(C, Prefix, Reg); if (!R.first) return std::make_pair(0U, nullptr); if ((Prefix == "hi" || Prefix == "lo")) { // Parse hi/lo. // No numeric characters follow "hi" or "lo". if (R.second) return std::make_pair(0U, nullptr); RC = TRI->getRegClass(Prefix == "hi" ? Mips::HI32RegClassID : Mips::LO32RegClassID); return std::make_pair(*(RC->begin()), RC); } else if (Prefix.startswith("$msa")) { // Parse $msa(ir|csr|access|save|modify|request|map|unmap) // No numeric characters follow the name. if (R.second) return std::make_pair(0U, nullptr); Reg = StringSwitch(Prefix) .Case("$msair", Mips::MSAIR) .Case("$msacsr", Mips::MSACSR) .Case("$msaaccess", Mips::MSAAccess) .Case("$msasave", Mips::MSASave) .Case("$msamodify", Mips::MSAModify) .Case("$msarequest", Mips::MSARequest) .Case("$msamap", Mips::MSAMap) .Case("$msaunmap", Mips::MSAUnmap) .Default(0); if (!Reg) return std::make_pair(0U, nullptr); RC = TRI->getRegClass(Mips::MSACtrlRegClassID); return std::make_pair(Reg, RC); } if (!R.second) return std::make_pair(0U, nullptr); if (Prefix == "$f") { // Parse $f0-$f31. // If the size of FP registers is 64-bit or Reg is an even number, select // the 64-bit register class. Otherwise, select the 32-bit register class. if (VT == MVT::Other) VT = (Subtarget.isFP64bit() || !(Reg % 2)) ? MVT::f64 : MVT::f32; RC = getRegClassFor(VT); if (RC == &Mips::AFGR64RegClass) { assert(Reg % 2 == 0); Reg >>= 1; } } else if (Prefix == "$fcc") // Parse $fcc0-$fcc7. RC = TRI->getRegClass(Mips::FCCRegClassID); else if (Prefix == "$w") { // Parse $w0-$w31. RC = getRegClassFor((VT == MVT::Other) ? MVT::v16i8 : VT); } else { // Parse $0-$31. assert(Prefix == "$"); RC = getRegClassFor((VT == MVT::Other) ? MVT::i32 : VT); } assert(Reg < RC->getNumRegs()); return std::make_pair(*(RC->begin() + Reg), RC); } /// Given a register class constraint, like 'r', if this corresponds directly /// to an LLVM register class, return a register of 0 and the register class /// pointer. std::pair MipsTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { case 'd': // Address register. Same as 'r' unless generating MIPS16 code. case 'y': // Same as 'r'. Exists for compatibility. case 'r': if (VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) { if (Subtarget.inMips16Mode()) return std::make_pair(0U, &Mips::CPU16RegsRegClass); return std::make_pair(0U, &Mips::GPR32RegClass); } if (VT == MVT::i64 && !Subtarget.isGP64bit()) return std::make_pair(0U, &Mips::GPR32RegClass); if (VT == MVT::i64 && Subtarget.isGP64bit()) return std::make_pair(0U, &Mips::GPR64RegClass); // This will generate an error message return std::make_pair(0U, nullptr); case 'f': // FPU or MSA register if (VT == MVT::v16i8) return std::make_pair(0U, &Mips::MSA128BRegClass); else if (VT == MVT::v8i16 || VT == MVT::v8f16) return std::make_pair(0U, &Mips::MSA128HRegClass); else if (VT == MVT::v4i32 || VT == MVT::v4f32) return std::make_pair(0U, &Mips::MSA128WRegClass); else if (VT == MVT::v2i64 || VT == MVT::v2f64) return std::make_pair(0U, &Mips::MSA128DRegClass); else if (VT == MVT::f32) return std::make_pair(0U, &Mips::FGR32RegClass); else if ((VT == MVT::f64) && (!Subtarget.isSingleFloat())) { if (Subtarget.isFP64bit()) return std::make_pair(0U, &Mips::FGR64RegClass); return std::make_pair(0U, &Mips::AFGR64RegClass); } break; case 'c': // register suitable for indirect jump if (VT == MVT::i32) return std::make_pair((unsigned)Mips::T9, &Mips::GPR32RegClass); assert(VT == MVT::i64 && "Unexpected type."); return std::make_pair((unsigned)Mips::T9_64, &Mips::GPR64RegClass); case 'l': // register suitable for indirect jump if (VT == MVT::i32) return std::make_pair((unsigned)Mips::LO0, &Mips::LO32RegClass); return std::make_pair((unsigned)Mips::LO0_64, &Mips::LO64RegClass); case 'x': // register suitable for indirect jump // Fixme: Not triggering the use of both hi and low // This will generate an error message return std::make_pair(0U, nullptr); } } std::pair R; R = parseRegForInlineAsmConstraint(Constraint, VT); if (R.second) return R; return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); } /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. void MipsTargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector&Ops, SelectionDAG &DAG) const { SDLoc DL(Op); SDValue Result; // Only support length 1 constraints for now. if (Constraint.length() > 1) return; char ConstraintLetter = Constraint[0]; switch (ConstraintLetter) { default: break; // This will fall through to the generic implementation case 'I': // Signed 16 bit constant // If this fails, the parent routine will give an error if (ConstantSDNode *C = dyn_cast(Op)) { EVT Type = Op.getValueType(); int64_t Val = C->getSExtValue(); if (isInt<16>(Val)) { Result = DAG.getTargetConstant(Val, DL, Type); break; } } return; case 'J': // integer zero if (ConstantSDNode *C = dyn_cast(Op)) { EVT Type = Op.getValueType(); int64_t Val = C->getZExtValue(); if (Val == 0) { Result = DAG.getTargetConstant(0, DL, Type); break; } } return; case 'K': // unsigned 16 bit immediate if (ConstantSDNode *C = dyn_cast(Op)) { EVT Type = Op.getValueType(); uint64_t Val = (uint64_t)C->getZExtValue(); if (isUInt<16>(Val)) { Result = DAG.getTargetConstant(Val, DL, Type); break; } } return; case 'L': // signed 32 bit immediate where lower 16 bits are 0 if (ConstantSDNode *C = dyn_cast(Op)) { EVT Type = Op.getValueType(); int64_t Val = C->getSExtValue(); if ((isInt<32>(Val)) && ((Val & 0xffff) == 0)){ Result = DAG.getTargetConstant(Val, DL, Type); break; } } return; case 'N': // immediate in the range of -65535 to -1 (inclusive) if (ConstantSDNode *C = dyn_cast(Op)) { EVT Type = Op.getValueType(); int64_t Val = C->getSExtValue(); if ((Val >= -65535) && (Val <= -1)) { Result = DAG.getTargetConstant(Val, DL, Type); break; } } return; case 'O': // signed 15 bit immediate if (ConstantSDNode *C = dyn_cast(Op)) { EVT Type = Op.getValueType(); int64_t Val = C->getSExtValue(); if ((isInt<15>(Val))) { Result = DAG.getTargetConstant(Val, DL, Type); break; } } return; case 'P': // immediate in the range of 1 to 65535 (inclusive) if (ConstantSDNode *C = dyn_cast(Op)) { EVT Type = Op.getValueType(); int64_t Val = C->getSExtValue(); if ((Val <= 65535) && (Val >= 1)) { Result = DAG.getTargetConstant(Val, DL, Type); break; } } return; } if (Result.getNode()) { Ops.push_back(Result); return; } TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); } bool MipsTargetLowering::isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const { // No global is ever allowed as a base. if (AM.BaseGV) return false; switch (AM.Scale) { case 0: // "r+i" or just "i", depending on HasBaseReg. break; case 1: if (!AM.HasBaseReg) // allow "r+i". break; return false; // disallow "r+r" or "r+r+i". default: return false; } return true; } bool MipsTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The Mips target isn't yet aware of offsets. return false; } EVT MipsTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const { if (Subtarget.hasMips64()) return MVT::i64; return MVT::i32; } bool MipsTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { if (VT != MVT::f32 && VT != MVT::f64) return false; if (Imm.isNegZero()) return false; return Imm.isZero(); } unsigned MipsTargetLowering::getJumpTableEncoding() const { if (ABI.IsN64()) return MachineJumpTableInfo::EK_GPRel64BlockAddress; return TargetLowering::getJumpTableEncoding(); } bool MipsTargetLowering::useSoftFloat() const { return Subtarget.useSoftFloat(); } void MipsTargetLowering::copyByValRegs( SDValue Chain, const SDLoc &DL, std::vector &OutChains, SelectionDAG &DAG, const ISD::ArgFlagsTy &Flags, SmallVectorImpl &InVals, const Argument *FuncArg, unsigned FirstReg, unsigned LastReg, const CCValAssign &VA, MipsCCState &State) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); unsigned GPRSizeInBytes = Subtarget.getGPRSizeInBytes(); unsigned NumRegs = LastReg - FirstReg; unsigned RegAreaSize = NumRegs * GPRSizeInBytes; unsigned FrameObjSize = std::max(Flags.getByValSize(), RegAreaSize); int FrameObjOffset; ArrayRef ByValArgRegs = ABI.GetByValArgRegs(); if (RegAreaSize) FrameObjOffset = (int)ABI.GetCalleeAllocdArgSizeInBytes(State.getCallingConv()) - (int)((ByValArgRegs.size() - FirstReg) * GPRSizeInBytes); else FrameObjOffset = VA.getLocMemOffset(); // Create frame object. EVT PtrTy = getPointerTy(DAG.getDataLayout()); int FI = MFI->CreateFixedObject(FrameObjSize, FrameObjOffset, true); SDValue FIN = DAG.getFrameIndex(FI, PtrTy); InVals.push_back(FIN); if (!NumRegs) return; // Copy arg registers. MVT RegTy = MVT::getIntegerVT(GPRSizeInBytes * 8); const TargetRegisterClass *RC = getRegClassFor(RegTy); for (unsigned I = 0; I < NumRegs; ++I) { unsigned ArgReg = ByValArgRegs[FirstReg + I]; unsigned VReg = addLiveIn(MF, ArgReg, RC); unsigned Offset = I * GPRSizeInBytes; SDValue StorePtr = DAG.getNode(ISD::ADD, DL, PtrTy, FIN, DAG.getConstant(Offset, DL, PtrTy)); SDValue Store = DAG.getStore(Chain, DL, DAG.getRegister(VReg, RegTy), StorePtr, MachinePointerInfo(FuncArg, Offset)); OutChains.push_back(Store); } } // Copy byVal arg to registers and stack. void MipsTargetLowering::passByValArg( SDValue Chain, const SDLoc &DL, std::deque> &RegsToPass, SmallVectorImpl &MemOpChains, SDValue StackPtr, MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg, unsigned FirstReg, unsigned LastReg, const ISD::ArgFlagsTy &Flags, bool isLittle, const CCValAssign &VA) const { unsigned ByValSizeInBytes = Flags.getByValSize(); unsigned OffsetInBytes = 0; // From beginning of struct unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes(); unsigned Alignment = std::min(Flags.getByValAlign(), RegSizeInBytes); EVT PtrTy = getPointerTy(DAG.getDataLayout()), RegTy = MVT::getIntegerVT(RegSizeInBytes * 8); unsigned NumRegs = LastReg - FirstReg; if (NumRegs) { ArrayRef ArgRegs = ABI.GetByValArgRegs(); bool LeftoverBytes = (NumRegs * RegSizeInBytes > ByValSizeInBytes); unsigned I = 0; // Copy words to registers. for (; I < NumRegs - LeftoverBytes; ++I, OffsetInBytes += RegSizeInBytes) { SDValue LoadPtr = DAG.getNode(ISD::ADD, DL, PtrTy, Arg, DAG.getConstant(OffsetInBytes, DL, PtrTy)); SDValue LoadVal = DAG.getLoad(RegTy, DL, Chain, LoadPtr, MachinePointerInfo(), Alignment); MemOpChains.push_back(LoadVal.getValue(1)); unsigned ArgReg = ArgRegs[FirstReg + I]; RegsToPass.push_back(std::make_pair(ArgReg, LoadVal)); } // Return if the struct has been fully copied. if (ByValSizeInBytes == OffsetInBytes) return; // Copy the remainder of the byval argument with sub-word loads and shifts. if (LeftoverBytes) { SDValue Val; for (unsigned LoadSizeInBytes = RegSizeInBytes / 2, TotalBytesLoaded = 0; OffsetInBytes < ByValSizeInBytes; LoadSizeInBytes /= 2) { unsigned RemainingSizeInBytes = ByValSizeInBytes - OffsetInBytes; if (RemainingSizeInBytes < LoadSizeInBytes) continue; // Load subword. SDValue LoadPtr = DAG.getNode(ISD::ADD, DL, PtrTy, Arg, DAG.getConstant(OffsetInBytes, DL, PtrTy)); SDValue LoadVal = DAG.getExtLoad( ISD::ZEXTLOAD, DL, RegTy, Chain, LoadPtr, MachinePointerInfo(), MVT::getIntegerVT(LoadSizeInBytes * 8), Alignment); MemOpChains.push_back(LoadVal.getValue(1)); // Shift the loaded value. unsigned Shamt; if (isLittle) Shamt = TotalBytesLoaded * 8; else Shamt = (RegSizeInBytes - (TotalBytesLoaded + LoadSizeInBytes)) * 8; SDValue Shift = DAG.getNode(ISD::SHL, DL, RegTy, LoadVal, DAG.getConstant(Shamt, DL, MVT::i32)); if (Val.getNode()) Val = DAG.getNode(ISD::OR, DL, RegTy, Val, Shift); else Val = Shift; OffsetInBytes += LoadSizeInBytes; TotalBytesLoaded += LoadSizeInBytes; Alignment = std::min(Alignment, LoadSizeInBytes); } unsigned ArgReg = ArgRegs[FirstReg + I]; RegsToPass.push_back(std::make_pair(ArgReg, Val)); return; } } // Copy remainder of byval arg to it with memcpy. unsigned MemCpySize = ByValSizeInBytes - OffsetInBytes; SDValue Src = DAG.getNode(ISD::ADD, DL, PtrTy, Arg, DAG.getConstant(OffsetInBytes, DL, PtrTy)); SDValue Dst = DAG.getNode(ISD::ADD, DL, PtrTy, StackPtr, DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); Chain = DAG.getMemcpy(Chain, DL, Dst, Src, DAG.getConstant(MemCpySize, DL, PtrTy), Alignment, /*isVolatile=*/false, /*AlwaysInline=*/false, /*isTailCall=*/false, MachinePointerInfo(), MachinePointerInfo()); MemOpChains.push_back(Chain); } void MipsTargetLowering::writeVarArgRegs(std::vector &OutChains, SDValue Chain, const SDLoc &DL, SelectionDAG &DAG, CCState &State) const { ArrayRef ArgRegs = ABI.GetVarArgRegs(); unsigned Idx = State.getFirstUnallocated(ArgRegs); unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes(); MVT RegTy = MVT::getIntegerVT(RegSizeInBytes * 8); const TargetRegisterClass *RC = getRegClassFor(RegTy); MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MipsFunctionInfo *MipsFI = MF.getInfo(); // Offset of the first variable argument from stack pointer. int VaArgOffset; if (ArgRegs.size() == Idx) VaArgOffset = alignTo(State.getNextStackOffset(), RegSizeInBytes); else { VaArgOffset = (int)ABI.GetCalleeAllocdArgSizeInBytes(State.getCallingConv()) - (int)(RegSizeInBytes * (ArgRegs.size() - Idx)); } // Record the frame index of the first variable argument // which is a value necessary to VASTART. int FI = MFI->CreateFixedObject(RegSizeInBytes, VaArgOffset, true); MipsFI->setVarArgsFrameIndex(FI); // Copy the integer registers that have not been used for argument passing // to the argument register save area. For O32, the save area is allocated // in the caller's stack frame, while for N32/64, it is allocated in the // callee's stack frame. for (unsigned I = Idx; I < ArgRegs.size(); ++I, VaArgOffset += RegSizeInBytes) { unsigned Reg = addLiveIn(MF, ArgRegs[I], RC); SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegTy); FI = MFI->CreateFixedObject(RegSizeInBytes, VaArgOffset, true); SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff, MachinePointerInfo()); cast(Store.getNode())->getMemOperand()->setValue( (Value *)nullptr); OutChains.push_back(Store); } } void MipsTargetLowering::HandleByVal(CCState *State, unsigned &Size, unsigned Align) const { const TargetFrameLowering *TFL = Subtarget.getFrameLowering(); assert(Size && "Byval argument's size shouldn't be 0."); Align = std::min(Align, TFL->getStackAlignment()); unsigned FirstReg = 0; unsigned NumRegs = 0; if (State->getCallingConv() != CallingConv::Fast) { unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes(); ArrayRef IntArgRegs = ABI.GetByValArgRegs(); // FIXME: The O32 case actually describes no shadow registers. const MCPhysReg *ShadowRegs = ABI.IsO32() ? IntArgRegs.data() : Mips64DPRegs; // We used to check the size as well but we can't do that anymore since // CCState::HandleByVal() rounds up the size after calling this function. assert(!(Align % RegSizeInBytes) && "Byval argument's alignment should be a multiple of" "RegSizeInBytes."); FirstReg = State->getFirstUnallocated(IntArgRegs); // If Align > RegSizeInBytes, the first arg register must be even. // FIXME: This condition happens to do the right thing but it's not the // right way to test it. We want to check that the stack frame offset // of the register is aligned. if ((Align > RegSizeInBytes) && (FirstReg % 2)) { State->AllocateReg(IntArgRegs[FirstReg], ShadowRegs[FirstReg]); ++FirstReg; } // Mark the registers allocated. Size = alignTo(Size, RegSizeInBytes); for (unsigned I = FirstReg; Size > 0 && (I < IntArgRegs.size()); Size -= RegSizeInBytes, ++I, ++NumRegs) State->AllocateReg(IntArgRegs[I], ShadowRegs[I]); } State->addInRegsParamInfo(FirstReg, FirstReg + NumRegs); } MachineBasicBlock *MipsTargetLowering::emitPseudoSELECT(MachineInstr &MI, MachineBasicBlock *BB, bool isFPCmp, unsigned Opc) const { assert(!(Subtarget.hasMips4() || Subtarget.hasMips32()) && "Subtarget already supports SELECT nodes with the use of" "conditional-move instructions."); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); // To "insert" a SELECT instruction, we actually have to insert the // diamond control-flow pattern. The incoming instruction knows the // destination vreg to set, the condition code register to branch on, the // true/false values to select between, and a branch opcode to use. const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction::iterator It = ++BB->getIterator(); // thisMBB: // ... // TrueVal = ... // setcc r1, r2, r3 // bNE r1, r0, copy1MBB // fallthrough --> copy0MBB MachineBasicBlock *thisMBB = BB; MachineFunction *F = BB->getParent(); MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, copy0MBB); F->insert(It, sinkMBB); // Transfer the remainder of BB and its successor edges to sinkMBB. sinkMBB->splice(sinkMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); sinkMBB->transferSuccessorsAndUpdatePHIs(BB); // Next, add the true and fallthrough blocks as its successors. BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); if (isFPCmp) { // bc1[tf] cc, sinkMBB BuildMI(BB, DL, TII->get(Opc)) .addReg(MI.getOperand(1).getReg()) .addMBB(sinkMBB); } else { // bne rs, $0, sinkMBB BuildMI(BB, DL, TII->get(Opc)) .addReg(MI.getOperand(1).getReg()) .addReg(Mips::ZERO) .addMBB(sinkMBB); } // copy0MBB: // %FalseValue = ... // # fallthrough to sinkMBB BB = copy0MBB; // Update machine-CFG edges BB->addSuccessor(sinkMBB); // sinkMBB: // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] // ... BB = sinkMBB; BuildMI(*BB, BB->begin(), DL, TII->get(Mips::PHI), MI.getOperand(0).getReg()) .addReg(MI.getOperand(2).getReg()) .addMBB(thisMBB) .addReg(MI.getOperand(3).getReg()) .addMBB(copy0MBB); MI.eraseFromParent(); // The pseudo instruction is gone now. return BB; } // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. unsigned MipsTargetLowering::getRegisterByName(const char* RegName, EVT VT, SelectionDAG &DAG) const { // Named registers is expected to be fairly rare. For now, just support $28 // since the linux kernel uses it. if (Subtarget.isGP64bit()) { unsigned Reg = StringSwitch(RegName) .Case("$28", Mips::GP_64) .Default(0); if (Reg) return Reg; } else { unsigned Reg = StringSwitch(RegName) .Case("$28", Mips::GP) .Default(0); if (Reg) return Reg; } report_fatal_error("Invalid register name global variable"); } Index: projects/clang390-import/contrib/llvm/lib/Target/Mips/MipsISelLowering.h =================================================================== --- projects/clang390-import/contrib/llvm/lib/Target/Mips/MipsISelLowering.h (revision 305682) +++ projects/clang390-import/contrib/llvm/lib/Target/Mips/MipsISelLowering.h (revision 305683) @@ -1,611 +1,611 @@ //===-- MipsISelLowering.h - Mips DAG Lowering Interface --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file defines the interfaces that Mips uses to lower LLVM code into a // selection DAG. // //===----------------------------------------------------------------------===// #ifndef LLVM_LIB_TARGET_MIPS_MIPSISELLOWERING_H #define LLVM_LIB_TARGET_MIPS_MIPSISELLOWERING_H #include "MCTargetDesc/MipsABIInfo.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "Mips.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/IR/Function.h" #include "llvm/Target/TargetLowering.h" #include #include namespace llvm { namespace MipsISD { enum NodeType : unsigned { // Start the numbering from where ISD NodeType finishes. FIRST_NUMBER = ISD::BUILTIN_OP_END, // Jump and link (call) JmpLink, // Tail call TailCall, // Get the Higher 16 bits from a 32-bit immediate // No relation with Mips Hi register Hi, // Get the Lower 16 bits from a 32-bit immediate // No relation with Mips Lo register Lo, // Handle gp_rel (small data/bss sections) relocation. GPRel, // Thread Pointer ThreadPointer, // Floating Point Branch Conditional FPBrcond, // Floating Point Compare FPCmp, // Floating Point Conditional Moves CMovFP_T, CMovFP_F, // FP-to-int truncation node. TruncIntFP, // Return Ret, // Interrupt, exception, error trap Return ERet, // Software Exception Return. EH_RETURN, // Node used to extract integer from accumulator. MFHI, MFLO, // Node used to insert integers to accumulator. MTLOHI, // Mult nodes. Mult, Multu, // MAdd/Sub nodes MAdd, MAddu, MSub, MSubu, // DivRem(u) DivRem, DivRemU, DivRem16, DivRemU16, BuildPairF64, ExtractElementF64, Wrapper, DynAlloc, Sync, Ext, Ins, // EXTR.W instrinsic nodes. EXTP, EXTPDP, EXTR_S_H, EXTR_W, EXTR_R_W, EXTR_RS_W, SHILO, MTHLIP, // DPA.W intrinsic nodes. MULSAQ_S_W_PH, MAQ_S_W_PHL, MAQ_S_W_PHR, MAQ_SA_W_PHL, MAQ_SA_W_PHR, DPAU_H_QBL, DPAU_H_QBR, DPSU_H_QBL, DPSU_H_QBR, DPAQ_S_W_PH, DPSQ_S_W_PH, DPAQ_SA_L_W, DPSQ_SA_L_W, DPA_W_PH, DPS_W_PH, DPAQX_S_W_PH, DPAQX_SA_W_PH, DPAX_W_PH, DPSX_W_PH, DPSQX_S_W_PH, DPSQX_SA_W_PH, MULSA_W_PH, MULT, MULTU, MADD_DSP, MADDU_DSP, MSUB_DSP, MSUBU_DSP, // DSP shift nodes. SHLL_DSP, SHRA_DSP, SHRL_DSP, // DSP setcc and select_cc nodes. SETCC_DSP, SELECT_CC_DSP, // Vector comparisons. // These take a vector and return a boolean. VALL_ZERO, VANY_ZERO, VALL_NONZERO, VANY_NONZERO, // These take a vector and return a vector bitmask. VCEQ, VCLE_S, VCLE_U, VCLT_S, VCLT_U, // Element-wise vector max/min. VSMAX, VSMIN, VUMAX, VUMIN, // Vector Shuffle with mask as an operand VSHF, // Generic shuffle SHF, // 4-element set shuffle. ILVEV, // Interleave even elements ILVOD, // Interleave odd elements ILVL, // Interleave left elements ILVR, // Interleave right elements PCKEV, // Pack even elements PCKOD, // Pack odd elements // Vector Lane Copy INSVE, // Copy element from one vector to another // Combined (XOR (OR $a, $b), -1) VNOR, // Extended vector element extraction VEXTRACT_SEXT_ELT, VEXTRACT_ZEXT_ELT, // Load/Store Left/Right nodes. LWL = ISD::FIRST_TARGET_MEMORY_OPCODE, LWR, SWL, SWR, LDL, LDR, SDL, SDR }; } //===--------------------------------------------------------------------===// // TargetLowering Implementation //===--------------------------------------------------------------------===// class MipsFunctionInfo; class MipsSubtarget; class MipsCCState; class MipsTargetLowering : public TargetLowering { bool isMicroMips; public: explicit MipsTargetLowering(const MipsTargetMachine &TM, const MipsSubtarget &STI); static const MipsTargetLowering *create(const MipsTargetMachine &TM, const MipsSubtarget &STI); /// createFastISel - This method returns a target specific FastISel object, /// or null if the target does not support "fast" ISel. FastISel *createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const override; MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { return MVT::i32; } bool isCheapToSpeculateCttz() const override; bool isCheapToSpeculateCtlz() const override; ISD::NodeType getExtendForAtomicOps() const override { return ISD::SIGN_EXTEND; } void LowerOperationWrapper(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const override; /// LowerOperation - Provide custom lowering hooks for some operations. SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; /// ReplaceNodeResults - Replace the results of node with an illegal result /// type with new values built out of custom code. /// void ReplaceNodeResults(SDNode *N, SmallVectorImpl&Results, SelectionDAG &DAG) const override; /// getTargetNodeName - This method returns the name of a target specific // DAG node. const char *getTargetNodeName(unsigned Opcode) const override; /// getSetCCResultType - get the ISD::SETCC result ValueType EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override; SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override; void HandleByVal(CCState *, unsigned &, unsigned) const override; unsigned getRegisterByName(const char* RegName, EVT VT, SelectionDAG &DAG) const override; /// If a physical register, this returns the register that receives the /// exception address on entry to an EH pad. unsigned getExceptionPointerRegister(const Constant *PersonalityFn) const override { return ABI.IsN64() ? Mips::A0_64 : Mips::A0; } /// If a physical register, this returns the register that receives the /// exception typeid on entry to a landing pad. unsigned getExceptionSelectorRegister(const Constant *PersonalityFn) const override { return ABI.IsN64() ? Mips::A1_64 : Mips::A1; } /// Returns true if a cast between SrcAS and DestAS is a noop. bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { // Mips doesn't have any special address spaces so we just reserve // the first 256 for software use (e.g. OpenCL) and treat casts // between them as noops. return SrcAS < 256 && DestAS < 256; } protected: SDValue getGlobalReg(SelectionDAG &DAG, EVT Ty) const; // This method creates the following nodes, which are necessary for // computing a local symbol's address: // // (add (load (wrapper $gp, %got(sym)), %lo(sym)) template SDValue getAddrLocal(NodeTy *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, bool IsN32OrN64) const { unsigned GOTFlag = IsN32OrN64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT; SDValue GOT = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty), getTargetNode(N, Ty, DAG, GOTFlag)); SDValue Load = DAG.getLoad(Ty, DL, DAG.getEntryNode(), GOT, MachinePointerInfo::getGOT(DAG.getMachineFunction())); unsigned LoFlag = IsN32OrN64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO; SDValue Lo = DAG.getNode(MipsISD::Lo, DL, Ty, getTargetNode(N, Ty, DAG, LoFlag)); return DAG.getNode(ISD::ADD, DL, Ty, Load, Lo); } // This method creates the following nodes, which are necessary for // computing a global symbol's address: // // (load (wrapper $gp, %got(sym))) template SDValue getAddrGlobal(NodeTy *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flag, SDValue Chain, const MachinePointerInfo &PtrInfo) const { SDValue Tgt = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty), getTargetNode(N, Ty, DAG, Flag)); return DAG.getLoad(Ty, DL, Chain, Tgt, PtrInfo); } // This method creates the following nodes, which are necessary for // computing a global symbol's address in large-GOT mode: // // (load (wrapper (add %hi(sym), $gp), %lo(sym))) template SDValue getAddrGlobalLargeGOT(NodeTy *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned HiFlag, unsigned LoFlag, SDValue Chain, const MachinePointerInfo &PtrInfo) const { SDValue Hi = DAG.getNode(MipsISD::Hi, DL, Ty, getTargetNode(N, Ty, DAG, HiFlag)); Hi = DAG.getNode(ISD::ADD, DL, Ty, Hi, getGlobalReg(DAG, Ty)); SDValue Wrapper = DAG.getNode(MipsISD::Wrapper, DL, Ty, Hi, getTargetNode(N, Ty, DAG, LoFlag)); return DAG.getLoad(Ty, DL, Chain, Wrapper, PtrInfo); } // This method creates the following nodes, which are necessary for // computing a symbol's address in non-PIC mode: // // (add %hi(sym), %lo(sym)) template SDValue getAddrNonPIC(NodeTy *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG) const { SDValue Hi = getTargetNode(N, Ty, DAG, MipsII::MO_ABS_HI); SDValue Lo = getTargetNode(N, Ty, DAG, MipsII::MO_ABS_LO); return DAG.getNode(ISD::ADD, DL, Ty, DAG.getNode(MipsISD::Hi, DL, Ty, Hi), DAG.getNode(MipsISD::Lo, DL, Ty, Lo)); } // This method creates the following nodes, which are necessary for // computing a symbol's address using gp-relative addressing: // // (add $gp, %gp_rel(sym)) template SDValue getAddrGPRel(NodeTy *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG) const { assert(Ty == MVT::i32); SDValue GPRel = getTargetNode(N, Ty, DAG, MipsII::MO_GPREL); return DAG.getNode(ISD::ADD, DL, Ty, DAG.getRegister(Mips::GP, Ty), DAG.getNode(MipsISD::GPRel, DL, DAG.getVTList(Ty), GPRel)); } /// This function fills Ops, which is the list of operands that will later /// be used when a function call node is created. It also generates /// copyToReg nodes to set up argument registers. virtual void getOpndList(SmallVectorImpl &Ops, std::deque< std::pair > &RegsToPass, bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const; protected: SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const; // Subtarget Info const MipsSubtarget &Subtarget; // Cache the ABI from the TargetMachine, we use it everywhere. const MipsABIInfo &ABI; private: // Create a TargetGlobalAddress node. SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG, unsigned Flag) const; // Create a TargetExternalSymbol node. SDValue getTargetNode(ExternalSymbolSDNode *N, EVT Ty, SelectionDAG &DAG, unsigned Flag) const; // Create a TargetBlockAddress node. SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG, unsigned Flag) const; // Create a TargetJumpTable node. SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG, unsigned Flag) const; // Create a TargetConstantPool node. SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG, unsigned Flag) const; // Lower Operand helpers SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals, TargetLowering::CallLoweringInfo &CLI) const; // Lower Operand specifics SDValue lowerBR_JT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerBRCOND(SDValue Op, SelectionDAG &DAG) const; SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVAARG(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFABS(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue lowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const; SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG& DAG) const; SDValue lowerShiftRightParts(SDValue Op, SelectionDAG& DAG, bool IsSRA) const; - SDValue lowerADD(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; /// isEligibleForTailCallOptimization - Check whether the call is eligible /// for tail call optimization. virtual bool isEligibleForTailCallOptimization(const CCState &CCInfo, unsigned NextStackOffset, const MipsFunctionInfo &FI) const = 0; /// copyByValArg - Copy argument registers which were used to pass a byval /// argument to the stack. Create a stack frame object for the byval /// argument. void copyByValRegs(SDValue Chain, const SDLoc &DL, std::vector &OutChains, SelectionDAG &DAG, const ISD::ArgFlagsTy &Flags, SmallVectorImpl &InVals, const Argument *FuncArg, unsigned FirstReg, unsigned LastReg, const CCValAssign &VA, MipsCCState &State) const; /// passByValArg - Pass a byval argument in registers or on stack. void passByValArg(SDValue Chain, const SDLoc &DL, std::deque> &RegsToPass, SmallVectorImpl &MemOpChains, SDValue StackPtr, MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg, unsigned FirstReg, unsigned LastReg, const ISD::ArgFlagsTy &Flags, bool isLittle, const CCValAssign &VA) const; /// writeVarArgRegs - Write variable function arguments passed in registers /// to the stack. Also create a stack frame object for the first variable /// argument. void writeVarArgRegs(std::vector &OutChains, SDValue Chain, const SDLoc &DL, SelectionDAG &DAG, CCState &State) const; SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const override; SDValue passArgOnStack(SDValue StackPtr, unsigned Offset, SDValue Chain, SDValue Arg, const SDLoc &DL, bool IsTailCall, SelectionDAG &DAG) const; SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const override; bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl &Outs, LLVMContext &Context) const override; SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SDLoc &dl, SelectionDAG &DAG) const override; SDValue LowerInterruptReturn(SmallVectorImpl &RetOps, const SDLoc &DL, SelectionDAG &DAG) const; bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override; // Inline asm support ConstraintType getConstraintType(StringRef Constraint) const override; /// Examine constraint string and operand type and determine a weight value. /// The operand object must already have been set up with the operand type. ConstraintWeight getSingleConstraintMatchWeight( AsmOperandInfo &info, const char *constraint) const override; /// This function parses registers that appear in inline-asm constraints. /// It returns pair (0, 0) on failure. std::pair parseRegForInlineAsmConstraint(StringRef C, MVT VT) const; std::pair getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override; /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. If hasMemory is /// true it means one of the asm constraint of the inline asm instruction /// being processed is 'm'. void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector &Ops, SelectionDAG &DAG) const override; unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override { if (ConstraintCode == "R") return InlineAsm::Constraint_R; else if (ConstraintCode == "ZC") return InlineAsm::Constraint_ZC; return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); } bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override; bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const override; /// isFPImmLegal - Returns true if the target can instruction select the /// specified FP immediate natively. If false, the legalizer will /// materialize the FP immediate as a load from a constant pool. bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; unsigned getJumpTableEncoding() const override; bool useSoftFloat() const override; bool shouldInsertFencesForAtomic(const Instruction *I) const override { return true; } /// Emit a sign-extension using sll/sra, seb, or seh appropriately. MachineBasicBlock *emitSignExtendToI32InReg(MachineInstr &MI, MachineBasicBlock *BB, unsigned Size, unsigned DstReg, unsigned SrcRec) const; MachineBasicBlock *emitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode, bool Nand = false) const; MachineBasicBlock *emitAtomicBinaryPartword(MachineInstr &MI, MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode, bool Nand = false) const; MachineBasicBlock *emitAtomicCmpSwap(MachineInstr &MI, MachineBasicBlock *BB, unsigned Size) const; MachineBasicBlock *emitAtomicCmpSwapPartword(MachineInstr &MI, MachineBasicBlock *BB, unsigned Size) const; MachineBasicBlock *emitSEL_D(MachineInstr &MI, MachineBasicBlock *BB) const; MachineBasicBlock *emitPseudoSELECT(MachineInstr &MI, MachineBasicBlock *BB, bool isFPCmp, unsigned Opc) const; }; /// Create MipsTargetLowering objects. const MipsTargetLowering * createMips16TargetLowering(const MipsTargetMachine &TM, const MipsSubtarget &STI); const MipsTargetLowering * createMipsSETargetLowering(const MipsTargetMachine &TM, const MipsSubtarget &STI); namespace Mips { FastISel *createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo); } } #endif Index: projects/clang390-import/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- projects/clang390-import/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp (revision 305682) +++ projects/clang390-import/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp (revision 305683) @@ -1,12106 +1,12122 @@ //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements the PPCISelLowering class. // //===----------------------------------------------------------------------===// #include "PPCISelLowering.h" #include "MCTargetDesc/PPCPredicates.h" #include "PPCCallingConv.h" #include "PPCCCState.h" #include "PPCMachineFunctionInfo.h" #include "PPCPerfectShuffle.h" #include "PPCTargetMachine.h" #include "PPCTargetObjectFile.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/Intrinsics.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" #include using namespace llvm; #define DEBUG_TYPE "ppc-lowering" static cl::opt DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden); static cl::opt DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden); static cl::opt DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden); static cl::opt DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden); STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumSiblingCalls, "Number of sibling calls"); // FIXME: Remove this once the bug has been fixed! extern cl::opt ANDIGlueBug; PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI) : TargetLowering(TM), Subtarget(STI) { // Use _setjmp/_longjmp instead of setjmp/longjmp. setUseUnderscoreSetJmp(true); setUseUnderscoreLongJmp(true); // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all // arguments are at least 4/8 bytes aligned. bool isPPC64 = Subtarget.isPPC64(); setMinStackArgumentAlignment(isPPC64 ? 8:4); // Set up the register classes. addRegisterClass(MVT::i32, &PPC::GPRCRegClass); if (!useSoftFloat()) { addRegisterClass(MVT::f32, &PPC::F4RCRegClass); addRegisterClass(MVT::f64, &PPC::F8RCRegClass); } // PowerPC has an i16 but no i8 (or i1) SEXTLOAD for (MVT VT : MVT::integer_valuetypes()) { setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand); } setTruncStoreAction(MVT::f64, MVT::f32, Expand); // PowerPC has pre-inc load and store's. setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal); setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal); setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal); setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal); setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal); setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal); setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal); if (Subtarget.useCRBits()) { setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); if (isPPC64 || Subtarget.hasFPCVT()) { setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote); AddPromotedToType (ISD::SINT_TO_FP, MVT::i1, isPPC64 ? MVT::i64 : MVT::i32); setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote); AddPromotedToType(ISD::UINT_TO_FP, MVT::i1, isPPC64 ? MVT::i64 : MVT::i32); } else { setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom); } // PowerPC does not support direct load / store of condition registers setOperationAction(ISD::LOAD, MVT::i1, Custom); setOperationAction(ISD::STORE, MVT::i1, Custom); // FIXME: Remove this once the ANDI glue bug is fixed: if (ANDIGlueBug) setOperationAction(ISD::TRUNCATE, MVT::i1, Custom); for (MVT VT : MVT::integer_valuetypes()) { setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); setTruncStoreAction(VT, MVT::i1, Expand); } addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass); } // This is used in the ppcf128->int sequence. Note it has different semantics // from FP_ROUND: that rounds to nearest, this rounds to zero. setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom); // We do not currently implement these libm ops for PowerPC. setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand); setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand); setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand); setOperationAction(ISD::FRINT, MVT::ppcf128, Expand); setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand); setOperationAction(ISD::FREM, MVT::ppcf128, Expand); // PowerPC has no SREM/UREM instructions setOperationAction(ISD::SREM, MVT::i32, Expand); setOperationAction(ISD::UREM, MVT::i32, Expand); setOperationAction(ISD::SREM, MVT::i64, Expand); setOperationAction(ISD::UREM, MVT::i64, Expand); // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM. setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); setOperationAction(ISD::UDIVREM, MVT::i32, Expand); setOperationAction(ISD::SDIVREM, MVT::i32, Expand); setOperationAction(ISD::UDIVREM, MVT::i64, Expand); setOperationAction(ISD::SDIVREM, MVT::i64, Expand); // We don't support sin/cos/sqrt/fmod/pow setOperationAction(ISD::FSIN , MVT::f64, Expand); setOperationAction(ISD::FCOS , MVT::f64, Expand); setOperationAction(ISD::FSINCOS, MVT::f64, Expand); setOperationAction(ISD::FREM , MVT::f64, Expand); setOperationAction(ISD::FPOW , MVT::f64, Expand); setOperationAction(ISD::FMA , MVT::f64, Legal); setOperationAction(ISD::FSIN , MVT::f32, Expand); setOperationAction(ISD::FCOS , MVT::f32, Expand); setOperationAction(ISD::FSINCOS, MVT::f32, Expand); setOperationAction(ISD::FREM , MVT::f32, Expand); setOperationAction(ISD::FPOW , MVT::f32, Expand); setOperationAction(ISD::FMA , MVT::f32, Legal); setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); // If we're enabling GP optimizations, use hardware square root if (!Subtarget.hasFSQRT() && !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() && Subtarget.hasFRE())) setOperationAction(ISD::FSQRT, MVT::f64, Expand); if (!Subtarget.hasFSQRT() && !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() && Subtarget.hasFRES())) setOperationAction(ISD::FSQRT, MVT::f32, Expand); if (Subtarget.hasFCPSGN()) { setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal); } else { setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); } if (Subtarget.hasFPRND()) { setOperationAction(ISD::FFLOOR, MVT::f64, Legal); setOperationAction(ISD::FCEIL, MVT::f64, Legal); setOperationAction(ISD::FTRUNC, MVT::f64, Legal); setOperationAction(ISD::FROUND, MVT::f64, Legal); setOperationAction(ISD::FFLOOR, MVT::f32, Legal); setOperationAction(ISD::FCEIL, MVT::f32, Legal); setOperationAction(ISD::FTRUNC, MVT::f32, Legal); setOperationAction(ISD::FROUND, MVT::f32, Legal); } // PowerPC does not have BSWAP, CTPOP or CTTZ setOperationAction(ISD::BSWAP, MVT::i32 , Expand); setOperationAction(ISD::CTTZ , MVT::i32 , Expand); setOperationAction(ISD::BSWAP, MVT::i64 , Expand); setOperationAction(ISD::CTTZ , MVT::i64 , Expand); if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) { setOperationAction(ISD::CTPOP, MVT::i32 , Legal); setOperationAction(ISD::CTPOP, MVT::i64 , Legal); } else { setOperationAction(ISD::CTPOP, MVT::i32 , Expand); setOperationAction(ISD::CTPOP, MVT::i64 , Expand); } // PowerPC does not have ROTR setOperationAction(ISD::ROTR, MVT::i32 , Expand); setOperationAction(ISD::ROTR, MVT::i64 , Expand); if (!Subtarget.useCRBits()) { // PowerPC does not have Select setOperationAction(ISD::SELECT, MVT::i32, Expand); setOperationAction(ISD::SELECT, MVT::i64, Expand); setOperationAction(ISD::SELECT, MVT::f32, Expand); setOperationAction(ISD::SELECT, MVT::f64, Expand); } // PowerPC wants to turn select_cc of FP into fsel when possible. setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); // PowerPC wants to optimize integer setcc a bit if (!Subtarget.useCRBits()) setOperationAction(ISD::SETCC, MVT::i32, Custom); // PowerPC does not have BRCOND which requires SetCC if (!Subtarget.useCRBits()) setOperationAction(ISD::BRCOND, MVT::Other, Expand); setOperationAction(ISD::BR_JT, MVT::Other, Expand); // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores. setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); // PowerPC does not have [U|S]INT_TO_FP setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); if (Subtarget.hasDirectMove() && isPPC64) { setOperationAction(ISD::BITCAST, MVT::f32, Legal); setOperationAction(ISD::BITCAST, MVT::i32, Legal); setOperationAction(ISD::BITCAST, MVT::i64, Legal); setOperationAction(ISD::BITCAST, MVT::f64, Legal); } else { setOperationAction(ISD::BITCAST, MVT::f32, Expand); setOperationAction(ISD::BITCAST, MVT::i32, Expand); setOperationAction(ISD::BITCAST, MVT::i64, Expand); setOperationAction(ISD::BITCAST, MVT::f64, Expand); } // We cannot sextinreg(i1). Expand to shifts. setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support // SjLj exception handling but a light-weight setjmp/longjmp replacement to // support continuation, user-level threading, and etc.. As a result, no // other SjLj exception interfaces are implemented and please don't build // your own exception handling based on them. // LLVM/Clang supports zero-cost DWARF exception handling. setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); // We want to legalize GlobalAddress and ConstantPool nodes into the // appropriate instructions to materialize the address. setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); setOperationAction(ISD::BlockAddress, MVT::i32, Custom); setOperationAction(ISD::ConstantPool, MVT::i32, Custom); setOperationAction(ISD::JumpTable, MVT::i32, Custom); setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); setOperationAction(ISD::BlockAddress, MVT::i64, Custom); setOperationAction(ISD::ConstantPool, MVT::i64, Custom); setOperationAction(ISD::JumpTable, MVT::i64, Custom); // TRAP is legal. setOperationAction(ISD::TRAP, MVT::Other, Legal); // TRAMPOLINE is custom lowered. setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom); setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom); // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); if (Subtarget.isSVR4ABI()) { if (isPPC64) { // VAARG always uses double-word chunks, so promote anything smaller. setOperationAction(ISD::VAARG, MVT::i1, Promote); AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64); setOperationAction(ISD::VAARG, MVT::i8, Promote); AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64); setOperationAction(ISD::VAARG, MVT::i16, Promote); AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64); setOperationAction(ISD::VAARG, MVT::i32, Promote); AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64); setOperationAction(ISD::VAARG, MVT::Other, Expand); } else { // VAARG is custom lowered with the 32-bit SVR4 ABI. setOperationAction(ISD::VAARG, MVT::Other, Custom); setOperationAction(ISD::VAARG, MVT::i64, Custom); } } else setOperationAction(ISD::VAARG, MVT::Other, Expand); if (Subtarget.isSVR4ABI() && !isPPC64) // VACOPY is custom lowered with the 32-bit SVR4 ABI. setOperationAction(ISD::VACOPY , MVT::Other, Custom); else setOperationAction(ISD::VACOPY , MVT::Other, Expand); // Use the default implementation. setOperationAction(ISD::VAEND , MVT::Other, Expand); setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom); setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom); setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom); + setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom); + setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom); // We want to custom lower some of our intrinsics. setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); // To handle counter-based loop conditions. setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom); // Comparisons that require checking two conditions. setCondCodeAction(ISD::SETULT, MVT::f32, Expand); setCondCodeAction(ISD::SETULT, MVT::f64, Expand); setCondCodeAction(ISD::SETUGT, MVT::f32, Expand); setCondCodeAction(ISD::SETUGT, MVT::f64, Expand); setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand); setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand); setCondCodeAction(ISD::SETOGE, MVT::f32, Expand); setCondCodeAction(ISD::SETOGE, MVT::f64, Expand); setCondCodeAction(ISD::SETOLE, MVT::f32, Expand); setCondCodeAction(ISD::SETOLE, MVT::f64, Expand); setCondCodeAction(ISD::SETONE, MVT::f32, Expand); setCondCodeAction(ISD::SETONE, MVT::f64, Expand); if (Subtarget.has64BitSupport()) { // They also have instructions for converting between i64 and fp. setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); // This is just the low 32 bits of a (signed) fp->i64 conversion. // We cannot do this with Promote because i64 is not a legal type. setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); } else { // PowerPC does not have FP_TO_UINT on 32-bit implementations. setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); } // With the instructions enabled under FPCVT, we can do everything. if (Subtarget.hasFPCVT()) { if (Subtarget.has64BitSupport()) { setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); } setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); } if (Subtarget.use64BitRegs()) { // 64-bit PowerPC implementations can support i64 types directly addRegisterClass(MVT::i64, &PPC::G8RCRegClass); // BUILD_PAIR can't be handled natively, and should be expanded to shl/or setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); // 64-bit PowerPC wants to expand i128 shifts itself. setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom); setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); } else { // 32-bit PowerPC wants to expand i64 shifts itself. setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); } if (Subtarget.hasAltivec()) { // First set operation action for all vector types to expand. Then we // will selectively turn on ones that can be effectively codegen'd. for (MVT VT : MVT::vector_valuetypes()) { // add/sub are legal for all supported vector VT's. setOperationAction(ISD::ADD, VT, Legal); setOperationAction(ISD::SUB, VT, Legal); // Vector instructions introduced in P8 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) { setOperationAction(ISD::CTPOP, VT, Legal); setOperationAction(ISD::CTLZ, VT, Legal); } else { setOperationAction(ISD::CTPOP, VT, Expand); setOperationAction(ISD::CTLZ, VT, Expand); } // We promote all shuffles to v16i8. setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote); AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8); // We promote all non-typed operations to v4i32. setOperationAction(ISD::AND , VT, Promote); AddPromotedToType (ISD::AND , VT, MVT::v4i32); setOperationAction(ISD::OR , VT, Promote); AddPromotedToType (ISD::OR , VT, MVT::v4i32); setOperationAction(ISD::XOR , VT, Promote); AddPromotedToType (ISD::XOR , VT, MVT::v4i32); setOperationAction(ISD::LOAD , VT, Promote); AddPromotedToType (ISD::LOAD , VT, MVT::v4i32); setOperationAction(ISD::SELECT, VT, Promote); AddPromotedToType (ISD::SELECT, VT, MVT::v4i32); setOperationAction(ISD::SELECT_CC, VT, Promote); AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32); setOperationAction(ISD::STORE, VT, Promote); AddPromotedToType (ISD::STORE, VT, MVT::v4i32); // No other operations are legal. setOperationAction(ISD::MUL , VT, Expand); setOperationAction(ISD::SDIV, VT, Expand); setOperationAction(ISD::SREM, VT, Expand); setOperationAction(ISD::UDIV, VT, Expand); setOperationAction(ISD::UREM, VT, Expand); setOperationAction(ISD::FDIV, VT, Expand); setOperationAction(ISD::FREM, VT, Expand); setOperationAction(ISD::FNEG, VT, Expand); setOperationAction(ISD::FSQRT, VT, Expand); setOperationAction(ISD::FLOG, VT, Expand); setOperationAction(ISD::FLOG10, VT, Expand); setOperationAction(ISD::FLOG2, VT, Expand); setOperationAction(ISD::FEXP, VT, Expand); setOperationAction(ISD::FEXP2, VT, Expand); setOperationAction(ISD::FSIN, VT, Expand); setOperationAction(ISD::FCOS, VT, Expand); setOperationAction(ISD::FABS, VT, Expand); setOperationAction(ISD::FPOWI, VT, Expand); setOperationAction(ISD::FFLOOR, VT, Expand); setOperationAction(ISD::FCEIL, VT, Expand); setOperationAction(ISD::FTRUNC, VT, Expand); setOperationAction(ISD::FRINT, VT, Expand); setOperationAction(ISD::FNEARBYINT, VT, Expand); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand); setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand); setOperationAction(ISD::BUILD_VECTOR, VT, Expand); setOperationAction(ISD::MULHU, VT, Expand); setOperationAction(ISD::MULHS, VT, Expand); setOperationAction(ISD::UMUL_LOHI, VT, Expand); setOperationAction(ISD::SMUL_LOHI, VT, Expand); setOperationAction(ISD::UDIVREM, VT, Expand); setOperationAction(ISD::SDIVREM, VT, Expand); setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); setOperationAction(ISD::BSWAP, VT, Expand); setOperationAction(ISD::CTTZ, VT, Expand); setOperationAction(ISD::VSELECT, VT, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); setOperationAction(ISD::ROTL, VT, Expand); setOperationAction(ISD::ROTR, VT, Expand); for (MVT InnerVT : MVT::vector_valuetypes()) { setTruncStoreAction(VT, InnerVT, Expand); setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); } } // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle // with merges, splats, etc. setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom); setOperationAction(ISD::AND , MVT::v4i32, Legal); setOperationAction(ISD::OR , MVT::v4i32, Legal); setOperationAction(ISD::XOR , MVT::v4i32, Legal); setOperationAction(ISD::LOAD , MVT::v4i32, Legal); setOperationAction(ISD::SELECT, MVT::v4i32, Subtarget.useCRBits() ? Legal : Expand); setOperationAction(ISD::STORE , MVT::v4i32, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass); addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass); addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass); addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass); setOperationAction(ISD::MUL, MVT::v4f32, Legal); setOperationAction(ISD::FMA, MVT::v4f32, Legal); if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) { setOperationAction(ISD::FDIV, MVT::v4f32, Legal); setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); } if (Subtarget.hasP8Altivec()) setOperationAction(ISD::MUL, MVT::v4i32, Legal); else setOperationAction(ISD::MUL, MVT::v4i32, Custom); setOperationAction(ISD::MUL, MVT::v8i16, Custom); setOperationAction(ISD::MUL, MVT::v16i8, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); // Altivec does not contain unordered floating-point compare instructions setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand); setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand); setCondCodeAction(ISD::SETO, MVT::v4f32, Expand); setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand); if (Subtarget.hasVSX()) { setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); if (Subtarget.hasP8Vector()) { setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal); } if (Subtarget.hasDirectMove() && isPPC64) { setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal); } setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); setOperationAction(ISD::FROUND, MVT::v2f64, Legal); setOperationAction(ISD::FROUND, MVT::v4f32, Legal); setOperationAction(ISD::MUL, MVT::v2f64, Legal); setOperationAction(ISD::FMA, MVT::v2f64, Legal); setOperationAction(ISD::FDIV, MVT::v2f64, Legal); setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); setOperationAction(ISD::VSELECT, MVT::v16i8, Legal); setOperationAction(ISD::VSELECT, MVT::v8i16, Legal); setOperationAction(ISD::VSELECT, MVT::v4i32, Legal); setOperationAction(ISD::VSELECT, MVT::v4f32, Legal); setOperationAction(ISD::VSELECT, MVT::v2f64, Legal); // Share the Altivec comparison restrictions. setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand); setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand); setCondCodeAction(ISD::SETO, MVT::v2f64, Expand); setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand); setOperationAction(ISD::LOAD, MVT::v2f64, Legal); setOperationAction(ISD::STORE, MVT::v2f64, Legal); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal); if (Subtarget.hasP8Vector()) addRegisterClass(MVT::f32, &PPC::VSSRCRegClass); addRegisterClass(MVT::f64, &PPC::VSFRCRegClass); addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass); addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass); addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass); if (Subtarget.hasP8Altivec()) { setOperationAction(ISD::SHL, MVT::v2i64, Legal); setOperationAction(ISD::SRA, MVT::v2i64, Legal); setOperationAction(ISD::SRL, MVT::v2i64, Legal); setOperationAction(ISD::SETCC, MVT::v2i64, Legal); } else { setOperationAction(ISD::SHL, MVT::v2i64, Expand); setOperationAction(ISD::SRA, MVT::v2i64, Expand); setOperationAction(ISD::SRL, MVT::v2i64, Expand); setOperationAction(ISD::SETCC, MVT::v2i64, Custom); // VSX v2i64 only supports non-arithmetic operations. setOperationAction(ISD::ADD, MVT::v2i64, Expand); setOperationAction(ISD::SUB, MVT::v2i64, Expand); } setOperationAction(ISD::LOAD, MVT::v2i64, Promote); AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64); setOperationAction(ISD::STORE, MVT::v2i64, Promote); AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); // Vector operation legalization checks the result type of // SIGN_EXTEND_INREG, overall legalization checks the inner type. setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom); setOperationAction(ISD::FNEG, MVT::v4f32, Legal); setOperationAction(ISD::FNEG, MVT::v2f64, Legal); setOperationAction(ISD::FABS, MVT::v4f32, Legal); setOperationAction(ISD::FABS, MVT::v2f64, Legal); addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass); } if (Subtarget.hasP8Altivec()) { addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass); addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass); } if (Subtarget.hasP9Vector()) { setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal); } } if (Subtarget.hasQPX()) { setOperationAction(ISD::FADD, MVT::v4f64, Legal); setOperationAction(ISD::FSUB, MVT::v4f64, Legal); setOperationAction(ISD::FMUL, MVT::v4f64, Legal); setOperationAction(ISD::FREM, MVT::v4f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal); setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand); setOperationAction(ISD::LOAD , MVT::v4f64, Custom); setOperationAction(ISD::STORE , MVT::v4f64, Custom); setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom); setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom); if (!Subtarget.useCRBits()) setOperationAction(ISD::SELECT, MVT::v4f64, Expand); setOperationAction(ISD::VSELECT, MVT::v4f64, Legal); setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal); setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand); setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand); setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand); setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal); setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom); setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal); setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand); setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal); setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand); setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal); setOperationAction(ISD::FNEG , MVT::v4f64, Legal); setOperationAction(ISD::FABS , MVT::v4f64, Legal); setOperationAction(ISD::FSIN , MVT::v4f64, Expand); setOperationAction(ISD::FCOS , MVT::v4f64, Expand); setOperationAction(ISD::FPOWI , MVT::v4f64, Expand); setOperationAction(ISD::FPOW , MVT::v4f64, Expand); setOperationAction(ISD::FLOG , MVT::v4f64, Expand); setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand); setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand); setOperationAction(ISD::FEXP , MVT::v4f64, Expand); setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand); setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal); setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal); setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal); addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass); setOperationAction(ISD::FADD, MVT::v4f32, Legal); setOperationAction(ISD::FSUB, MVT::v4f32, Legal); setOperationAction(ISD::FMUL, MVT::v4f32, Legal); setOperationAction(ISD::FREM, MVT::v4f32, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal); setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand); setOperationAction(ISD::LOAD , MVT::v4f32, Custom); setOperationAction(ISD::STORE , MVT::v4f32, Custom); if (!Subtarget.useCRBits()) setOperationAction(ISD::SELECT, MVT::v4f32, Expand); setOperationAction(ISD::VSELECT, MVT::v4f32, Legal); setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal); setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand); setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand); setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand); setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal); setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal); setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand); setOperationAction(ISD::FNEG , MVT::v4f32, Legal); setOperationAction(ISD::FABS , MVT::v4f32, Legal); setOperationAction(ISD::FSIN , MVT::v4f32, Expand); setOperationAction(ISD::FCOS , MVT::v4f32, Expand); setOperationAction(ISD::FPOWI , MVT::v4f32, Expand); setOperationAction(ISD::FPOW , MVT::v4f32, Expand); setOperationAction(ISD::FLOG , MVT::v4f32, Expand); setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand); setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand); setOperationAction(ISD::FEXP , MVT::v4f32, Expand); setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand); setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal); setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal); setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal); addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass); setOperationAction(ISD::AND , MVT::v4i1, Legal); setOperationAction(ISD::OR , MVT::v4i1, Legal); setOperationAction(ISD::XOR , MVT::v4i1, Legal); if (!Subtarget.useCRBits()) setOperationAction(ISD::SELECT, MVT::v4i1, Expand); setOperationAction(ISD::VSELECT, MVT::v4i1, Legal); setOperationAction(ISD::LOAD , MVT::v4i1, Custom); setOperationAction(ISD::STORE , MVT::v4i1, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand); setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand); setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand); setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand); setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom); addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass); setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal); setOperationAction(ISD::FCEIL, MVT::v4f64, Legal); setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal); setOperationAction(ISD::FROUND, MVT::v4f64, Legal); setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); setOperationAction(ISD::FROUND, MVT::v4f32, Legal); setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand); setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand); // These need to set FE_INEXACT, and so cannot be vectorized here. setOperationAction(ISD::FRINT, MVT::v4f64, Expand); setOperationAction(ISD::FRINT, MVT::v4f32, Expand); if (TM.Options.UnsafeFPMath) { setOperationAction(ISD::FDIV, MVT::v4f64, Legal); setOperationAction(ISD::FSQRT, MVT::v4f64, Legal); setOperationAction(ISD::FDIV, MVT::v4f32, Legal); setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); } else { setOperationAction(ISD::FDIV, MVT::v4f64, Expand); setOperationAction(ISD::FSQRT, MVT::v4f64, Expand); setOperationAction(ISD::FDIV, MVT::v4f32, Expand); setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); } } if (Subtarget.has64BitSupport()) setOperationAction(ISD::PREFETCH, MVT::Other, Legal); setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom); if (!isPPC64) { setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand); setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); } setBooleanContents(ZeroOrOneBooleanContent); if (Subtarget.hasAltivec()) { // Altivec instructions set fields to all zeros or all ones. setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); } if (!isPPC64) { // These libcalls are not available in 32-bit. setLibcallName(RTLIB::SHL_I128, nullptr); setLibcallName(RTLIB::SRL_I128, nullptr); setLibcallName(RTLIB::SRA_I128, nullptr); } setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1); // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine(ISD::SINT_TO_FP); setTargetDAGCombine(ISD::BUILD_VECTOR); if (Subtarget.hasFPCVT()) setTargetDAGCombine(ISD::UINT_TO_FP); setTargetDAGCombine(ISD::LOAD); setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::BR_CC); if (Subtarget.useCRBits()) setTargetDAGCombine(ISD::BRCOND); setTargetDAGCombine(ISD::BSWAP); setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); setTargetDAGCombine(ISD::INTRINSIC_VOID); setTargetDAGCombine(ISD::SIGN_EXTEND); setTargetDAGCombine(ISD::ZERO_EXTEND); setTargetDAGCombine(ISD::ANY_EXTEND); if (Subtarget.useCRBits()) { setTargetDAGCombine(ISD::TRUNCATE); setTargetDAGCombine(ISD::SETCC); setTargetDAGCombine(ISD::SELECT_CC); } // Use reciprocal estimates. if (TM.Options.UnsafeFPMath) { setTargetDAGCombine(ISD::FDIV); setTargetDAGCombine(ISD::FSQRT); } // Darwin long double math library functions have $LDBL128 appended. if (Subtarget.isDarwin()) { setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128"); setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128"); setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128"); setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128"); setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128"); setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128"); setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128"); setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128"); setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128"); setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128"); } // With 32 condition bits, we don't need to sink (and duplicate) compares // aggressively in CodeGenPrep. if (Subtarget.useCRBits()) { setHasMultipleConditionRegisters(); setJumpIsExpensive(); } setMinFunctionAlignment(2); if (Subtarget.isDarwin()) setPrefFunctionAlignment(4); switch (Subtarget.getDarwinDirective()) { default: break; case PPC::DIR_970: case PPC::DIR_A2: case PPC::DIR_E500mc: case PPC::DIR_E5500: case PPC::DIR_PWR4: case PPC::DIR_PWR5: case PPC::DIR_PWR5X: case PPC::DIR_PWR6: case PPC::DIR_PWR6X: case PPC::DIR_PWR7: case PPC::DIR_PWR8: case PPC::DIR_PWR9: setPrefFunctionAlignment(4); setPrefLoopAlignment(4); break; } if (Subtarget.enableMachineScheduler()) setSchedulingPreference(Sched::Source); else setSchedulingPreference(Sched::Hybrid); computeRegisterProperties(STI.getRegisterInfo()); // The Freescale cores do better with aggressive inlining of memcpy and // friends. GCC uses same threshold of 128 bytes (= 32 word stores). if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc || Subtarget.getDarwinDirective() == PPC::DIR_E5500) { MaxStoresPerMemset = 32; MaxStoresPerMemsetOptSize = 16; MaxStoresPerMemcpy = 32; MaxStoresPerMemcpyOptSize = 8; MaxStoresPerMemmove = 32; MaxStoresPerMemmoveOptSize = 8; } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) { // The A2 also benefits from (very) aggressive inlining of memcpy and // friends. The overhead of a the function call, even when warm, can be // over one hundred cycles. MaxStoresPerMemset = 128; MaxStoresPerMemcpy = 128; MaxStoresPerMemmove = 128; } } /// getMaxByValAlign - Helper for getByValTypeAlignment to determine /// the desired ByVal argument alignment. static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign, unsigned MaxMaxAlign) { if (MaxAlign == MaxMaxAlign) return; if (VectorType *VTy = dyn_cast(Ty)) { if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256) MaxAlign = 32; else if (VTy->getBitWidth() >= 128 && MaxAlign < 16) MaxAlign = 16; } else if (ArrayType *ATy = dyn_cast(Ty)) { unsigned EltAlign = 0; getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign); if (EltAlign > MaxAlign) MaxAlign = EltAlign; } else if (StructType *STy = dyn_cast(Ty)) { for (auto *EltTy : STy->elements()) { unsigned EltAlign = 0; getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign); if (EltAlign > MaxAlign) MaxAlign = EltAlign; if (MaxAlign == MaxMaxAlign) break; } } } /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate /// function arguments in the caller parameter area. unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty, const DataLayout &DL) const { // Darwin passes everything on 4 byte boundary. if (Subtarget.isDarwin()) return 4; // 16byte and wider vectors are passed on 16byte boundary. // The rest is 8 on PPC64 and 4 on PPC32 boundary. unsigned Align = Subtarget.isPPC64() ? 8 : 4; if (Subtarget.hasAltivec() || Subtarget.hasQPX()) getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16); return Align; } bool PPCTargetLowering::useSoftFloat() const { return Subtarget.useSoftFloat(); } const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { switch ((PPCISD::NodeType)Opcode) { case PPCISD::FIRST_NUMBER: break; case PPCISD::FSEL: return "PPCISD::FSEL"; case PPCISD::FCFID: return "PPCISD::FCFID"; case PPCISD::FCFIDU: return "PPCISD::FCFIDU"; case PPCISD::FCFIDS: return "PPCISD::FCFIDS"; case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS"; case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ"; case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ"; case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ"; case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ"; case PPCISD::FRE: return "PPCISD::FRE"; case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE"; case PPCISD::STFIWX: return "PPCISD::STFIWX"; case PPCISD::VMADDFP: return "PPCISD::VMADDFP"; case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP"; case PPCISD::VPERM: return "PPCISD::VPERM"; case PPCISD::XXSPLT: return "PPCISD::XXSPLT"; case PPCISD::XXINSERT: return "PPCISD::XXINSERT"; case PPCISD::VECSHL: return "PPCISD::VECSHL"; case PPCISD::CMPB: return "PPCISD::CMPB"; case PPCISD::Hi: return "PPCISD::Hi"; case PPCISD::Lo: return "PPCISD::Lo"; case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY"; case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC"; case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET"; case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg"; case PPCISD::SRL: return "PPCISD::SRL"; case PPCISD::SRA: return "PPCISD::SRA"; case PPCISD::SHL: return "PPCISD::SHL"; case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE"; case PPCISD::CALL: return "PPCISD::CALL"; case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP"; case PPCISD::MTCTR: return "PPCISD::MTCTR"; case PPCISD::BCTRL: return "PPCISD::BCTRL"; case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC"; case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG"; case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE"; case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP"; case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP"; case PPCISD::MFOCRF: return "PPCISD::MFOCRF"; case PPCISD::MFVSR: return "PPCISD::MFVSR"; case PPCISD::MTVSRA: return "PPCISD::MTVSRA"; case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ"; case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP"; case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP"; case PPCISD::ANDIo_1_EQ_BIT: return "PPCISD::ANDIo_1_EQ_BIT"; case PPCISD::ANDIo_1_GT_BIT: return "PPCISD::ANDIo_1_GT_BIT"; case PPCISD::VCMP: return "PPCISD::VCMP"; case PPCISD::VCMPo: return "PPCISD::VCMPo"; case PPCISD::LBRX: return "PPCISD::LBRX"; case PPCISD::STBRX: return "PPCISD::STBRX"; case PPCISD::LFIWAX: return "PPCISD::LFIWAX"; case PPCISD::LFIWZX: return "PPCISD::LFIWZX"; case PPCISD::LXVD2X: return "PPCISD::LXVD2X"; case PPCISD::STXVD2X: return "PPCISD::STXVD2X"; case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH"; case PPCISD::BDNZ: return "PPCISD::BDNZ"; case PPCISD::BDZ: return "PPCISD::BDZ"; case PPCISD::MFFS: return "PPCISD::MFFS"; case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ"; case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN"; case PPCISD::CR6SET: return "PPCISD::CR6SET"; case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET"; case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT"; case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT"; case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA"; case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L"; case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS"; case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA"; case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L"; case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR"; case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR"; case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA"; case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L"; case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR"; case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR"; case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA"; case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L"; case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT"; case PPCISD::SC: return "PPCISD::SC"; case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB"; case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE"; case PPCISD::RFEBB: return "PPCISD::RFEBB"; case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD"; case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN"; case PPCISD::QVFPERM: return "PPCISD::QVFPERM"; case PPCISD::QVGPCI: return "PPCISD::QVGPCI"; case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI"; case PPCISD::QVESPLATI: return "PPCISD::QVESPLATI"; case PPCISD::QBFLT: return "PPCISD::QBFLT"; case PPCISD::QVLFSb: return "PPCISD::QVLFSb"; } return nullptr; } EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C, EVT VT) const { if (!VT.isVector()) return Subtarget.useCRBits() ? MVT::i1 : MVT::i32; if (Subtarget.hasQPX()) return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements()); return VT.changeVectorElementTypeToInteger(); } bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const { assert(VT.isFloatingPoint() && "Non-floating-point FMA?"); return true; } //===----------------------------------------------------------------------===// // Node matching predicates, for use by the tblgen matching code. //===----------------------------------------------------------------------===// /// isFloatingPointZero - Return true if this is 0.0 or -0.0. static bool isFloatingPointZero(SDValue Op) { if (ConstantFPSDNode *CFP = dyn_cast(Op)) return CFP->getValueAPF().isZero(); else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { // Maybe this has already been legalized into the constant pool? if (ConstantPoolSDNode *CP = dyn_cast(Op.getOperand(1))) if (const ConstantFP *CFP = dyn_cast(CP->getConstVal())) return CFP->getValueAPF().isZero(); } return false; } /// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return /// true if Op is undef or if it matches the specified value. static bool isConstantOrUndef(int Op, int Val) { return Op < 0 || Op == Val; } /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUHUM instruction. /// The ShuffleKind distinguishes between big-endian operations with /// two different inputs (0), either-endian operations with two identical /// inputs (1), and little-endian operations with two different inputs (2). /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG) { bool IsLE = DAG.getDataLayout().isLittleEndian(); if (ShuffleKind == 0) { if (IsLE) return false; for (unsigned i = 0; i != 16; ++i) if (!isConstantOrUndef(N->getMaskElt(i), i*2+1)) return false; } else if (ShuffleKind == 2) { if (!IsLE) return false; for (unsigned i = 0; i != 16; ++i) if (!isConstantOrUndef(N->getMaskElt(i), i*2)) return false; } else if (ShuffleKind == 1) { unsigned j = IsLE ? 0 : 1; for (unsigned i = 0; i != 8; ++i) if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) || !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)) return false; } return true; } /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUWUM instruction. /// The ShuffleKind distinguishes between big-endian operations with /// two different inputs (0), either-endian operations with two identical /// inputs (1), and little-endian operations with two different inputs (2). /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG) { bool IsLE = DAG.getDataLayout().isLittleEndian(); if (ShuffleKind == 0) { if (IsLE) return false; for (unsigned i = 0; i != 16; i += 2) if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) || !isConstantOrUndef(N->getMaskElt(i+1), i*2+3)) return false; } else if (ShuffleKind == 2) { if (!IsLE) return false; for (unsigned i = 0; i != 16; i += 2) if (!isConstantOrUndef(N->getMaskElt(i ), i*2) || !isConstantOrUndef(N->getMaskElt(i+1), i*2+1)) return false; } else if (ShuffleKind == 1) { unsigned j = IsLE ? 0 : 2; for (unsigned i = 0; i != 8; i += 2) if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) || !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) || !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1)) return false; } return true; } /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUDUM instruction, AND the VPKUDUM instruction exists for the /// current subtarget. /// /// The ShuffleKind distinguishes between big-endian operations with /// two different inputs (0), either-endian operations with two identical /// inputs (1), and little-endian operations with two different inputs (2). /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG) { const PPCSubtarget& Subtarget = static_cast(DAG.getSubtarget()); if (!Subtarget.hasP8Vector()) return false; bool IsLE = DAG.getDataLayout().isLittleEndian(); if (ShuffleKind == 0) { if (IsLE) return false; for (unsigned i = 0; i != 16; i += 4) if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) || !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) || !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) || !isConstantOrUndef(N->getMaskElt(i+3), i*2+7)) return false; } else if (ShuffleKind == 2) { if (!IsLE) return false; for (unsigned i = 0; i != 16; i += 4) if (!isConstantOrUndef(N->getMaskElt(i ), i*2) || !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) || !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) || !isConstantOrUndef(N->getMaskElt(i+3), i*2+3)) return false; } else if (ShuffleKind == 1) { unsigned j = IsLE ? 0 : 4; for (unsigned i = 0; i != 8; i += 4) if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) || !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) || !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) || !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) || !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) || !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) || !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3)) return false; } return true; } /// isVMerge - Common function, used to match vmrg* shuffles. /// static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart) { if (N->getValueType(0) != MVT::v16i8) return false; assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) && "Unsupported merge size!"); for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j), LHSStart+j+i*UnitSize) || !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j), RHSStart+j+i*UnitSize)) return false; } return true; } /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes). /// The ShuffleKind distinguishes between big-endian merges with two /// different inputs (0), either-endian merges with two identical inputs (1), /// and little-endian merges with two different inputs (2). For the latter, /// the input operands are swapped (see PPCInstrAltivec.td). bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG) { if (DAG.getDataLayout().isLittleEndian()) { if (ShuffleKind == 1) // unary return isVMerge(N, UnitSize, 0, 0); else if (ShuffleKind == 2) // swapped return isVMerge(N, UnitSize, 0, 16); else return false; } else { if (ShuffleKind == 1) // unary return isVMerge(N, UnitSize, 8, 8); else if (ShuffleKind == 0) // normal return isVMerge(N, UnitSize, 8, 24); else return false; } } /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes). /// The ShuffleKind distinguishes between big-endian merges with two /// different inputs (0), either-endian merges with two identical inputs (1), /// and little-endian merges with two different inputs (2). For the latter, /// the input operands are swapped (see PPCInstrAltivec.td). bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG) { if (DAG.getDataLayout().isLittleEndian()) { if (ShuffleKind == 1) // unary return isVMerge(N, UnitSize, 8, 8); else if (ShuffleKind == 2) // swapped return isVMerge(N, UnitSize, 8, 24); else return false; } else { if (ShuffleKind == 1) // unary return isVMerge(N, UnitSize, 0, 0); else if (ShuffleKind == 0) // normal return isVMerge(N, UnitSize, 0, 16); else return false; } } /** * \brief Common function used to match vmrgew and vmrgow shuffles * * The indexOffset determines whether to look for even or odd words in * the shuffle mask. This is based on the of the endianness of the target * machine. * - Little Endian: * - Use offset of 0 to check for odd elements * - Use offset of 4 to check for even elements * - Big Endian: * - Use offset of 0 to check for even elements * - Use offset of 4 to check for odd elements * A detailed description of the vector element ordering for little endian and * big endian can be found at * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html * Targeting your applications - what little endian and big endian IBM XL C/C++ * compiler differences mean to you * * The mask to the shuffle vector instruction specifies the indices of the * elements from the two input vectors to place in the result. The elements are * numbered in array-access order, starting with the first vector. These vectors * are always of type v16i8, thus each vector will contain 16 elements of size * 8. More info on the shuffle vector can be found in the * http://llvm.org/docs/LangRef.html#shufflevector-instruction * Language Reference. * * The RHSStartValue indicates whether the same input vectors are used (unary) * or two different input vectors are used, based on the following: * - If the instruction uses the same vector for both inputs, the range of the * indices will be 0 to 15. In this case, the RHSStart value passed should * be 0. * - If the instruction has two different vectors then the range of the * indices will be 0 to 31. In this case, the RHSStart value passed should * be 16 (indices 0-15 specify elements in the first vector while indices 16 * to 31 specify elements in the second vector). * * \param[in] N The shuffle vector SD Node to analyze * \param[in] IndexOffset Specifies whether to look for even or odd elements * \param[in] RHSStartValue Specifies the starting index for the righthand input * vector to the shuffle_vector instruction * \return true iff this shuffle vector represents an even or odd word merge */ static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset, unsigned RHSStartValue) { if (N->getValueType(0) != MVT::v16i8) return false; for (unsigned i = 0; i < 2; ++i) for (unsigned j = 0; j < 4; ++j) if (!isConstantOrUndef(N->getMaskElt(i*4+j), i*RHSStartValue+j+IndexOffset) || !isConstantOrUndef(N->getMaskElt(i*4+j+8), i*RHSStartValue+j+IndexOffset+8)) return false; return true; } /** * \brief Determine if the specified shuffle mask is suitable for the vmrgew or * vmrgow instructions. * * \param[in] N The shuffle vector SD Node to analyze * \param[in] CheckEven Check for an even merge (true) or an odd merge (false) * \param[in] ShuffleKind Identify the type of merge: * - 0 = big-endian merge with two different inputs; * - 1 = either-endian merge with two identical inputs; * - 2 = little-endian merge with two different inputs (inputs are swapped for * little-endian merges). * \param[in] DAG The current SelectionDAG * \return true iff this shuffle mask */ bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG) { if (DAG.getDataLayout().isLittleEndian()) { unsigned indexOffset = CheckEven ? 4 : 0; if (ShuffleKind == 1) // Unary return isVMerge(N, indexOffset, 0); else if (ShuffleKind == 2) // swapped return isVMerge(N, indexOffset, 16); else return false; } else { unsigned indexOffset = CheckEven ? 0 : 4; if (ShuffleKind == 1) // Unary return isVMerge(N, indexOffset, 0); else if (ShuffleKind == 0) // Normal return isVMerge(N, indexOffset, 16); else return false; } return false; } /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift /// amount, otherwise return -1. /// The ShuffleKind distinguishes between big-endian operations with two /// different inputs (0), either-endian operations with two identical inputs /// (1), and little-endian operations with two different inputs (2). For the /// latter, the input operands are swapped (see PPCInstrAltivec.td). int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG) { if (N->getValueType(0) != MVT::v16i8) return -1; ShuffleVectorSDNode *SVOp = cast(N); // Find the first non-undef value in the shuffle mask. unsigned i; for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i) /*search*/; if (i == 16) return -1; // all undef. // Otherwise, check to see if the rest of the elements are consecutively // numbered from this value. unsigned ShiftAmt = SVOp->getMaskElt(i); if (ShiftAmt < i) return -1; ShiftAmt -= i; bool isLE = DAG.getDataLayout().isLittleEndian(); if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) { // Check the rest of the elements to see if they are consecutive. for (++i; i != 16; ++i) if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) return -1; } else if (ShuffleKind == 1) { // Check the rest of the elements to see if they are consecutive. for (++i; i != 16; ++i) if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15)) return -1; } else return -1; if (isLE) ShiftAmt = 16 - ShiftAmt; return ShiftAmt; } /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a splat of a single element that is suitable for input to /// VSPLTB/VSPLTH/VSPLTW. bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { assert(N->getValueType(0) == MVT::v16i8 && (EltSize == 1 || EltSize == 2 || EltSize == 4)); // The consecutive indices need to specify an element, not part of two // different elements. So abandon ship early if this isn't the case. if (N->getMaskElt(0) % EltSize != 0) return false; // This is a splat operation if each element of the permute is the same, and // if the value doesn't reference the second vector. unsigned ElementBase = N->getMaskElt(0); // FIXME: Handle UNDEF elements too! if (ElementBase >= 16) return false; // Check that the indices are consecutive, in the case of a multi-byte element // splatted with a v16i8 mask. for (unsigned i = 1; i != EltSize; ++i) if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase)) return false; for (unsigned i = EltSize, e = 16; i != e; i += EltSize) { if (N->getMaskElt(i) < 0) continue; for (unsigned j = 0; j != EltSize; ++j) if (N->getMaskElt(i+j) != N->getMaskElt(j)) return false; } return true; } bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE) { // Check that the mask is shuffling words for (unsigned i = 0; i < 4; ++i) { unsigned B0 = N->getMaskElt(i*4); unsigned B1 = N->getMaskElt(i*4+1); unsigned B2 = N->getMaskElt(i*4+2); unsigned B3 = N->getMaskElt(i*4+3); if (B0 % 4) return false; if (B1 != B0+1 || B2 != B1+1 || B3 != B2+1) return false; } // Now we look at mask elements 0,4,8,12 unsigned M0 = N->getMaskElt(0) / 4; unsigned M1 = N->getMaskElt(4) / 4; unsigned M2 = N->getMaskElt(8) / 4; unsigned M3 = N->getMaskElt(12) / 4; unsigned LittleEndianShifts[] = { 2, 1, 0, 3 }; unsigned BigEndianShifts[] = { 3, 0, 1, 2 }; // Below, let H and L be arbitrary elements of the shuffle mask // where H is in the range [4,7] and L is in the range [0,3]. // H, 1, 2, 3 or L, 5, 6, 7 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) || (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) { ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3]; InsertAtByte = IsLE ? 12 : 0; Swap = M0 < 4; return true; } // 0, H, 2, 3 or 4, L, 6, 7 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) || (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) { ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3]; InsertAtByte = IsLE ? 8 : 4; Swap = M1 < 4; return true; } // 0, 1, H, 3 or 4, 5, L, 7 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) || (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) { ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3]; InsertAtByte = IsLE ? 4 : 8; Swap = M2 < 4; return true; } // 0, 1, 2, H or 4, 5, 6, L if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) || (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) { ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3]; InsertAtByte = IsLE ? 0 : 12; Swap = M3 < 4; return true; } // If both vector operands for the shuffle are the same vector, the mask will // contain only elements from the first one and the second one will be undef. if (N->getOperand(1).isUndef()) { ShiftElts = 0; Swap = true; unsigned XXINSERTWSrcElem = IsLE ? 2 : 1; if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) { InsertAtByte = IsLE ? 12 : 0; return true; } if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) { InsertAtByte = IsLE ? 8 : 4; return true; } if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) { InsertAtByte = IsLE ? 4 : 8; return true; } if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) { InsertAtByte = IsLE ? 0 : 12; return true; } } return false; } /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG) { ShuffleVectorSDNode *SVOp = cast(N); assert(isSplatShuffleMask(SVOp, EltSize)); if (DAG.getDataLayout().isLittleEndian()) return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize); else return SVOp->getMaskElt(0) / EltSize; } /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed /// by using a vspltis[bhw] instruction of the specified element size, return /// the constant being splatted. The ByteSize field indicates the number of /// bytes of each element [124] -> [bhw]. SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { SDValue OpVal(nullptr, 0); // If ByteSize of the splat is bigger than the element size of the // build_vector, then we have a case where we are checking for a splat where // multiple elements of the buildvector are folded together into a single // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8). unsigned EltSize = 16/N->getNumOperands(); if (EltSize < ByteSize) { unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval. SDValue UniquedVals[4]; assert(Multiple > 1 && Multiple <= 4 && "How can this happen?"); // See if all of the elements in the buildvector agree across. for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { if (N->getOperand(i).isUndef()) continue; // If the element isn't a constant, bail fully out. if (!isa(N->getOperand(i))) return SDValue(); if (!UniquedVals[i&(Multiple-1)].getNode()) UniquedVals[i&(Multiple-1)] = N->getOperand(i); else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i)) return SDValue(); // no match. } // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains // either constant or undef values that are identical for each chunk. See // if these chunks can form into a larger vspltis*. // Check to see if all of the leading entries are either 0 or -1. If // neither, then this won't fit into the immediate field. bool LeadingZero = true; bool LeadingOnes = true; for (unsigned i = 0; i != Multiple-1; ++i) { if (!UniquedVals[i].getNode()) continue; // Must have been undefs. LeadingZero &= isNullConstant(UniquedVals[i]); LeadingOnes &= isAllOnesConstant(UniquedVals[i]); } // Finally, check the least significant entry. if (LeadingZero) { if (!UniquedVals[Multiple-1].getNode()) return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef int Val = cast(UniquedVals[Multiple-1])->getZExtValue(); if (Val < 16) // 0,0,0,4 -> vspltisw(4) return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32); } if (LeadingOnes) { if (!UniquedVals[Multiple-1].getNode()) return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef int Val =cast(UniquedVals[Multiple-1])->getSExtValue(); if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2) return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32); } return SDValue(); } // Check to see if this buildvec has a single non-undef value in its elements. for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { if (N->getOperand(i).isUndef()) continue; if (!OpVal.getNode()) OpVal = N->getOperand(i); else if (OpVal != N->getOperand(i)) return SDValue(); } if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def. unsigned ValSizeInBytes = EltSize; uint64_t Value = 0; if (ConstantSDNode *CN = dyn_cast(OpVal)) { Value = CN->getZExtValue(); } else if (ConstantFPSDNode *CN = dyn_cast(OpVal)) { assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!"); Value = FloatToBits(CN->getValueAPF().convertToFloat()); } // If the splat value is larger than the element value, then we can never do // this splat. The only case that we could fit the replicated bits into our // immediate field for would be zero, and we prefer to use vxor for it. if (ValSizeInBytes < ByteSize) return SDValue(); // If the element value is larger than the splat value, check if it consists // of a repeated bit pattern of size ByteSize. if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8)) return SDValue(); // Properly sign extend the value. int MaskVal = SignExtend32(Value, ByteSize * 8); // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros. if (MaskVal == 0) return SDValue(); // Finally, if this value fits in a 5 bit sext field, return it if (SignExtend32<5>(MaskVal) == MaskVal) return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32); return SDValue(); } /// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift /// amount, otherwise return -1. int PPC::isQVALIGNIShuffleMask(SDNode *N) { EVT VT = N->getValueType(0); if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1) return -1; ShuffleVectorSDNode *SVOp = cast(N); // Find the first non-undef value in the shuffle mask. unsigned i; for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i) /*search*/; if (i == 4) return -1; // all undef. // Otherwise, check to see if the rest of the elements are consecutively // numbered from this value. unsigned ShiftAmt = SVOp->getMaskElt(i); if (ShiftAmt < i) return -1; ShiftAmt -= i; // Check the rest of the elements to see if they are consecutive. for (++i; i != 4; ++i) if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) return -1; return ShiftAmt; } //===----------------------------------------------------------------------===// // Addressing Mode Selection //===----------------------------------------------------------------------===// /// isIntS16Immediate - This method tests to see if the node is either a 32-bit /// or 64-bit immediate, and if the value can be accurately represented as a /// sign extension from a 16-bit value. If so, this returns true and the /// immediate. static bool isIntS16Immediate(SDNode *N, short &Imm) { if (!isa(N)) return false; Imm = (short)cast(N)->getZExtValue(); if (N->getValueType(0) == MVT::i32) return Imm == (int32_t)cast(N)->getZExtValue(); else return Imm == (int64_t)cast(N)->getZExtValue(); } static bool isIntS16Immediate(SDValue Op, short &Imm) { return isIntS16Immediate(Op.getNode(), Imm); } /// SelectAddressRegReg - Given the specified addressed, check to see if it /// can be represented as an indexed [r+r] operation. Returns false if it /// can be more efficiently represented with [r+imm]. bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const { short imm = 0; if (N.getOpcode() == ISD::ADD) { if (isIntS16Immediate(N.getOperand(1), imm)) return false; // r+i if (N.getOperand(1).getOpcode() == PPCISD::Lo) return false; // r+i Base = N.getOperand(0); Index = N.getOperand(1); return true; } else if (N.getOpcode() == ISD::OR) { if (isIntS16Immediate(N.getOperand(1), imm)) return false; // r+i can fold it if we can. // If this is an or of disjoint bitfields, we can codegen this as an add // (for better address arithmetic) if the LHS and RHS of the OR are provably // disjoint. APInt LHSKnownZero, LHSKnownOne; APInt RHSKnownZero, RHSKnownOne; DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne); if (LHSKnownZero.getBoolValue()) { DAG.computeKnownBits(N.getOperand(1), RHSKnownZero, RHSKnownOne); // If all of the bits are known zero on the LHS or RHS, the add won't // carry. if (~(LHSKnownZero | RHSKnownZero) == 0) { Base = N.getOperand(0); Index = N.getOperand(1); return true; } } } return false; } // If we happen to be doing an i64 load or store into a stack slot that has // less than a 4-byte alignment, then the frame-index elimination may need to // use an indexed load or store instruction (because the offset may not be a // multiple of 4). The extra register needed to hold the offset comes from the // register scavenger, and it is possible that the scavenger will need to use // an emergency spill slot. As a result, we need to make sure that a spill slot // is allocated when doing an i64 load/store into a less-than-4-byte-aligned // stack slot. static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) { // FIXME: This does not handle the LWA case. if (VT != MVT::i64) return; // NOTE: We'll exclude negative FIs here, which come from argument // lowering, because there are no known test cases triggering this problem // using packed structures (or similar). We can remove this exclusion if // we find such a test case. The reason why this is so test-case driven is // because this entire 'fixup' is only to prevent crashes (from the // register scavenger) on not-really-valid inputs. For example, if we have: // %a = alloca i1 // %b = bitcast i1* %a to i64* // store i64* a, i64 b // then the store should really be marked as 'align 1', but is not. If it // were marked as 'align 1' then the indexed form would have been // instruction-selected initially, and the problem this 'fixup' is preventing // won't happen regardless. if (FrameIdx < 0) return; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); unsigned Align = MFI->getObjectAlignment(FrameIdx); if (Align >= 4) return; PPCFunctionInfo *FuncInfo = MF.getInfo(); FuncInfo->setHasNonRISpills(); } /// Returns true if the address N can be represented by a base register plus /// a signed 16-bit displacement [r+imm], and if it is not better /// represented as reg+reg. If Aligned is true, only accept displacements /// suitable for STD and friends, i.e. multiples of 4. bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, bool Aligned) const { // FIXME dl should come from parent load or store, not from address SDLoc dl(N); // If this can be more profitably realized as r+r, fail. if (SelectAddressRegReg(N, Disp, Base, DAG)) return false; if (N.getOpcode() == ISD::ADD) { short imm = 0; if (isIntS16Immediate(N.getOperand(1), imm) && (!Aligned || (imm & 3) == 0)) { Disp = DAG.getTargetConstant(imm, dl, N.getValueType()); if (FrameIndexSDNode *FI = dyn_cast(N.getOperand(0))) { Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); } else { Base = N.getOperand(0); } return true; // [r+i] } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) { // Match LOAD (ADD (X, Lo(G))). assert(!cast(N.getOperand(1).getOperand(1))->getZExtValue() && "Cannot handle constant offsets yet!"); Disp = N.getOperand(1).getOperand(0); // The global address. assert(Disp.getOpcode() == ISD::TargetGlobalAddress || Disp.getOpcode() == ISD::TargetGlobalTLSAddress || Disp.getOpcode() == ISD::TargetConstantPool || Disp.getOpcode() == ISD::TargetJumpTable); Base = N.getOperand(0); return true; // [&g+r] } } else if (N.getOpcode() == ISD::OR) { short imm = 0; if (isIntS16Immediate(N.getOperand(1), imm) && (!Aligned || (imm & 3) == 0)) { // If this is an or of disjoint bitfields, we can codegen this as an add // (for better address arithmetic) if the LHS and RHS of the OR are // provably disjoint. APInt LHSKnownZero, LHSKnownOne; DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne); if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { // If all of the bits are known zero on the LHS or RHS, the add won't // carry. if (FrameIndexSDNode *FI = dyn_cast(N.getOperand(0))) { Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); } else { Base = N.getOperand(0); } Disp = DAG.getTargetConstant(imm, dl, N.getValueType()); return true; } } } else if (ConstantSDNode *CN = dyn_cast(N)) { // Loading from a constant address. // If this address fits entirely in a 16-bit sext immediate field, codegen // this as "d, 0" short Imm; if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) { Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0)); Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, CN->getValueType(0)); return true; } // Handle 32-bit sext immediates with LIS + addr mode. if ((CN->getValueType(0) == MVT::i32 || (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) && (!Aligned || (CN->getZExtValue() & 3) == 0)) { int Addr = (int)CN->getZExtValue(); // Otherwise, break this down into an LIS + disp. Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32); Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl, MVT::i32); unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8; Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0); return true; } } Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout())); if (FrameIndexSDNode *FI = dyn_cast(N)) { Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); } else Base = N; return true; // [r+0] } /// SelectAddressRegRegOnly - Given the specified addressed, force it to be /// represented as an indexed [r+r] operation. bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const { // Check to see if we can easily represent this as an [r+r] address. This // will fail if it thinks that the address is more profitably represented as // reg+imm, e.g. where imm = 0. if (SelectAddressRegReg(N, Base, Index, DAG)) return true; // If the operand is an addition, always emit this as [r+r], since this is // better (for code size, and execution, as the memop does the add for free) // than emitting an explicit add. if (N.getOpcode() == ISD::ADD) { Base = N.getOperand(0); Index = N.getOperand(1); return true; } // Otherwise, do it the hard way, using R0 as the base register. Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, N.getValueType()); Index = N; return true; } /// getPreIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if the node's address /// can be legally represented as pre-indexed load / store address. bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const { if (DisablePPCPreinc) return false; bool isLoad = true; SDValue Ptr; EVT VT; unsigned Alignment; if (LoadSDNode *LD = dyn_cast(N)) { Ptr = LD->getBasePtr(); VT = LD->getMemoryVT(); Alignment = LD->getAlignment(); } else if (StoreSDNode *ST = dyn_cast(N)) { Ptr = ST->getBasePtr(); VT = ST->getMemoryVT(); Alignment = ST->getAlignment(); isLoad = false; } else return false; // PowerPC doesn't have preinc load/store instructions for vectors (except // for QPX, which does have preinc r+r forms). if (VT.isVector()) { if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) { return false; } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) { AM = ISD::PRE_INC; return true; } } if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) { // Common code will reject creating a pre-inc form if the base pointer // is a frame index, or if N is a store and the base pointer is either // the same as or a predecessor of the value being stored. Check for // those situations here, and try with swapped Base/Offset instead. bool Swap = false; if (isa(Base) || isa(Base)) Swap = true; else if (!isLoad) { SDValue Val = cast(N)->getValue(); if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode())) Swap = true; } if (Swap) std::swap(Base, Offset); AM = ISD::PRE_INC; return true; } // LDU/STU can only handle immediates that are a multiple of 4. if (VT != MVT::i64) { if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false)) return false; } else { // LDU/STU need an address with at least 4-byte alignment. if (Alignment < 4) return false; if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true)) return false; } if (LoadSDNode *LD = dyn_cast(N)) { // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of // sext i32 to i64 when addr mode is r+i. if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 && LD->getExtensionType() == ISD::SEXTLOAD && isa(Offset)) return false; } AM = ISD::PRE_INC; return true; } //===----------------------------------------------------------------------===// // LowerOperation implementation //===----------------------------------------------------------------------===// /// Return true if we should reference labels using a PICBase, set the HiOpFlags /// and LoOpFlags to the target MO flags. static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV = nullptr) { HiOpFlags = PPCII::MO_HA; LoOpFlags = PPCII::MO_LO; // Don't use the pic base if not in PIC relocation model. if (IsPIC) { HiOpFlags |= PPCII::MO_PIC_FLAG; LoOpFlags |= PPCII::MO_PIC_FLAG; } // If this is a reference to a global value that requires a non-lazy-ptr, make // sure that instruction lowering adds it. if (GV && Subtarget.hasLazyResolverStub(GV)) { HiOpFlags |= PPCII::MO_NLP_FLAG; LoOpFlags |= PPCII::MO_NLP_FLAG; if (GV->hasHiddenVisibility()) { HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG; LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG; } } } static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG) { SDLoc DL(HiPart); EVT PtrVT = HiPart.getValueType(); SDValue Zero = DAG.getConstant(0, DL, PtrVT); SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero); SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero); // With PIC, the first instruction is actually "GR+hi(&G)". if (isPIC) Hi = DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi); // Generate non-pic code that has direct accesses to the constant pool. // The address of the global is just (hi(&g)+lo(&g)). return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo); } static void setUsesTOCBasePtr(MachineFunction &MF) { PPCFunctionInfo *FuncInfo = MF.getInfo(); FuncInfo->setUsesTOCBasePtr(); } static void setUsesTOCBasePtr(SelectionDAG &DAG) { setUsesTOCBasePtr(DAG.getMachineFunction()); } static SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, bool Is64Bit, SDValue GA) { EVT VT = Is64Bit ? MVT::i64 : MVT::i32; SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT); SDValue Ops[] = { GA, Reg }; return DAG.getMemIntrinsicNode( PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT, MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0, false, true, false, 0); } SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); ConstantPoolSDNode *CP = cast(Op); const Constant *C = CP->getConstVal(); // 64-bit SVR4 ABI code is always position-independent. // The actual address of the GlobalValue is stored in the TOC. if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0); return getTOCEntry(DAG, SDLoc(CP), true, GA); } unsigned MOHiFlag, MOLoFlag; bool IsPIC = isPositionIndependent(); getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); if (IsPIC && Subtarget.isSVR4ABI()) { SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), PPCII::MO_PIC_FLAG); return getTOCEntry(DAG, SDLoc(CP), false, GA); } SDValue CPIHi = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag); SDValue CPILo = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag); return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG); } SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); JumpTableSDNode *JT = cast(Op); // 64-bit SVR4 ABI code is always position-independent. // The actual address of the GlobalValue is stored in the TOC. if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); return getTOCEntry(DAG, SDLoc(JT), true, GA); } unsigned MOHiFlag, MOLoFlag; bool IsPIC = isPositionIndependent(); getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); if (IsPIC && Subtarget.isSVR4ABI()) { SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, PPCII::MO_PIC_FLAG); return getTOCEntry(DAG, SDLoc(GA), false, GA); } SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag); SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag); return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG); } SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); BlockAddressSDNode *BASDN = cast(Op); const BlockAddress *BA = BASDN->getBlockAddress(); // 64-bit SVR4 ABI code is always position-independent. // The actual BlockAddress is stored in the TOC. if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()); return getTOCEntry(DAG, SDLoc(BASDN), true, GA); } unsigned MOHiFlag, MOLoFlag; bool IsPIC = isPositionIndependent(); getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag); SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag); return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG); } SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { // FIXME: TLS addresses currently use medium model code sequences, // which is the most useful form. Eventually support for small and // large models could be added if users need it, at the cost of // additional complexity. GlobalAddressSDNode *GA = cast(Op); if (DAG.getTarget().Options.EmulatedTLS) return LowerToTLSEmulatedModel(GA, DAG); SDLoc dl(GA); const GlobalValue *GV = GA->getGlobal(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); bool is64bit = Subtarget.isPPC64(); const Module *M = DAG.getMachineFunction().getFunction()->getParent(); PICLevel::Level picLevel = M->getPICLevel(); TLSModel::Model Model = getTargetMachine().getTLSModel(GV); if (Model == TLSModel::LocalExec) { SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_HA); SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_LO); SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2, is64bit ? MVT::i64 : MVT::i32); SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg); return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi); } if (Model == TLSModel::InitialExec) { SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLS); SDValue GOTPtr; if (is64bit) { setUsesTOCBasePtr(DAG); SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA); } else GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT); SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr); return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS); } if (Model == TLSModel::GeneralDynamic) { SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); SDValue GOTPtr; if (is64bit) { setUsesTOCBasePtr(DAG); SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT, GOTReg, TGA); } else { if (picLevel == PICLevel::SmallPIC) GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT); else GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); } return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT, GOTPtr, TGA, TGA); } if (Model == TLSModel::LocalDynamic) { SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); SDValue GOTPtr; if (is64bit) { setUsesTOCBasePtr(DAG); SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT, GOTReg, TGA); } else { if (picLevel == PICLevel::SmallPIC) GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT); else GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); } SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl, PtrVT, GOTPtr, TGA, TGA); SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT, TLSAddr, TGA); return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA); } llvm_unreachable("Unknown TLS model!"); } SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); GlobalAddressSDNode *GSDN = cast(Op); SDLoc DL(GSDN); const GlobalValue *GV = GSDN->getGlobal(); // 64-bit SVR4 ABI code is always position-independent. // The actual address of the GlobalValue is stored in the TOC. if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset()); return getTOCEntry(DAG, DL, true, GA); } unsigned MOHiFlag, MOLoFlag; bool IsPIC = isPositionIndependent(); getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV); if (IsPIC && Subtarget.isSVR4ABI()) { SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), PPCII::MO_PIC_FLAG); return getTOCEntry(DAG, DL, false, GA); } SDValue GAHi = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag); SDValue GALo = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag); SDValue Ptr = LowerLabelRef(GAHi, GALo, IsPIC, DAG); // If the global reference is actually to a non-lazy-pointer, we have to do an // extra load to get the address of the global. if (MOHiFlag & PPCII::MO_NLP_FLAG) Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); return Ptr; } SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { ISD::CondCode CC = cast(Op.getOperand(2))->get(); SDLoc dl(Op); if (Op.getValueType() == MVT::v2i64) { // When the operands themselves are v2i64 values, we need to do something // special because VSX has no underlying comparison operations for these. if (Op.getOperand(0).getValueType() == MVT::v2i64) { // Equality can be handled by casting to the legal type for Altivec // comparisons, everything else needs to be expanded. if (CC == ISD::SETEQ || CC == ISD::SETNE) { return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, DAG.getSetCC(dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)), DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)), CC)); } return SDValue(); } // We handle most of these in the usual way. return Op; } // If we're comparing for equality to zero, expose the fact that this is // implemented as a ctlz/srl pair on ppc, so that the dag combiner can // fold the new nodes. if (ConstantSDNode *C = dyn_cast(Op.getOperand(1))) { if (C->isNullValue() && CC == ISD::SETEQ) { EVT VT = Op.getOperand(0).getValueType(); SDValue Zext = Op.getOperand(0); if (VT.bitsLT(MVT::i32)) { VT = MVT::i32; Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0)); } unsigned Log2b = Log2_32(VT.getSizeInBits()); SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext); SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz, DAG.getConstant(Log2b, dl, MVT::i32)); return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc); } // Leave comparisons against 0 and -1 alone for now, since they're usually // optimized. FIXME: revisit this when we can custom lower all setcc // optimizations. if (C->isAllOnesValue() || C->isNullValue()) return SDValue(); } // If we have an integer seteq/setne, turn it into a compare against zero // by xor'ing the rhs with the lhs, which is faster than setting a // condition register, reading it back out, and masking the correct bit. The // normal approach here uses sub to do this instead of xor. Using xor exposes // the result to other bit-twiddling opportunities. EVT LHSVT = Op.getOperand(0).getValueType(); if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { EVT VT = Op.getValueType(); SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0), Op.getOperand(1)); return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC); } return SDValue(); } SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); EVT VT = Node->getValueType(0); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue InChain = Node->getOperand(0); SDValue VAListPtr = Node->getOperand(1); const Value *SV = cast(Node->getOperand(2))->getValue(); SDLoc dl(Node); assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only"); // gpr_index SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, VAListPtr, MachinePointerInfo(SV), MVT::i8); InChain = GprIndex.getValue(1); if (VT == MVT::i64) { // Check if GprIndex is even SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex, DAG.getConstant(1, dl, MVT::i32)); SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd, DAG.getConstant(0, dl, MVT::i32), ISD::SETNE); SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex, DAG.getConstant(1, dl, MVT::i32)); // Align GprIndex to be even if it isn't GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne, GprIndex); } // fpr index is 1 byte after gpr SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, DAG.getConstant(1, dl, MVT::i32)); // fpr SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, FprPtr, MachinePointerInfo(SV), MVT::i8); InChain = FprIndex.getValue(1); SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, DAG.getConstant(8, dl, MVT::i32)); SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, DAG.getConstant(4, dl, MVT::i32)); // areas SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo()); InChain = OverflowArea.getValue(1); SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo()); InChain = RegSaveArea.getValue(1); // select overflow_area if index > 8 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex, DAG.getConstant(8, dl, MVT::i32), ISD::SETLT); // adjustment constant gpr_index * 4/8 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex, DAG.getConstant(VT.isInteger() ? 4 : 8, dl, MVT::i32)); // OurReg = RegSaveArea + RegConstant SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea, RegConstant); // Floating types are 32 bytes into RegSaveArea if (VT.isFloatingPoint()) OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg, DAG.getConstant(32, dl, MVT::i32)); // increase {f,g}pr_index by 1 (or 2 if VT is i64) SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex, DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl, MVT::i32)); InChain = DAG.getTruncStore(InChain, dl, IndexPlus1, VT.isInteger() ? VAListPtr : FprPtr, MachinePointerInfo(SV), MVT::i8); // determine if we should load from reg_save_area or overflow_area SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea); // increase overflow_area by 4/8 if gpr/fpr > 8 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea, DAG.getConstant(VT.isInteger() ? 4 : 8, dl, MVT::i32)); OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea, OverflowAreaPlusN); InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr, MachinePointerInfo(), MVT::i32); return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo()); } SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only"); // We have to copy the entire va_list struct: // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2), DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true, false, MachinePointerInfo(), MachinePointerInfo()); } SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { return Op.getOperand(0); } SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Trmp = Op.getOperand(1); // trampoline SDValue FPtr = Op.getOperand(2); // nested function SDValue Nest = Op.getOperand(3); // 'nest' parameter value SDLoc dl(Op); EVT PtrVT = getPointerTy(DAG.getDataLayout()); bool isPPC64 = (PtrVT == MVT::i64); Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Ty = IntPtrTy; Entry.Node = Trmp; Args.push_back(Entry); // TrampSize == (isPPC64 ? 48 : 40); Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl, isPPC64 ? MVT::i64 : MVT::i32); Args.push_back(Entry); Entry.Node = FPtr; Args.push_back(Entry); Entry.Node = Nest; Args.push_back(Entry); // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(Chain) .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args)); std::pair CallResult = LowerCallTo(CLI); return CallResult.second; } SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); PPCFunctionInfo *FuncInfo = MF.getInfo(); EVT PtrVT = getPointerTy(MF.getDataLayout()); SDLoc dl(Op); if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); const Value *SV = cast(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), MachinePointerInfo(SV)); } // For the 32-bit SVR4 ABI we follow the layout of the va_list struct. // We suppose the given va_list is already allocated. // // typedef struct { // char gpr; /* index into the array of 8 GPRs // * stored in the register save area // * gpr=0 corresponds to r3, // * gpr=1 to r4, etc. // */ // char fpr; /* index into the array of 8 FPRs // * stored in the register save area // * fpr=0 corresponds to f1, // * fpr=1 to f2, etc. // */ // char *overflow_arg_area; // /* location on stack that holds // * the next overflow argument // */ // char *reg_save_area; // /* where r3:r10 and f1:f8 (if saved) // * are stored // */ // } va_list[1]; SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32); SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32); SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(), PtrVT); SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); uint64_t FrameOffset = PtrVT.getSizeInBits()/8; SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT); uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1; SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT); uint64_t FPROffset = 1; SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT); const Value *SV = cast(Op.getOperand(2))->getValue(); // Store first byte : number of int regs SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1), MachinePointerInfo(SV), MVT::i8); uint64_t nextOffset = FPROffset; SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1), ConstFPROffset); // Store second byte : number of float regs SDValue secondStore = DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, MachinePointerInfo(SV, nextOffset), MVT::i8); nextOffset += StackOffset; nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset); // Store second word : arguments given on stack SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, MachinePointerInfo(SV, nextOffset)); nextOffset += FrameOffset; nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset); // Store third word : arguments given in registers return DAG.getStore(thirdStore, dl, FR, nextPtr, MachinePointerInfo(SV, nextOffset)); } #include "PPCGenCallingConv.inc" // Function whose sole purpose is to kill compiler warnings // stemming from unused functions included from PPCGenCallingConv.inc. CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const { return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS; } bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { return true; } bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { static const MCPhysReg ArgRegs[] = { PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; const unsigned NumArgRegs = array_lengthof(ArgRegs); unsigned RegNum = State.getFirstUnallocated(ArgRegs); // Skip one register if the first unallocated register has an even register // number and there are still argument registers available which have not been // allocated yet. RegNum is actually an index into ArgRegs, which means we // need to skip a register if RegNum is odd. if (RegNum != NumArgRegs && RegNum % 2 == 1) { State.AllocateReg(ArgRegs[RegNum]); } // Always return false here, as this function only makes sure that the first // unallocated register has an odd register number and does not actually // allocate a register for the current argument. return false; } bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { static const MCPhysReg ArgRegs[] = { PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8 }; const unsigned NumArgRegs = array_lengthof(ArgRegs); unsigned RegNum = State.getFirstUnallocated(ArgRegs); // If there is only one Floating-point register left we need to put both f64 // values of a split ppc_fp128 value on the stack. if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) { State.AllocateReg(ArgRegs[RegNum]); } // Always return false here, as this function only makes sure that the two f64 // values a ppc_fp128 value is split into are both passed in registers or both // passed on the stack and does not actually allocate a register for the // current argument. return false; } /// FPR - The set of FP registers that should be allocated for arguments, /// on Darwin. static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13}; /// QFPR - The set of QPX registers that should be allocated for arguments. static const MCPhysReg QFPR[] = { PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7, PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13}; /// CalculateStackSlotSize - Calculates the size reserved for this argument on /// the stack. static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize) { unsigned ArgSize = ArgVT.getStoreSize(); if (Flags.isByVal()) ArgSize = Flags.getByValSize(); // Round up to multiples of the pointer size, except for array members, // which are always packed. if (!Flags.isInConsecutiveRegs()) ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; return ArgSize; } /// CalculateStackSlotAlignment - Calculates the alignment of this argument /// on the stack. static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize) { unsigned Align = PtrByteSize; // Altivec parameters are padded to a 16 byte boundary. if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 || ArgVT == MVT::v1i128) Align = 16; // QPX vector types stored in double-precision are padded to a 32 byte // boundary. else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1) Align = 32; // ByVal parameters are aligned as requested. if (Flags.isByVal()) { unsigned BVAlign = Flags.getByValAlign(); if (BVAlign > PtrByteSize) { if (BVAlign % PtrByteSize != 0) llvm_unreachable( "ByVal alignment is not a multiple of the pointer size"); Align = BVAlign; } } // Array members are always packed to their original alignment. if (Flags.isInConsecutiveRegs()) { // If the array member was split into multiple registers, the first // needs to be aligned to the size of the full type. (Except for // ppcf128, which is only aligned as its f64 components.) if (Flags.isSplit() && OrigVT != MVT::ppcf128) Align = OrigVT.getStoreSize(); else Align = ArgVT.getStoreSize(); } return Align; } /// CalculateStackSlotUsed - Return whether this argument will use its /// stack slot (instead of being passed in registers). ArgOffset, /// AvailableFPRs, and AvailableVRs must hold the current argument /// position, and will be updated to account for this argument. static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs, bool HasQPX) { bool UseMemory = false; // Respect alignment of argument on the stack. unsigned Align = CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; // If there's no space left in the argument save area, we must // use memory (this check also catches zero-sized arguments). if (ArgOffset >= LinkageSize + ParamAreaSize) UseMemory = true; // Allocate argument on the stack. ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); if (Flags.isInConsecutiveRegsLast()) ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; // If we overran the argument save area, we must use memory // (this check catches arguments passed partially in memory) if (ArgOffset > LinkageSize + ParamAreaSize) UseMemory = true; // However, if the argument is actually passed in an FPR or a VR, // we don't use memory after all. if (!Flags.isByVal()) { if (ArgVT == MVT::f32 || ArgVT == MVT::f64 || // QPX registers overlap with the scalar FP registers. (HasQPX && (ArgVT == MVT::v4f32 || ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1))) if (AvailableFPRs > 0) { --AvailableFPRs; return false; } if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 || ArgVT == MVT::v1i128) if (AvailableVRs > 0) { --AvailableVRs; return false; } } return UseMemory; } /// EnsureStackAlignment - Round stack frame size up from NumBytes to /// ensure minimum alignment required for target. static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes) { unsigned TargetAlign = Lowering->getStackAlignment(); unsigned AlignMask = TargetAlign - 1; NumBytes = (NumBytes + AlignMask) & ~AlignMask; return NumBytes; } SDValue PPCTargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { if (Subtarget.isSVR4ABI()) { if (Subtarget.isPPC64()) return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); else return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); } else { return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); } } SDValue PPCTargetLowering::LowerFormalArguments_32SVR4( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { // 32-bit SVR4 ABI Stack Frame Layout: // +-----------------------------------+ // +--> | Back chain | // | +-----------------------------------+ // | | Floating-point register save area | // | +-----------------------------------+ // | | General register save area | // | +-----------------------------------+ // | | CR save word | // | +-----------------------------------+ // | | VRSAVE save word | // | +-----------------------------------+ // | | Alignment padding | // | +-----------------------------------+ // | | Vector register save area | // | +-----------------------------------+ // | | Local variable space | // | +-----------------------------------+ // | | Parameter list area | // | +-----------------------------------+ // | | LR save word | // | +-----------------------------------+ // SP--> +--- | Back chain | // +-----------------------------------+ // // Specifications: // System V Application Binary Interface PowerPC Processor Supplement // AltiVec Technology Programming Interface Manual MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); PPCFunctionInfo *FuncInfo = MF.getInfo(); EVT PtrVT = getPointerTy(MF.getDataLayout()); // Potential tail calls could cause overwriting of argument stack slots. bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && (CallConv == CallingConv::Fast)); unsigned PtrByteSize = 4; // Assign locations to all of the incoming arguments. SmallVector ArgLocs; PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext()); // Reserve space for the linkage area on the stack. unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); CCInfo.AllocateStack(LinkageSize, PtrByteSize); if (useSoftFloat()) CCInfo.PreAnalyzeFormalArguments(Ins); CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4); CCInfo.clearWasPPCF128(); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; // Arguments stored in registers. if (VA.isRegLoc()) { const TargetRegisterClass *RC; EVT ValVT = VA.getValVT(); switch (ValVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("ValVT not supported by formal arguments Lowering"); case MVT::i1: case MVT::i32: RC = &PPC::GPRCRegClass; break; case MVT::f32: if (Subtarget.hasP8Vector()) RC = &PPC::VSSRCRegClass; else RC = &PPC::F4RCRegClass; break; case MVT::f64: if (Subtarget.hasVSX()) RC = &PPC::VSFRCRegClass; else RC = &PPC::F8RCRegClass; break; case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: RC = &PPC::VRRCRegClass; break; case MVT::v4f32: RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass; break; case MVT::v2f64: case MVT::v2i64: RC = &PPC::VSHRCRegClass; break; case MVT::v4f64: RC = &PPC::QFRCRegClass; break; case MVT::v4i1: RC = &PPC::QBRCRegClass; break; } // Transform the arguments stored in physical registers into virtual ones. unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT == MVT::i1 ? MVT::i32 : ValVT); if (ValVT == MVT::i1) ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue); InVals.push_back(ArgValue); } else { // Argument stored in memory. assert(VA.isMemLoc()); unsigned ArgSize = VA.getLocVT().getStoreSize(); int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), isImmutable); // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, PtrVT); InVals.push_back( DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo())); } } // Assign locations to all of the incoming aggregate by value arguments. // Aggregates passed by value are stored in the local variable space of the // caller's stack frame, right above the parameter list area. SmallVector ByValArgLocs; CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(), ByValArgLocs, *DAG.getContext()); // Reserve stack space for the allocations in CCInfo. CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal); // Area that is at least reserved in the caller of this function. unsigned MinReservedArea = CCByValInfo.getNextStackOffset(); MinReservedArea = std::max(MinReservedArea, LinkageSize); // Set the size that is at least reserved in caller of this function. Tail // call optimized function's reserved stack space needs to be aligned so that // taking the difference between two stack areas will result in an aligned // stack. MinReservedArea = EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea); FuncInfo->setMinReservedArea(MinReservedArea); SmallVector MemOps; // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. if (isVarArg) { static const MCPhysReg GPArgRegs[] = { PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; const unsigned NumGPArgRegs = array_lengthof(GPArgRegs); static const MCPhysReg FPArgRegs[] = { PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8 }; unsigned NumFPArgRegs = array_lengthof(FPArgRegs); if (useSoftFloat()) NumFPArgRegs = 0; FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs)); FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs)); // Make room for NumGPArgRegs and NumFPArgRegs. int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 + NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8; FuncInfo->setVarArgsStackOffset( MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, CCInfo.getNextStackOffset(), true)); FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false)); SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); // The fixed integer arguments of a variadic function are stored to the // VarArgsFrameIndex on the stack so that they may be loaded by // dereferencing the result of va_next. for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) { // Get an existing live-in vreg, or add a new one. unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]); if (!VReg) VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); MemOps.push_back(Store); // Increment the address by four for the next argument to store SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT); FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); } // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6 // is set. // The double arguments are stored to the VarArgsFrameIndex // on the stack. for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) { // Get an existing live-in vreg, or add a new one. unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]); if (!VReg) VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); MemOps.push_back(Store); // Increment the address by eight for the next argument to store SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl, PtrVT); FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); } } if (!MemOps.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); return Chain; } // PPC64 passes i8, i16, and i32 values in i64 registers. Promote // value to MVT::i64 and then truncate to the correct register size. SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, SelectionDAG &DAG, SDValue ArgVal, const SDLoc &dl) const { if (Flags.isSExt()) ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal, DAG.getValueType(ObjectVT)); else if (Flags.isZExt()) ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal, DAG.getValueType(ObjectVT)); return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal); } SDValue PPCTargetLowering::LowerFormalArguments_64SVR4( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { // TODO: add description of PPC stack frame format, or at least some docs. // bool isELFv2ABI = Subtarget.isELFv2ABI(); bool isLittleEndian = Subtarget.isLittleEndian(); MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); PPCFunctionInfo *FuncInfo = MF.getInfo(); assert(!(CallConv == CallingConv::Fast && isVarArg) && "fastcc not supported on varargs functions"); EVT PtrVT = getPointerTy(MF.getDataLayout()); // Potential tail calls could cause overwriting of argument stack slots. bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && (CallConv == CallingConv::Fast)); unsigned PtrByteSize = 8; unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); static const MCPhysReg GPR[] = { PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; static const MCPhysReg VSRH[] = { PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8, PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13 }; const unsigned Num_GPR_Regs = array_lengthof(GPR); const unsigned Num_FPR_Regs = 13; const unsigned Num_VR_Regs = array_lengthof(VR); const unsigned Num_QFPR_Regs = Num_FPR_Regs; // Do a first pass over the arguments to determine whether the ABI // guarantees that our caller has allocated the parameter save area // on its stack frame. In the ELFv1 ABI, this is always the case; // in the ELFv2 ABI, it is true if this is a vararg function or if // any parameter is located in a stack slot. bool HasParameterArea = !isELFv2ABI || isVarArg; unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize; unsigned NumBytes = LinkageSize; unsigned AvailableFPRs = Num_FPR_Regs; unsigned AvailableVRs = Num_VR_Regs; for (unsigned i = 0, e = Ins.size(); i != e; ++i) { if (Ins[i].Flags.isNest()) continue; if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags, PtrByteSize, LinkageSize, ParamAreaSize, NumBytes, AvailableFPRs, AvailableVRs, Subtarget.hasQPX())) HasParameterArea = true; } // Add DAG nodes to load the arguments or copy them out of registers. On // entry to a function on PPC, the arguments start after the linkage area, // although the first ones are often in registers. unsigned ArgOffset = LinkageSize; unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; unsigned &QFPR_idx = FPR_idx; SmallVector MemOps; Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); unsigned CurArgIdx = 0; for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { SDValue ArgVal; bool needsLoad = false; EVT ObjectVT = Ins[ArgNo].VT; EVT OrigVT = Ins[ArgNo].ArgVT; unsigned ObjSize = ObjectVT.getStoreSize(); unsigned ArgSize = ObjSize; ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; if (Ins[ArgNo].isOrigArg()) { std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx); CurArgIdx = Ins[ArgNo].getOrigArgIndex(); } // We re-align the argument offset for each argument, except when using the // fast calling convention, when we need to make sure we do that only when // we'll actually use a stack slot. unsigned CurArgOffset, Align; auto ComputeArgOffset = [&]() { /* Respect alignment of argument on the stack. */ Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize); ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; CurArgOffset = ArgOffset; }; if (CallConv != CallingConv::Fast) { ComputeArgOffset(); /* Compute GPR index associated with argument offset. */ GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; GPR_idx = std::min(GPR_idx, Num_GPR_Regs); } // FIXME the codegen can be much improved in some cases. // We do not have to keep everything in memory. if (Flags.isByVal()) { assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit"); if (CallConv == CallingConv::Fast) ComputeArgOffset(); // ObjSize is the true size, ArgSize rounded up to multiple of registers. ObjSize = Flags.getByValSize(); ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; // Empty aggregate parameters do not take up registers. Examples: // struct { } a; // union { } b; // int c[0]; // etc. However, we have to provide a place-holder in InVals, so // pretend we have an 8-byte item at the current address for that // purpose. if (!ObjSize) { int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); InVals.push_back(FIN); continue; } // Create a stack object covering all stack doublewords occupied // by the argument. If the argument is (fully or partially) on // the stack, or if the argument is fully in registers but the // caller has allocated the parameter save anyway, we can refer // directly to the caller's stack frame. Otherwise, create a // local copy in our own frame. int FI; if (HasParameterArea || ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize) FI = MFI->CreateFixedObject(ArgSize, ArgOffset, false, true); else FI = MFI->CreateStackObject(ArgSize, Align, false); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); // Handle aggregates smaller than 8 bytes. if (ObjSize < PtrByteSize) { // The value of the object is its address, which differs from the // address of the enclosing doubleword on big-endian systems. SDValue Arg = FIN; if (!isLittleEndian) { SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT); Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff); } InVals.push_back(Arg); if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store; if (ObjSize==1 || ObjSize==2 || ObjSize==4) { EVT ObjType = (ObjSize == 1 ? MVT::i8 : (ObjSize == 2 ? MVT::i16 : MVT::i32)); Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg, MachinePointerInfo(&*FuncArg), ObjType); } else { // For sizes that don't fit a truncating store (3, 5, 6, 7), // store the whole register as-is to the parameter save area // slot. Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo(&*FuncArg)); } MemOps.push_back(Store); } // Whether we copied from a register or not, advance the offset // into the parameter save area by a full doubleword. ArgOffset += PtrByteSize; continue; } // The value of the object is its address, which is the address of // its first stack doubleword. InVals.push_back(FIN); // Store whatever pieces of the object are in registers to memory. for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { if (GPR_idx == Num_GPR_Regs) break; unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Addr = FIN; if (j) { SDValue Off = DAG.getConstant(j, dl, PtrVT); Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off); } SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr, MachinePointerInfo(&*FuncArg, j)); MemOps.push_back(Store); ++GPR_idx; } ArgOffset += ArgSize; continue; } switch (ObjectVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unhandled argument type!"); case MVT::i1: case MVT::i32: case MVT::i64: if (Flags.isNest()) { // The 'nest' parameter, if any, is passed in R11. unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); break; } // These can be scalar arguments or elements of an integer array type // passed directly. Clang may use those instead of "byval" aggregate // types to avoid forcing arguments to memory unnecessarily. if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) // PPC64 passes i8, i16, and i32 values in i64 registers. Promote // value to MVT::i64 and then truncate to the correct register size. ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); } else { if (CallConv == CallingConv::Fast) ComputeArgOffset(); needsLoad = true; ArgSize = PtrByteSize; } if (CallConv != CallingConv::Fast || needsLoad) ArgOffset += 8; break; case MVT::f32: case MVT::f64: // These can be scalar arguments or elements of a float array type // passed directly. The latter are used to implement ELFv2 homogenous // float aggregates. if (FPR_idx != Num_FPR_Regs) { unsigned VReg; if (ObjectVT == MVT::f32) VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasP8Vector() ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass); else VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX() ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); ++FPR_idx; } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) { // FIXME: We may want to re-enable this for CallingConv::Fast on the P8 // once we support fp <-> gpr moves. // This can only ever happen in the presence of f32 array types, // since otherwise we never run out of FPRs before running out // of GPRs. unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); if (ObjectVT == MVT::f32) { if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0)) ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal, DAG.getConstant(32, dl, MVT::i32)); ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal); } ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal); } else { if (CallConv == CallingConv::Fast) ComputeArgOffset(); needsLoad = true; } // When passing an array of floats, the array occupies consecutive // space in the argument area; only round up to the next doubleword // at the end of the array. Otherwise, each float takes 8 bytes. if (CallConv != CallingConv::Fast || needsLoad) { ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize; ArgOffset += ArgSize; if (Flags.isInConsecutiveRegsLast()) ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; } break; case MVT::v4f32: case MVT::v4i32: case MVT::v8i16: case MVT::v16i8: case MVT::v2f64: case MVT::v2i64: case MVT::v1i128: if (!Subtarget.hasQPX()) { // These can be scalar arguments or elements of a vector array type // passed directly. The latter are used to implement ELFv2 homogenous // vector aggregates. if (VR_idx != Num_VR_Regs) { unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ? MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) : MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); ++VR_idx; } else { if (CallConv == CallingConv::Fast) ComputeArgOffset(); needsLoad = true; } if (CallConv != CallingConv::Fast || needsLoad) ArgOffset += 16; break; } // not QPX assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 && "Invalid QPX parameter type"); /* fall through */ case MVT::v4f64: case MVT::v4i1: // QPX vectors are treated like their scalar floating-point subregisters // (except that they're larger). unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32; if (QFPR_idx != Num_QFPR_Regs) { const TargetRegisterClass *RC; switch (ObjectVT.getSimpleVT().SimpleTy) { case MVT::v4f64: RC = &PPC::QFRCRegClass; break; case MVT::v4f32: RC = &PPC::QSRCRegClass; break; default: RC = &PPC::QBRCRegClass; break; } unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); ++QFPR_idx; } else { if (CallConv == CallingConv::Fast) ComputeArgOffset(); needsLoad = true; } if (CallConv != CallingConv::Fast || needsLoad) ArgOffset += Sz; break; } // We need to load the argument to a virtual register if we determined // above that we ran out of physical registers of the appropriate type. if (needsLoad) { if (ObjSize < ArgSize && !isLittleEndian) CurArgOffset += ArgSize - ObjSize; int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, isImmutable); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo()); } InVals.push_back(ArgVal); } // Area that is at least reserved in the caller of this function. unsigned MinReservedArea; if (HasParameterArea) MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize); else MinReservedArea = LinkageSize; // Set the size that is at least reserved in caller of this function. Tail // call optimized functions' reserved stack space needs to be aligned so that // taking the difference between two stack areas will result in an aligned // stack. MinReservedArea = EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea); FuncInfo->setMinReservedArea(MinReservedArea); // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. if (isVarArg) { int Depth = ArgOffset; FuncInfo->setVarArgsFrameIndex( MFI->CreateFixedObject(PtrByteSize, Depth, true)); SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); // If this function is vararg, store any remaining integer argument regs // to their spots on the stack so that they may be loaded by dereferencing // the result of va_next. for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; GPR_idx < Num_GPR_Regs; ++GPR_idx) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); MemOps.push_back(Store); // Increment the address by four for the next argument to store SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT); FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); } } if (!MemOps.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); return Chain; } SDValue PPCTargetLowering::LowerFormalArguments_Darwin( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { // TODO: add description of PPC stack frame format, or at least some docs. // MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); PPCFunctionInfo *FuncInfo = MF.getInfo(); EVT PtrVT = getPointerTy(MF.getDataLayout()); bool isPPC64 = PtrVT == MVT::i64; // Potential tail calls could cause overwriting of argument stack slots. bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && (CallConv == CallingConv::Fast)); unsigned PtrByteSize = isPPC64 ? 8 : 4; unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); unsigned ArgOffset = LinkageSize; // Area that is at least reserved in caller of this function. unsigned MinReservedArea = ArgOffset; static const MCPhysReg GPR_32[] = { // 32-bit registers. PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; static const MCPhysReg GPR_64[] = { // 64-bit registers. PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; const unsigned Num_GPR_Regs = array_lengthof(GPR_32); const unsigned Num_FPR_Regs = 13; const unsigned Num_VR_Regs = array_lengthof( VR); unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32; // In 32-bit non-varargs functions, the stack space for vectors is after the // stack space for non-vectors. We do not use this space unless we have // too many vectors to fit in registers, something that only occurs in // constructed examples:), but we have to walk the arglist to figure // that out...for the pathological case, compute VecArgOffset as the // start of the vector parameter area. Computing VecArgOffset is the // entire point of the following loop. unsigned VecArgOffset = ArgOffset; if (!isVarArg && !isPPC64) { for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { EVT ObjectVT = Ins[ArgNo].VT; ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; if (Flags.isByVal()) { // ObjSize is the true size, ArgSize rounded up to multiple of regs. unsigned ObjSize = Flags.getByValSize(); unsigned ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; VecArgOffset += ArgSize; continue; } switch(ObjectVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unhandled argument type!"); case MVT::i1: case MVT::i32: case MVT::f32: VecArgOffset += 4; break; case MVT::i64: // PPC64 case MVT::f64: // FIXME: We are guaranteed to be !isPPC64 at this point. // Does MVT::i64 apply? VecArgOffset += 8; break; case MVT::v4f32: case MVT::v4i32: case MVT::v8i16: case MVT::v16i8: // Nothing to do, we're only looking at Nonvector args here. break; } } } // We've found where the vector parameter area in memory is. Skip the // first 12 parameters; these don't use that memory. VecArgOffset = ((VecArgOffset+15)/16)*16; VecArgOffset += 12*16; // Add DAG nodes to load the arguments or copy them out of registers. On // entry to a function on PPC, the arguments start after the linkage area, // although the first ones are often in registers. SmallVector MemOps; unsigned nAltivecParamsAtEnd = 0; Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); unsigned CurArgIdx = 0; for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { SDValue ArgVal; bool needsLoad = false; EVT ObjectVT = Ins[ArgNo].VT; unsigned ObjSize = ObjectVT.getSizeInBits()/8; unsigned ArgSize = ObjSize; ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; if (Ins[ArgNo].isOrigArg()) { std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx); CurArgIdx = Ins[ArgNo].getOrigArgIndex(); } unsigned CurArgOffset = ArgOffset; // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary. if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 || ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) { if (isVarArg || isPPC64) { MinReservedArea = ((MinReservedArea+15)/16)*16; MinReservedArea += CalculateStackSlotSize(ObjectVT, Flags, PtrByteSize); } else nAltivecParamsAtEnd++; } else // Calculate min reserved area. MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT, Flags, PtrByteSize); // FIXME the codegen can be much improved in some cases. // We do not have to keep everything in memory. if (Flags.isByVal()) { assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit"); // ObjSize is the true size, ArgSize rounded up to multiple of registers. ObjSize = Flags.getByValSize(); ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; // Objects of size 1 and 2 are right justified, everything else is // left justified. This means the memory address is adjusted forwards. if (ObjSize==1 || ObjSize==2) { CurArgOffset = CurArgOffset + (4 - ObjSize); } // The value of the object is its address. int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, false, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); InVals.push_back(FIN); if (ObjSize==1 || ObjSize==2) { if (GPR_idx != Num_GPR_Regs) { unsigned VReg; if (isPPC64) VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); else VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16; SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo(&*FuncArg), ObjType); MemOps.push_back(Store); ++GPR_idx; } ArgOffset += PtrByteSize; continue; } for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { // Store whatever pieces of the object are in registers // to memory. ArgOffset will be the address of the beginning // of the object. if (GPR_idx != Num_GPR_Regs) { unsigned VReg; if (isPPC64) VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); else VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo(&*FuncArg, j)); MemOps.push_back(Store); ++GPR_idx; ArgOffset += PtrByteSize; } else { ArgOffset += ArgSize - (ArgOffset-CurArgOffset); break; } } continue; } switch (ObjectVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unhandled argument type!"); case MVT::i1: case MVT::i32: if (!isPPC64) { if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); if (ObjectVT == MVT::i1) ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal); ++GPR_idx; } else { needsLoad = true; ArgSize = PtrByteSize; } // All int arguments reserve stack space in the Darwin ABI. ArgOffset += PtrByteSize; break; } // FALLTHROUGH case MVT::i64: // PPC64 if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) // PPC64 passes i8, i16, and i32 values in i64 registers. Promote // value to MVT::i64 and then truncate to the correct register size. ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); ++GPR_idx; } else { needsLoad = true; ArgSize = PtrByteSize; } // All int arguments reserve stack space in the Darwin ABI. ArgOffset += 8; break; case MVT::f32: case MVT::f64: // Every 4 bytes of argument space consumes one of the GPRs available for // argument passing. if (GPR_idx != Num_GPR_Regs) { ++GPR_idx; if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64) ++GPR_idx; } if (FPR_idx != Num_FPR_Regs) { unsigned VReg; if (ObjectVT == MVT::f32) VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass); else VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); ++FPR_idx; } else { needsLoad = true; } // All FP arguments reserve stack space in the Darwin ABI. ArgOffset += isPPC64 ? 8 : ObjSize; break; case MVT::v4f32: case MVT::v4i32: case MVT::v8i16: case MVT::v16i8: // Note that vector arguments in registers don't reserve stack space, // except in varargs functions. if (VR_idx != Num_VR_Regs) { unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); if (isVarArg) { while ((ArgOffset % 16) != 0) { ArgOffset += PtrByteSize; if (GPR_idx != Num_GPR_Regs) GPR_idx++; } ArgOffset += 16; GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64? } ++VR_idx; } else { if (!isVarArg && !isPPC64) { // Vectors go after all the nonvectors. CurArgOffset = VecArgOffset; VecArgOffset += 16; } else { // Vectors are aligned. ArgOffset = ((ArgOffset+15)/16)*16; CurArgOffset = ArgOffset; ArgOffset += 16; } needsLoad = true; } break; } // We need to load the argument to a virtual register if we determined above // that we ran out of physical registers of the appropriate type. if (needsLoad) { int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset + (ArgSize - ObjSize), isImmutable); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo()); } InVals.push_back(ArgVal); } // Allow for Altivec parameters at the end, if needed. if (nAltivecParamsAtEnd) { MinReservedArea = ((MinReservedArea+15)/16)*16; MinReservedArea += 16*nAltivecParamsAtEnd; } // Area that is at least reserved in the caller of this function. MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize); // Set the size that is at least reserved in caller of this function. Tail // call optimized functions' reserved stack space needs to be aligned so that // taking the difference between two stack areas will result in an aligned // stack. MinReservedArea = EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea); FuncInfo->setMinReservedArea(MinReservedArea); // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. if (isVarArg) { int Depth = ArgOffset; FuncInfo->setVarArgsFrameIndex( MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, Depth, true)); SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); // If this function is vararg, store any remaining integer argument regs // to their spots on the stack so that they may be loaded by dereferencing // the result of va_next. for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) { unsigned VReg; if (isPPC64) VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); else VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); MemOps.push_back(Store); // Increment the address by four for the next argument to store SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT); FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); } } if (!MemOps.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); return Chain; } /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be /// adjusted to accommodate the arguments for the tailcall. static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall, unsigned ParamSize) { if (!isTailCall) return 0; PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo(); unsigned CallerMinReservedArea = FI->getMinReservedArea(); int SPDiff = (int)CallerMinReservedArea - (int)ParamSize; // Remember only if the new adjustement is bigger. if (SPDiff < FI->getTailCallSPDelta()) FI->setTailCallSPDelta(SPDiff); return SPDiff; } static bool isFunctionGlobalAddress(SDValue Callee); static bool resideInSameModule(SDValue Callee, Reloc::Model RelMod) { // If !G, Callee can be an external symbol. GlobalAddressSDNode *G = dyn_cast(Callee); if (!G) return false; const GlobalValue *GV = G->getGlobal(); if (GV->isDeclaration()) return false; switch(GV->getLinkage()) { default: llvm_unreachable("unknow linkage type"); case GlobalValue::AvailableExternallyLinkage: case GlobalValue::ExternalWeakLinkage: return false; // Callee with weak linkage is allowed if it has hidden or protected // visibility case GlobalValue::LinkOnceAnyLinkage: case GlobalValue::LinkOnceODRLinkage: // e.g. c++ inline functions case GlobalValue::WeakAnyLinkage: case GlobalValue::WeakODRLinkage: // e.g. c++ template instantiation if (GV->hasDefaultVisibility()) return false; case GlobalValue::ExternalLinkage: case GlobalValue::InternalLinkage: case GlobalValue::PrivateLinkage: break; } // With '-fPIC', calling default visiblity function need insert 'nop' after // function call, no matter that function resides in same module or not, so // we treat it as in different module. if (RelMod == Reloc::PIC_ && GV->hasDefaultVisibility()) return false; return true; } static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl &Outs) { assert(Subtarget.isSVR4ABI() && Subtarget.isPPC64()); const unsigned PtrByteSize = 8; const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); static const MCPhysReg GPR[] = { PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; const unsigned NumGPRs = array_lengthof(GPR); const unsigned NumFPRs = 13; const unsigned NumVRs = array_lengthof(VR); const unsigned ParamAreaSize = NumGPRs * PtrByteSize; unsigned NumBytes = LinkageSize; unsigned AvailableFPRs = NumFPRs; unsigned AvailableVRs = NumVRs; for (const ISD::OutputArg& Param : Outs) { if (Param.Flags.isNest()) continue; if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize, LinkageSize, ParamAreaSize, NumBytes, AvailableFPRs, AvailableVRs, Subtarget.hasQPX())) return true; } return false; } static bool hasSameArgumentList(const Function *CallerFn, ImmutableCallSite *CS) { if (CS->arg_size() != CallerFn->getArgumentList().size()) return false; ImmutableCallSite::arg_iterator CalleeArgIter = CS->arg_begin(); ImmutableCallSite::arg_iterator CalleeArgEnd = CS->arg_end(); Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin(); for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) { const Value* CalleeArg = *CalleeArgIter; const Value* CallerArg = &(*CallerArgIter); if (CalleeArg == CallerArg) continue; // e.g. @caller([4 x i64] %a, [4 x i64] %b) { // tail call @callee([4 x i64] undef, [4 x i64] %b) // } // 1st argument of callee is undef and has the same type as caller. if (CalleeArg->getType() == CallerArg->getType() && isa(CalleeArg)) continue; return false; } return true; } bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4( SDValue Callee, CallingConv::ID CalleeCC, ImmutableCallSite *CS, bool isVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &Ins, SelectionDAG& DAG) const { bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt; if (DisableSCO && !TailCallOpt) return false; // Variadic argument functions are not supported. if (isVarArg) return false; MachineFunction &MF = DAG.getMachineFunction(); CallingConv::ID CallerCC = MF.getFunction()->getCallingConv(); // Tail or Sibling call optimization (TCO/SCO) needs callee and caller has // the same calling convention if (CallerCC != CalleeCC) return false; // SCO support C calling convention if (CalleeCC != CallingConv::Fast && CalleeCC != CallingConv::C) return false; // Caller contains any byval parameter is not supported. if (std::any_of(Ins.begin(), Ins.end(), [](const ISD::InputArg& IA) { return IA.Flags.isByVal(); })) return false; // Callee contains any byval parameter is not supported, too. // Note: This is a quick work around, because in some cases, e.g. // caller's stack size > callee's stack size, we are still able to apply // sibling call optimization. See: https://reviews.llvm.org/D23441#513574 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); })) return false; // No TCO/SCO on indirect call because Caller have to restore its TOC if (!isFunctionGlobalAddress(Callee) && !isa(Callee)) return false; // Check if Callee resides in the same module, because for now, PPC64 SVR4 ABI // (ELFv1/ELFv2) doesn't allow tail calls to a symbol resides in another // module. // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977 if (!resideInSameModule(Callee, getTargetMachine().getRelocationModel())) return false; // TCO allows altering callee ABI, so we don't have to check further. if (CalleeCC == CallingConv::Fast && TailCallOpt) return true; if (DisableSCO) return false; // If callee use the same argument list that caller is using, then we can // apply SCO on this case. If it is not, then we need to check if callee needs // stack for passing arguments. if (!hasSameArgumentList(MF.getFunction(), CS) && needStackSlotPassParameters(Subtarget, Outs)) { return false; } return true; } /// IsEligibleForTailCallOptimization - Check whether the call is eligible /// for tail call optimization. Targets which want to do tail call /// optimization should implement this function. bool PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, const SmallVectorImpl &Ins, SelectionDAG& DAG) const { if (!getTargetMachine().Options.GuaranteedTailCallOpt) return false; // Variable argument functions are not supported. if (isVarArg) return false; MachineFunction &MF = DAG.getMachineFunction(); CallingConv::ID CallerCC = MF.getFunction()->getCallingConv(); if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) { // Functions containing by val parameters are not supported. for (unsigned i = 0; i != Ins.size(); i++) { ISD::ArgFlagsTy Flags = Ins[i].Flags; if (Flags.isByVal()) return false; } // Non-PIC/GOT tail calls are supported. if (getTargetMachine().getRelocationModel() != Reloc::PIC_) return true; // At the moment we can only do local tail calls (in same module, hidden // or protected) if we are generating PIC. if (GlobalAddressSDNode *G = dyn_cast(Callee)) return G->getGlobal()->hasHiddenVisibility() || G->getGlobal()->hasProtectedVisibility(); } return false; } /// isCallCompatibleAddress - Return the immediate to use if the specified /// 32-bit value is representable in the immediate field of a BxA instruction. static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) { ConstantSDNode *C = dyn_cast(Op); if (!C) return nullptr; int Addr = C->getZExtValue(); if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. SignExtend32<26>(Addr) != Addr) return nullptr; // Top 6 bits have to be sext of immediate. return DAG .getConstant( (int)C->getZExtValue() >> 2, SDLoc(Op), DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout())) .getNode(); } namespace { struct TailCallArgumentInfo { SDValue Arg; SDValue FrameIdxOp; int FrameIdx; TailCallArgumentInfo() : FrameIdx(0) {} }; } /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot. static void StoreTailCallArgumentsToStackSlot( SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl &TailCallArgs, SmallVectorImpl &MemOpChains, const SDLoc &dl) { for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) { SDValue Arg = TailCallArgs[i].Arg; SDValue FIN = TailCallArgs[i].FrameIdxOp; int FI = TailCallArgs[i].FrameIdx; // Store relative to framepointer. MemOpChains.push_back(DAG.getStore( Chain, dl, Arg, FIN, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI))); } } /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to /// the appropriate stack slot for the tail call optimized function call. static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl) { if (SPDiff) { // Calculate the new stack slot for the return address. MachineFunction &MF = DAG.getMachineFunction(); const PPCSubtarget &Subtarget = MF.getSubtarget(); const PPCFrameLowering *FL = Subtarget.getFrameLowering(); bool isPPC64 = Subtarget.isPPC64(); int SlotSize = isPPC64 ? 8 : 4; int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset(); int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewRetAddrLoc, true); EVT VT = isPPC64 ? MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, MachinePointerInfo::getFixedStack(MF, NewRetAddr)); // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack // slot as the FP is never overwritten. if (Subtarget.isDarwinABI()) { int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset(); int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc, true); SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, MachinePointerInfo::getFixedStack( DAG.getMachineFunction(), NewFPIdx)); } } return Chain; } /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate /// the position of the argument. static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl& TailCallArguments) { int Offset = ArgOffset + SPDiff; uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8; int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); EVT VT = isPPC64 ? MVT::i64 : MVT::i32; SDValue FIN = DAG.getFrameIndex(FI, VT); TailCallArgumentInfo Info; Info.Arg = Arg; Info.FrameIdxOp = FIN; Info.FrameIdx = FI; TailCallArguments.push_back(Info); } /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address /// stack slot. Returns the chain as result and the loaded frame pointers in /// LROpOut/FPOpout. Used when tail calling. SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr( SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut, SDValue &FPOpOut, const SDLoc &dl) const { if (SPDiff) { // Load the LR and FP stack slot for later adjusting. EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32; LROpOut = getReturnAddrFrameIndex(DAG); LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo()); Chain = SDValue(LROpOut.getNode(), 1); // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack // slot as the FP is never overwritten. if (Subtarget.isDarwinABI()) { FPOpOut = getFramePointerFrameIndex(DAG); FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo()); Chain = SDValue(FPOpOut.getNode(), 1); } } return Chain; } /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified /// by "Src" to address "Dst" of size "Size". Alignment information is /// specified by the specific parameter attribute. The copy will be passed as /// a byval function parameter. /// Sometimes what we are copying is the end of a larger object, the part that /// does not fit in registers. static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl) { SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32); return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), false, false, false, MachinePointerInfo(), MachinePointerInfo()); } /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of /// tail calls. static void LowerMemOpCallTo( SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl &MemOpChains, SmallVectorImpl &TailCallArguments, const SDLoc &dl) { EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); if (!isTailCall) { if (isVector) { SDValue StackPtr; if (isPPC64) StackPtr = DAG.getRegister(PPC::X1, MVT::i64); else StackPtr = DAG.getRegister(PPC::R1, MVT::i32); PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, DAG.getConstant(ArgOffset, dl, PtrVT)); } MemOpChains.push_back( DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo())); // Calculate and remember argument location. } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset, TailCallArguments); } static void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl &TailCallArguments) { // Emit a sequence of copyto/copyfrom virtual registers for arguments that // might overwrite each other in case of tail call optimization. SmallVector MemOpChains2; // Do not flag preceding copytoreg stuff together with the following stuff. InFlag = SDValue(); StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments, MemOpChains2, dl); if (!MemOpChains2.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2); // Store the return address to the appropriate stack slot. Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl); // Emit callseq_end just before tailcall node. Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), DAG.getIntPtrConstant(0, dl, true), InFlag, dl); InFlag = Chain.getValue(1); } // Is this global address that of a function that can be called by name? (as // opposed to something that must hold a descriptor for an indirect call). static bool isFunctionGlobalAddress(SDValue Callee) { if (GlobalAddressSDNode *G = dyn_cast(Callee)) { if (Callee.getOpcode() == ISD::GlobalTLSAddress || Callee.getOpcode() == ISD::TargetGlobalTLSAddress) return false; return G->getGlobal()->getValueType()->isFunctionTy(); } return false; } static unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall, bool isPatchPoint, bool hasNest, SmallVectorImpl> &RegsToPass, SmallVectorImpl &Ops, std::vector &NodeTys, ImmutableCallSite *CS, const PPCSubtarget &Subtarget) { bool isPPC64 = Subtarget.isPPC64(); bool isSVR4ABI = Subtarget.isSVR4ABI(); bool isELFv2ABI = Subtarget.isELFv2ABI(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); NodeTys.push_back(MVT::Other); // Returns a chain NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use. unsigned CallOpc = PPCISD::CALL; bool needIndirectCall = true; if (!isSVR4ABI || !isPPC64) if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) { // If this is an absolute destination address, use the munged value. Callee = SDValue(Dest, 0); needIndirectCall = false; } // PC-relative references to external symbols should go through $stub, unless // we're building with the leopard linker or later, which automatically // synthesizes these stubs. const TargetMachine &TM = DAG.getTarget(); const Module *Mod = DAG.getMachineFunction().getFunction()->getParent(); const GlobalValue *GV = nullptr; if (auto *G = dyn_cast(Callee)) GV = G->getGlobal(); bool Local = TM.shouldAssumeDSOLocal(*Mod, GV); bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64; if (isFunctionGlobalAddress(Callee)) { GlobalAddressSDNode *G = cast(Callee); // A call to a TLS address is actually an indirect call to a // thread-specific pointer. unsigned OpFlags = 0; if (UsePlt) OpFlags = PPCII::MO_PLT; // If the callee is a GlobalAddress/ExternalSymbol node (quite common, // every direct call is) turn it into a TargetGlobalAddress / // TargetExternalSymbol node so that legalize doesn't hack it. Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, Callee.getValueType(), 0, OpFlags); needIndirectCall = false; } if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { unsigned char OpFlags = 0; if (UsePlt) OpFlags = PPCII::MO_PLT; Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(), OpFlags); needIndirectCall = false; } if (isPatchPoint) { // We'll form an invalid direct call when lowering a patchpoint; the full // sequence for an indirect call is complicated, and many of the // instructions introduced might have side effects (and, thus, can't be // removed later). The call itself will be removed as soon as the // argument/return lowering is complete, so the fact that it has the wrong // kind of operands should not really matter. needIndirectCall = false; } if (needIndirectCall) { // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair // to do the call, we can't use PPCISD::CALL. SDValue MTCTROps[] = {Chain, Callee, InFlag}; if (isSVR4ABI && isPPC64 && !isELFv2ABI) { // Function pointers in the 64-bit SVR4 ABI do not point to the function // entry point, but to the function descriptor (the function entry point // address is part of the function descriptor though). // The function descriptor is a three doubleword structure with the // following fields: function entry point, TOC base address and // environment pointer. // Thus for a call through a function pointer, the following actions need // to be performed: // 1. Save the TOC of the caller in the TOC save area of its stack // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()). // 2. Load the address of the function entry point from the function // descriptor. // 3. Load the TOC of the callee from the function descriptor into r2. // 4. Load the environment pointer from the function descriptor into // r11. // 5. Branch to the function entry point address. // 6. On return of the callee, the TOC of the caller needs to be // restored (this is done in FinishCall()). // // The loads are scheduled at the beginning of the call sequence, and the // register copies are flagged together to ensure that no other // operations can be scheduled in between. E.g. without flagging the // copies together, a TOC access in the caller could be scheduled between // the assignment of the callee TOC and the branch to the callee, which // results in the TOC access going through the TOC of the callee instead // of going through the TOC of the caller, which leads to incorrect code. // Load the address of the function entry point from the function // descriptor. SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1); if (LDChain.getValueType() == MVT::Glue) LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2); auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors() ? MachineMemOperand::MOInvariant : MachineMemOperand::MONone; MachinePointerInfo MPI(CS ? CS->getCalledValue() : nullptr); SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI, /* Alignment = */ 8, MMOFlags); // Load environment pointer into r11. SDValue PtrOff = DAG.getIntPtrConstant(16, dl); SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff); SDValue LoadEnvPtr = DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16), /* Alignment = */ 8, MMOFlags); SDValue TOCOff = DAG.getIntPtrConstant(8, dl); SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff); SDValue TOCPtr = DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8), /* Alignment = */ 8, MMOFlags); setUsesTOCBasePtr(DAG); SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr, InFlag); Chain = TOCVal.getValue(0); InFlag = TOCVal.getValue(1); // If the function call has an explicit 'nest' parameter, it takes the // place of the environment pointer. if (!hasNest) { SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr, InFlag); Chain = EnvVal.getValue(0); InFlag = EnvVal.getValue(1); } MTCTROps[0] = Chain; MTCTROps[1] = LoadFuncPtr; MTCTROps[2] = InFlag; } Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2)); InFlag = Chain.getValue(1); NodeTys.clear(); NodeTys.push_back(MVT::Other); NodeTys.push_back(MVT::Glue); Ops.push_back(Chain); CallOpc = PPCISD::BCTRL; Callee.setNode(nullptr); // Add use of X11 (holding environment pointer) if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest) Ops.push_back(DAG.getRegister(PPC::X11, PtrVT)); // Add CTR register as callee so a bctr can be emitted later. if (isTailCall) Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT)); } // If this is a direct call, pass the chain and the callee. if (Callee.getNode()) { Ops.push_back(Chain); Ops.push_back(Callee); } // If this is a tail call add stack pointer delta. if (isTailCall) Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32)); // Add argument registers to the end of the list so that they are known live // into the call. for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) Ops.push_back(DAG.getRegister(RegsToPass[i].first, RegsToPass[i].second.getValueType())); // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live // into the call. if (isSVR4ABI && isPPC64 && !isPatchPoint) { setUsesTOCBasePtr(DAG); Ops.push_back(DAG.getRegister(PPC::X2, PtrVT)); } return CallOpc; } static bool isLocalCall(const SDValue &Callee) { if (GlobalAddressSDNode *G = dyn_cast(Callee)) return G->getGlobal()->isStrongDefinitionForLinker(); return false; } SDValue PPCTargetLowering::LowerCallResult( SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { SmallVector RVLocs; CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext()); CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC); // Copy all of the result registers out of their specified physreg. for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), InFlag); Chain = Val.getValue(1); InFlag = Val.getValue(2); switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::AExt: Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); break; case CCValAssign::ZExt: Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val, DAG.getValueType(VA.getValVT())); Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); break; case CCValAssign::SExt: Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val, DAG.getValueType(VA.getValVT())); Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); break; } InVals.push_back(Val); } return Chain; } SDValue PPCTargetLowering::FinishCall( CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg, bool isPatchPoint, bool hasNest, SelectionDAG &DAG, SmallVector, 8> &RegsToPass, SDValue InFlag, SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff, unsigned NumBytes, const SmallVectorImpl &Ins, SmallVectorImpl &InVals, ImmutableCallSite *CS) const { std::vector NodeTys; SmallVector Ops; unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl, SPDiff, isTailCall, isPatchPoint, hasNest, RegsToPass, Ops, NodeTys, CS, Subtarget); // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64()) Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32)); // When performing tail call optimization the callee pops its arguments off // the stack. Account for this here so these bytes can be pushed back on in // PPCFrameLowering::eliminateCallFramePseudoInstr. int BytesCalleePops = (CallConv == CallingConv::Fast && getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0; // Add a register mask operand representing the call-preserved registers. const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); const uint32_t *Mask = TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv); assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); if (InFlag.getNode()) Ops.push_back(InFlag); // Emit tail call. if (isTailCall) { assert(((Callee.getOpcode() == ISD::Register && cast(Callee)->getReg() == PPC::CTR) || Callee.getOpcode() == ISD::TargetExternalSymbol || Callee.getOpcode() == ISD::TargetGlobalAddress || isa(Callee)) && "Expecting an global address, external symbol, absolute value or register"); DAG.getMachineFunction().getFrameInfo()->setHasTailCall(); return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops); } // Add a NOP immediately after the branch instruction when using the 64-bit // SVR4 ABI. At link time, if caller and callee are in a different module and // thus have a different TOC, the call will be replaced with a call to a stub // function which saves the current TOC, loads the TOC of the callee and // branches to the callee. The NOP will be replaced with a load instruction // which restores the TOC of the caller from the TOC save slot of the current // stack frame. If caller and callee belong to the same module (and have the // same TOC), the NOP will remain unchanged. if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64() && !isPatchPoint) { if (CallOpc == PPCISD::BCTRL) { // This is a call through a function pointer. // Restore the caller TOC from the save area into R2. // See PrepareCall() for more information about calls through function // pointers in the 64-bit SVR4 ABI. // We are using a target-specific load with r2 hard coded, because the // result of a target-independent load would never go directly into r2, // since r2 is a reserved register (which prevents the register allocator // from allocating it), resulting in an additional register being // allocated and an unnecessary move instruction being generated. CallOpc = PPCISD::BCTRL_LOAD_TOC; EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT); unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff); // The address needs to go after the chain input but before the flag (or // any other variadic arguments). Ops.insert(std::next(Ops.begin()), AddTOC); } else if ((CallOpc == PPCISD::CALL) && (!isLocalCall(Callee) || DAG.getTarget().getRelocationModel() == Reloc::PIC_)) // Otherwise insert NOP for non-local calls. CallOpc = PPCISD::CALL_NOP; } Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); InFlag = Chain.getValue(1); Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), DAG.getIntPtrConstant(BytesCalleePops, dl, true), InFlag, dl); if (!Ins.empty()) InFlag = Chain.getValue(1); return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG, InVals); } SDValue PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { SelectionDAG &DAG = CLI.DAG; SDLoc &dl = CLI.DL; SmallVectorImpl &Outs = CLI.Outs; SmallVectorImpl &OutVals = CLI.OutVals; SmallVectorImpl &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; bool &isTailCall = CLI.IsTailCall; CallingConv::ID CallConv = CLI.CallConv; bool isVarArg = CLI.IsVarArg; bool isPatchPoint = CLI.IsPatchPoint; ImmutableCallSite *CS = CLI.CS; if (isTailCall) { if (Subtarget.useLongCalls() && !(CS && CS->isMustTailCall())) isTailCall = false; else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) isTailCall = IsEligibleForTailCallOptimization_64SVR4(Callee, CallConv, CS, isVarArg, Outs, Ins, DAG); else isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, Ins, DAG); if (isTailCall) { ++NumTailCalls; if (!getTargetMachine().Options.GuaranteedTailCallOpt) ++NumSiblingCalls; assert(isa(Callee) && "Callee should be an llvm::Function object."); DEBUG( const GlobalValue *GV = cast(Callee)->getGlobal(); const unsigned Width = 80 - strlen("TCO caller: ") - strlen(", callee linkage: 0, 0"); dbgs() << "TCO caller: " << left_justify(DAG.getMachineFunction().getName(), Width) << ", callee linkage: " << GV->getVisibility() << ", " << GV->getLinkage() << "\n" ); } } if (!isTailCall && CS && CS->isMustTailCall()) report_fatal_error("failed to perform tail call elimination on a call " "site marked musttail"); // When long calls (i.e. indirect calls) are always used, calls are always // made via function pointer. If we have a function name, first translate it // into a pointer. if (Subtarget.useLongCalls() && isa(Callee) && !isTailCall) Callee = LowerGlobalAddress(Callee, DAG); if (Subtarget.isSVR4ABI()) { if (Subtarget.isPPC64()) return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg, isTailCall, isPatchPoint, Outs, OutVals, Ins, dl, DAG, InVals, CS); else return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg, isTailCall, isPatchPoint, Outs, OutVals, Ins, dl, DAG, InVals, CS); } return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg, isTailCall, isPatchPoint, Outs, OutVals, Ins, dl, DAG, InVals, CS); } SDValue PPCTargetLowering::LowerCall_32SVR4( SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool isTailCall, bool isPatchPoint, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals, ImmutableCallSite *CS) const { // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description // of the 32-bit SVR4 ABI stack frame layout. assert((CallConv == CallingConv::C || CallConv == CallingConv::Fast) && "Unknown calling convention!"); unsigned PtrByteSize = 4; MachineFunction &MF = DAG.getMachineFunction(); // Mark this function as potentially containing a function that contains a // tail call. As a consequence the frame pointer will be used for dynamicalloc // and restoring the callers stack pointer in this functions epilog. This is // done because by tail calling the called function might overwrite the value // in this function's (MF) stack pointer stack slot 0(SP). if (getTargetMachine().Options.GuaranteedTailCallOpt && CallConv == CallingConv::Fast) MF.getInfo()->setHasFastCall(); // Count how many bytes are to be pushed on the stack, including the linkage // area, parameter list area and the part of the local variable space which // contains copies of aggregates which are passed by value. // Assign locations to all of the outgoing arguments. SmallVector ArgLocs; PPCCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext()); // Reserve space for the linkage area on the stack. CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(), PtrByteSize); if (useSoftFloat()) CCInfo.PreAnalyzeCallOperands(Outs); if (isVarArg) { // Handle fixed and variable vector arguments differently. // Fixed vector arguments go into registers as long as registers are // available. Variable vector arguments always go into memory. unsigned NumArgs = Outs.size(); for (unsigned i = 0; i != NumArgs; ++i) { MVT ArgVT = Outs[i].VT; ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; bool Result; if (Outs[i].IsFixed) { Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); } else { Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); } if (Result) { #ifndef NDEBUG errs() << "Call operand #" << i << " has unhandled type " << EVT(ArgVT).getEVTString() << "\n"; #endif llvm_unreachable(nullptr); } } } else { // All arguments are treated the same. CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4); } CCInfo.clearWasPPCF128(); // Assign locations to all of the outgoing aggregate by value arguments. SmallVector ByValArgLocs; CCState CCByValInfo(CallConv, isVarArg, MF, ByValArgLocs, *DAG.getContext()); // Reserve stack space for the allocations in CCInfo. CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal); // Size of the linkage area, parameter list area and the part of the local // space variable where copies of aggregates which are passed by value are // stored. unsigned NumBytes = CCByValInfo.getNextStackOffset(); // Calculate by how many bytes the stack has to be adjusted in case of tail // call optimization. int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), dl); SDValue CallSeqStart = Chain; // Load the return address and frame pointer so it can be moved somewhere else // later. SDValue LROp, FPOp; Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl); // Set up a copy of the stack pointer for use loading and storing any // arguments that may not fit in the registers available for argument // passing. SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32); SmallVector, 8> RegsToPass; SmallVector TailCallArguments; SmallVector MemOpChains; bool seenFloatArg = false; // Walk the register/memloc assignments, inserting copies/loads. for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; SDValue Arg = OutVals[i]; ISD::ArgFlagsTy Flags = Outs[i].Flags; if (Flags.isByVal()) { // Argument is an aggregate which is passed by value, thus we need to // create a copy of it in the local variable space of the current stack // frame (which is the stack frame of the caller) and pass the address of // this copy to the callee. assert((j < ByValArgLocs.size()) && "Index out of bounds!"); CCValAssign &ByValVA = ByValArgLocs[j++]; assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!"); // Memory reserved in the local variable space of the callers stack frame. unsigned LocMemOffset = ByValVA.getLocMemOffset(); SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()), StackPtr, PtrOff); // Create a copy of the argument in the local area of the current // stack frame. SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff, CallSeqStart.getNode()->getOperand(0), Flags, DAG, dl); // This must go outside the CALLSEQ_START..END. SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, CallSeqStart.getNode()->getOperand(1), SDLoc(MemcpyCall)); DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), NewCallSeqStart.getNode()); Chain = CallSeqStart = NewCallSeqStart; // Pass the address of the aggregate copy on the stack either in a // physical register or in the parameter list area of the current stack // frame to the callee. Arg = PtrOff; } if (VA.isRegLoc()) { if (Arg.getValueType() == MVT::i1) Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg); seenFloatArg |= VA.getLocVT().isFloatingPoint(); // Put argument in a physical register. RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); } else { // Put argument in the parameter list area of the current stack frame. assert(VA.isMemLoc()); unsigned LocMemOffset = VA.getLocMemOffset(); if (!isTailCall) { SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()), StackPtr, PtrOff); MemOpChains.push_back( DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo())); } else { // Calculate and remember argument location. CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset, TailCallArguments); } } } if (!MemOpChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } // Set CR bit 6 to true if this is a vararg call with floating args passed in // registers. if (isVarArg) { SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, InFlag }; Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1)); InFlag = Chain.getValue(1); } if (isTailCall) PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp, TailCallArguments); return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, /* unused except on PPC64 ELFv1 */ false, DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, NumBytes, Ins, InVals, CS); } // Copy an argument into memory, being careful to do this outside the // call sequence for the call to which the argument belongs. SDValue PPCTargetLowering::createMemcpyOutsideCallSeq( SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl) const { SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff, CallSeqStart.getNode()->getOperand(0), Flags, DAG, dl); // The MEMCPY must go outside the CALLSEQ_START..END. SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, CallSeqStart.getNode()->getOperand(1), SDLoc(MemcpyCall)); DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), NewCallSeqStart.getNode()); return NewCallSeqStart; } SDValue PPCTargetLowering::LowerCall_64SVR4( SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool isTailCall, bool isPatchPoint, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals, ImmutableCallSite *CS) const { bool isELFv2ABI = Subtarget.isELFv2ABI(); bool isLittleEndian = Subtarget.isLittleEndian(); unsigned NumOps = Outs.size(); bool hasNest = false; bool IsSibCall = false; EVT PtrVT = getPointerTy(DAG.getDataLayout()); unsigned PtrByteSize = 8; MachineFunction &MF = DAG.getMachineFunction(); if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt) IsSibCall = true; // Mark this function as potentially containing a function that contains a // tail call. As a consequence the frame pointer will be used for dynamicalloc // and restoring the callers stack pointer in this functions epilog. This is // done because by tail calling the called function might overwrite the value // in this function's (MF) stack pointer stack slot 0(SP). if (getTargetMachine().Options.GuaranteedTailCallOpt && CallConv == CallingConv::Fast) MF.getInfo()->setHasFastCall(); assert(!(CallConv == CallingConv::Fast && isVarArg) && "fastcc not supported on varargs functions"); // Count how many bytes are to be pushed on the stack, including the linkage // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage // area is 32 bytes reserved space for [SP][CR][LR][TOC]. unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); unsigned NumBytes = LinkageSize; unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; unsigned &QFPR_idx = FPR_idx; static const MCPhysReg GPR[] = { PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; static const MCPhysReg VSRH[] = { PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8, PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13 }; const unsigned NumGPRs = array_lengthof(GPR); const unsigned NumFPRs = 13; const unsigned NumVRs = array_lengthof(VR); const unsigned NumQFPRs = NumFPRs; // When using the fast calling convention, we don't provide backing for // arguments that will be in registers. unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0; // Add up all the space actually used. for (unsigned i = 0; i != NumOps; ++i) { ISD::ArgFlagsTy Flags = Outs[i].Flags; EVT ArgVT = Outs[i].VT; EVT OrigVT = Outs[i].ArgVT; if (Flags.isNest()) continue; if (CallConv == CallingConv::Fast) { if (Flags.isByVal()) NumGPRsUsed += (Flags.getByValSize()+7)/8; else switch (ArgVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unexpected ValueType for argument!"); case MVT::i1: case MVT::i32: case MVT::i64: if (++NumGPRsUsed <= NumGPRs) continue; break; case MVT::v4i32: case MVT::v8i16: case MVT::v16i8: case MVT::v2f64: case MVT::v2i64: case MVT::v1i128: if (++NumVRsUsed <= NumVRs) continue; break; case MVT::v4f32: // When using QPX, this is handled like a FP register, otherwise, it // is an Altivec register. if (Subtarget.hasQPX()) { if (++NumFPRsUsed <= NumFPRs) continue; } else { if (++NumVRsUsed <= NumVRs) continue; } break; case MVT::f32: case MVT::f64: case MVT::v4f64: // QPX case MVT::v4i1: // QPX if (++NumFPRsUsed <= NumFPRs) continue; break; } } /* Respect alignment of argument on the stack. */ unsigned Align = CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); NumBytes = ((NumBytes + Align - 1) / Align) * Align; NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); if (Flags.isInConsecutiveRegsLast()) NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; } unsigned NumBytesActuallyUsed = NumBytes; // The prolog code of the callee may store up to 8 GPR argument registers to // the stack, allowing va_start to index over them in memory if its varargs. // Because we cannot tell if this is needed on the caller side, we have to // conservatively assume that it is needed. As such, make sure we have at // least enough stack space for the caller to store the 8 GPRs. // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area. NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize); // Tail call needs the stack to be aligned. if (getTargetMachine().Options.GuaranteedTailCallOpt && CallConv == CallingConv::Fast) NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes); int SPDiff = 0; // Calculate by how many bytes the stack has to be adjusted in case of tail // call optimization. if (!IsSibCall) SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); // To protect arguments on the stack from being clobbered in a tail call, // force all the loads to happen before doing any other lowering. if (isTailCall) Chain = DAG.getStackArgumentTokenFactor(Chain); // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass if (!IsSibCall) Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), dl); SDValue CallSeqStart = Chain; // Load the return address and frame pointer so it can be move somewhere else // later. SDValue LROp, FPOp; Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl); // Set up a copy of the stack pointer for use loading and storing any // arguments that may not fit in the registers available for argument // passing. SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64); // Figure out which arguments are going to go in registers, and which in // memory. Also, if this is a vararg function, floating point operations // must be stored to our stack, and loaded into integer regs as well, if // any integer regs are available for argument passing. unsigned ArgOffset = LinkageSize; SmallVector, 8> RegsToPass; SmallVector TailCallArguments; SmallVector MemOpChains; for (unsigned i = 0; i != NumOps; ++i) { SDValue Arg = OutVals[i]; ISD::ArgFlagsTy Flags = Outs[i].Flags; EVT ArgVT = Outs[i].VT; EVT OrigVT = Outs[i].ArgVT; // PtrOff will be used to store the current argument to the stack if a // register cannot be found for it. SDValue PtrOff; // We re-align the argument offset for each argument, except when using the // fast calling convention, when we need to make sure we do that only when // we'll actually use a stack slot. auto ComputePtrOff = [&]() { /* Respect alignment of argument on the stack. */ unsigned Align = CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType()); PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); }; if (CallConv != CallingConv::Fast) { ComputePtrOff(); /* Compute GPR index associated with argument offset. */ GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; GPR_idx = std::min(GPR_idx, NumGPRs); } // Promote integers to 64-bit values. if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) { // FIXME: Should this use ANY_EXTEND if neither sext nor zext? unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); } // FIXME memcpy is used way more than necessary. Correctness first. // Note: "by value" is code for passing a structure by value, not // basic types. if (Flags.isByVal()) { // Note: Size includes alignment padding, so // struct x { short a; char b; } // will have Size = 4. With #pragma pack(1), it will have Size = 3. // These are the proper values we need for right-justifying the // aggregate in a parameter register. unsigned Size = Flags.getByValSize(); // An empty aggregate parameter takes up no storage and no // registers. if (Size == 0) continue; if (CallConv == CallingConv::Fast) ComputePtrOff(); // All aggregates smaller than 8 bytes must be passed right-justified. if (Size==1 || Size==2 || Size==4) { EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32); if (GPR_idx != NumGPRs) { SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, MachinePointerInfo(), VT); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); ArgOffset += PtrByteSize; continue; } } if (GPR_idx == NumGPRs && Size < 8) { SDValue AddPtr = PtrOff; if (!isLittleEndian) { SDValue Const = DAG.getConstant(PtrByteSize - Size, dl, PtrOff.getValueType()); AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); } Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, CallSeqStart, Flags, DAG, dl); ArgOffset += PtrByteSize; continue; } // Copy entire object into memory. There are cases where gcc-generated // code assumes it is there, even if it could be put entirely into // registers. (This is not what the doc says.) // FIXME: The above statement is likely due to a misunderstanding of the // documents. All arguments must be copied into the parameter area BY // THE CALLEE in the event that the callee takes the address of any // formal argument. That has not yet been implemented. However, it is // reasonable to use the stack area as a staging area for the register // load. // Skip this for small aggregates, as we will use the same slot for a // right-justified copy, below. if (Size >= 8) Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, CallSeqStart, Flags, DAG, dl); // When a register is available, pass a small aggregate right-justified. if (Size < 8 && GPR_idx != NumGPRs) { // The easiest way to get this right-justified in a register // is to copy the structure into the rightmost portion of a // local variable slot, then load the whole slot into the // register. // FIXME: The memcpy seems to produce pretty awful code for // small aggregates, particularly for packed ones. // FIXME: It would be preferable to use the slot in the // parameter save area instead of a new local variable. SDValue AddPtr = PtrOff; if (!isLittleEndian) { SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType()); AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); } Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, CallSeqStart, Flags, DAG, dl); // Load the slot into the register. SDValue Load = DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo()); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); // Done with this argument. ArgOffset += PtrByteSize; continue; } // For aggregates larger than PtrByteSize, copy the pieces of the // object that fit into registers from the parameter save area. for (unsigned j=0; j gpr moves. // In the non-vararg case, this can only ever happen in the // presence of f32 array types, since otherwise we never run // out of FPRs before running out of GPRs. SDValue ArgVal; // Double values are always passed in a single GPR. if (Arg.getValueType() != MVT::f32) { ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg); // Non-array float values are extended and passed in a GPR. } else if (!Flags.isInConsecutiveRegs()) { ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal); // If we have an array of floats, we collect every odd element // together with its predecessor into one GPR. } else if (ArgOffset % PtrByteSize != 0) { SDValue Lo, Hi; Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]); Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); if (!isLittleEndian) std::swap(Lo, Hi); ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); // The final element, if even, goes into the first half of a GPR. } else if (Flags.isInConsecutiveRegsLast()) { ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal); if (!isLittleEndian) ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal, DAG.getConstant(32, dl, MVT::i32)); // Non-final even elements are skipped; they will be handled // together the with subsequent argument on the next go-around. } else ArgVal = SDValue(); if (ArgVal.getNode()) RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal)); } else { if (CallConv == CallingConv::Fast) ComputePtrOff(); // Single-precision floating-point values are mapped to the // second (rightmost) word of the stack doubleword. if (Arg.getValueType() == MVT::f32 && !isLittleEndian && !Flags.isInConsecutiveRegs()) { SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType()); PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); } LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, true, isTailCall, false, MemOpChains, TailCallArguments, dl); NeededLoad = true; } // When passing an array of floats, the array occupies consecutive // space in the argument area; only round up to the next doubleword // at the end of the array. Otherwise, each float takes 8 bytes. if (CallConv != CallingConv::Fast || NeededLoad) { ArgOffset += (Arg.getValueType() == MVT::f32 && Flags.isInConsecutiveRegs()) ? 4 : 8; if (Flags.isInConsecutiveRegsLast()) ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; } break; } case MVT::v4f32: case MVT::v4i32: case MVT::v8i16: case MVT::v16i8: case MVT::v2f64: case MVT::v2i64: case MVT::v1i128: if (!Subtarget.hasQPX()) { // These can be scalar arguments or elements of a vector array type // passed directly. The latter are used to implement ELFv2 homogenous // vector aggregates. // For a varargs call, named arguments go into VRs or on the stack as // usual; unnamed arguments always go to the stack or the corresponding // GPRs when within range. For now, we always put the value in both // locations (or even all three). if (isVarArg) { // We could elide this store in the case where the object fits // entirely in R registers. Maybe later. SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()); MemOpChains.push_back(Store); if (VR_idx != NumVRs) { SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo()); MemOpChains.push_back(Load.getValue(1)); unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 || Arg.getSimpleValueType() == MVT::v2i64) ? VSRH[VR_idx] : VR[VR_idx]; ++VR_idx; RegsToPass.push_back(std::make_pair(VReg, Load)); } ArgOffset += 16; for (unsigned i=0; i<16; i+=PtrByteSize) { if (GPR_idx == NumGPRs) break; SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, DAG.getConstant(i, dl, PtrVT)); SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo()); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); } break; } // Non-varargs Altivec params go into VRs or on the stack. if (VR_idx != NumVRs) { unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 || Arg.getSimpleValueType() == MVT::v2i64) ? VSRH[VR_idx] : VR[VR_idx]; ++VR_idx; RegsToPass.push_back(std::make_pair(VReg, Arg)); } else { if (CallConv == CallingConv::Fast) ComputePtrOff(); LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, true, isTailCall, true, MemOpChains, TailCallArguments, dl); if (CallConv == CallingConv::Fast) ArgOffset += 16; } if (CallConv != CallingConv::Fast) ArgOffset += 16; break; } // not QPX assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 && "Invalid QPX parameter type"); /* fall through */ case MVT::v4f64: case MVT::v4i1: { bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32; if (isVarArg) { // We could elide this store in the case where the object fits // entirely in R registers. Maybe later. SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()); MemOpChains.push_back(Store); if (QFPR_idx != NumQFPRs) { SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl, Store, PtrOff, MachinePointerInfo()); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load)); } ArgOffset += (IsF32 ? 16 : 32); for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) { if (GPR_idx == NumGPRs) break; SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, DAG.getConstant(i, dl, PtrVT)); SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo()); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); } break; } // Non-varargs QPX params go into registers or on the stack. if (QFPR_idx != NumQFPRs) { RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg)); } else { if (CallConv == CallingConv::Fast) ComputePtrOff(); LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, true, isTailCall, true, MemOpChains, TailCallArguments, dl); if (CallConv == CallingConv::Fast) ArgOffset += (IsF32 ? 16 : 32); } if (CallConv != CallingConv::Fast) ArgOffset += (IsF32 ? 16 : 32); break; } } } assert(NumBytesActuallyUsed == ArgOffset); (void)NumBytesActuallyUsed; if (!MemOpChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); // Check if this is an indirect call (MTCTR/BCTRL). // See PrepareCall() for more information about calls through function // pointers in the 64-bit SVR4 ABI. if (!isTailCall && !isPatchPoint && !isFunctionGlobalAddress(Callee) && !isa(Callee)) { // Load r2 into a virtual register and store it to the TOC save area. setUsesTOCBasePtr(DAG); SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64); // TOC save area offset. unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); Chain = DAG.getStore( Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset)); // In the ELFv2 ABI, R12 must contain the address of an indirect callee. // This does not mean the MTCTR instruction must use R12; it's easier // to model this as an extra parameter, so do that. if (isELFv2ABI && !isPatchPoint) RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee)); } // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } if (isTailCall && !IsSibCall) PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp, TailCallArguments); return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, hasNest, DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, NumBytes, Ins, InVals, CS); } SDValue PPCTargetLowering::LowerCall_Darwin( SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool isTailCall, bool isPatchPoint, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals, ImmutableCallSite *CS) const { unsigned NumOps = Outs.size(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); bool isPPC64 = PtrVT == MVT::i64; unsigned PtrByteSize = isPPC64 ? 8 : 4; MachineFunction &MF = DAG.getMachineFunction(); // Mark this function as potentially containing a function that contains a // tail call. As a consequence the frame pointer will be used for dynamicalloc // and restoring the callers stack pointer in this functions epilog. This is // done because by tail calling the called function might overwrite the value // in this function's (MF) stack pointer stack slot 0(SP). if (getTargetMachine().Options.GuaranteedTailCallOpt && CallConv == CallingConv::Fast) MF.getInfo()->setHasFastCall(); // Count how many bytes are to be pushed on the stack, including the linkage // area, and parameter passing area. We start with 24/48 bytes, which is // prereserved space for [SP][CR][LR][3 x unused]. unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); unsigned NumBytes = LinkageSize; // Add up all the space actually used. // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually // they all go in registers, but we must reserve stack space for them for // possible use by the caller. In varargs or 64-bit calls, parameters are // assigned stack space in order, with padding so Altivec parameters are // 16-byte aligned. unsigned nAltivecParamsAtEnd = 0; for (unsigned i = 0; i != NumOps; ++i) { ISD::ArgFlagsTy Flags = Outs[i].Flags; EVT ArgVT = Outs[i].VT; // Varargs Altivec parameters are padded to a 16 byte boundary. if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) { if (!isVarArg && !isPPC64) { // Non-varargs Altivec parameters go after all the non-Altivec // parameters; handle those later so we know how much padding we need. nAltivecParamsAtEnd++; continue; } // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary. NumBytes = ((NumBytes+15)/16)*16; } NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); } // Allow for Altivec parameters at the end, if needed. if (nAltivecParamsAtEnd) { NumBytes = ((NumBytes+15)/16)*16; NumBytes += 16*nAltivecParamsAtEnd; } // The prolog code of the callee may store up to 8 GPR argument registers to // the stack, allowing va_start to index over them in memory if its varargs. // Because we cannot tell if this is needed on the caller side, we have to // conservatively assume that it is needed. As such, make sure we have at // least enough stack space for the caller to store the 8 GPRs. NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize); // Tail call needs the stack to be aligned. if (getTargetMachine().Options.GuaranteedTailCallOpt && CallConv == CallingConv::Fast) NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes); // Calculate by how many bytes the stack has to be adjusted in case of tail // call optimization. int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); // To protect arguments on the stack from being clobbered in a tail call, // force all the loads to happen before doing any other lowering. if (isTailCall) Chain = DAG.getStackArgumentTokenFactor(Chain); // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), dl); SDValue CallSeqStart = Chain; // Load the return address and frame pointer so it can be move somewhere else // later. SDValue LROp, FPOp; Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl); // Set up a copy of the stack pointer for use loading and storing any // arguments that may not fit in the registers available for argument // passing. SDValue StackPtr; if (isPPC64) StackPtr = DAG.getRegister(PPC::X1, MVT::i64); else StackPtr = DAG.getRegister(PPC::R1, MVT::i32); // Figure out which arguments are going to go in registers, and which in // memory. Also, if this is a vararg function, floating point operations // must be stored to our stack, and loaded into integer regs as well, if // any integer regs are available for argument passing. unsigned ArgOffset = LinkageSize; unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; static const MCPhysReg GPR_32[] = { // 32-bit registers. PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; static const MCPhysReg GPR_64[] = { // 64-bit registers. PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; const unsigned NumGPRs = array_lengthof(GPR_32); const unsigned NumFPRs = 13; const unsigned NumVRs = array_lengthof(VR); const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32; SmallVector, 8> RegsToPass; SmallVector TailCallArguments; SmallVector MemOpChains; for (unsigned i = 0; i != NumOps; ++i) { SDValue Arg = OutVals[i]; ISD::ArgFlagsTy Flags = Outs[i].Flags; // PtrOff will be used to store the current argument to the stack if a // register cannot be found for it. SDValue PtrOff; PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType()); PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); // On PPC64, promote integers to 64-bit values. if (isPPC64 && Arg.getValueType() == MVT::i32) { // FIXME: Should this use ANY_EXTEND if neither sext nor zext? unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); } // FIXME memcpy is used way more than necessary. Correctness first. // Note: "by value" is code for passing a structure by value, not // basic types. if (Flags.isByVal()) { unsigned Size = Flags.getByValSize(); // Very small objects are passed right-justified. Everything else is // passed left-justified. if (Size==1 || Size==2) { EVT VT = (Size==1) ? MVT::i8 : MVT::i16; if (GPR_idx != NumGPRs) { SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, MachinePointerInfo(), VT); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); ArgOffset += PtrByteSize; } else { SDValue Const = DAG.getConstant(PtrByteSize - Size, dl, PtrOff.getValueType()); SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, CallSeqStart, Flags, DAG, dl); ArgOffset += PtrByteSize; } continue; } // Copy entire object into memory. There are cases where gcc-generated // code assumes it is there, even if it could be put entirely into // registers. (This is not what the doc says.) Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, CallSeqStart, Flags, DAG, dl); // For small aggregates (Darwin only) and aggregates >= PtrByteSize, // copy the pieces of the object that fit into registers from the // parameter save area. for (unsigned j=0; j NumVRs) { unsigned j = 0; // Offset is aligned; skip 1st 12 params which go in V registers. ArgOffset = ((ArgOffset+15)/16)*16; ArgOffset += 12*16; for (unsigned i = 0; i != NumOps; ++i) { SDValue Arg = OutVals[i]; EVT ArgType = Outs[i].VT; if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 || ArgType==MVT::v8i16 || ArgType==MVT::v16i8) { if (++j > NumVRs) { SDValue PtrOff; // We are emitting Altivec params in order. LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, isPPC64, isTailCall, true, MemOpChains, TailCallArguments, dl); ArgOffset += 16; } } } } if (!MemOpChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); // On Darwin, R12 must contain the address of an indirect callee. This does // not mean the MTCTR instruction must use R12; it's easier to model this as // an extra parameter, so do that. if (!isTailCall && !isFunctionGlobalAddress(Callee) && !isa(Callee) && !isBLACompatibleAddress(Callee, DAG)) RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 : PPC::R12), Callee)); // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } if (isTailCall) PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp, TailCallArguments); return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, /* unused except on PPC64 ELFv1 */ false, DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, NumBytes, Ins, InVals, CS); } bool PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl &Outs, LLVMContext &Context) const { SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); return CCInfo.CheckReturn(Outs, RetCC_PPC); } SDValue PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SDLoc &dl, SelectionDAG &DAG) const { SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext()); CCInfo.AnalyzeReturn(Outs, RetCC_PPC); SDValue Flag; SmallVector RetOps(1, Chain); // Copy the result values into the output registers. for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); SDValue Arg = OutVals[i]; switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::AExt: Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); break; case CCValAssign::ZExt: Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); break; case CCValAssign::SExt: Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); break; } Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo(); const MCPhysReg *I = TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction()); if (I) { for (; *I; ++I) { if (PPC::G8RCRegClass.contains(*I)) RetOps.push_back(DAG.getRegister(*I, MVT::i64)); else if (PPC::F8RCRegClass.contains(*I)) RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64))); else if (PPC::CRRCRegClass.contains(*I)) RetOps.push_back(DAG.getRegister(*I, MVT::i1)); else if (PPC::VRRCRegClass.contains(*I)) RetOps.push_back(DAG.getRegister(*I, MVT::Other)); else llvm_unreachable("Unexpected register class in CSRsViaCopy!"); } } RetOps[0] = Chain; // Update chain. // Add the flag if we have it. if (Flag.getNode()) RetOps.push_back(Flag); return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps); } SDValue PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); // Get the corect type for integers. EVT IntVT = Op.getValueType(); // Get the inputs. SDValue Chain = Op.getOperand(0); SDValue FPSIdx = getFramePointerFrameIndex(DAG); // Build a DYNAREAOFFSET node. SDValue Ops[2] = {Chain, FPSIdx}; SDVTList VTs = DAG.getVTList(IntVT); return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops); } SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const { // When we pop the dynamic allocation we need to restore the SP link. SDLoc dl(Op); // Get the corect type for pointers. EVT PtrVT = getPointerTy(DAG.getDataLayout()); // Construct the stack pointer operand. bool isPPC64 = Subtarget.isPPC64(); unsigned SP = isPPC64 ? PPC::X1 : PPC::R1; SDValue StackPtr = DAG.getRegister(SP, PtrVT); // Get the operands for the STACKRESTORE. SDValue Chain = Op.getOperand(0); SDValue SaveSP = Op.getOperand(1); // Load the old link SP. SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo()); // Restore the stack pointer. Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP); // Store the old link SP. return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo()); } SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); bool isPPC64 = Subtarget.isPPC64(); EVT PtrVT = getPointerTy(MF.getDataLayout()); // Get current frame pointer save index. The users of this index will be // primarily DYNALLOC instructions. PPCFunctionInfo *FI = MF.getInfo(); int RASI = FI->getReturnAddrSaveIndex(); // If the frame pointer save index hasn't been defined yet. if (!RASI) { // Find out what the fix offset of the frame pointer save area. int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset(); // Allocate the frame index for frame pointer save area. RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, false); // Save the result. FI->setReturnAddrSaveIndex(RASI); } return DAG.getFrameIndex(RASI, PtrVT); } SDValue PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { MachineFunction &MF = DAG.getMachineFunction(); bool isPPC64 = Subtarget.isPPC64(); EVT PtrVT = getPointerTy(MF.getDataLayout()); // Get current frame pointer save index. The users of this index will be // primarily DYNALLOC instructions. PPCFunctionInfo *FI = MF.getInfo(); int FPSI = FI->getFramePointerSaveIndex(); // If the frame pointer save index hasn't been defined yet. if (!FPSI) { // Find out what the fix offset of the frame pointer save area. int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset(); // Allocate the frame index for frame pointer save area. FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true); // Save the result. FI->setFramePointerSaveIndex(FPSI); } return DAG.getFrameIndex(FPSI, PtrVT); } SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { // Get the inputs. SDValue Chain = Op.getOperand(0); SDValue Size = Op.getOperand(1); SDLoc dl(Op); // Get the corect type for pointers. EVT PtrVT = getPointerTy(DAG.getDataLayout()); // Negate the size. SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT, DAG.getConstant(0, dl, PtrVT), Size); // Construct a node for the frame pointer save index. SDValue FPSIdx = getFramePointerFrameIndex(DAG); // Build a DYNALLOC node. SDValue Ops[3] = { Chain, NegSize, FPSIdx }; SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other); return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops); } +SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op, + SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + + bool isPPC64 = Subtarget.isPPC64(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + + int FI = MF.getFrameInfo()->CreateFixedObject(isPPC64 ? 8 : 4, 0, false); + return DAG.getFrameIndex(FI, PtrVT); +} + SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL, DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0), Op.getOperand(1)); } SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other, Op.getOperand(0), Op.getOperand(1)); } SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { if (Op.getValueType().isVector()) return LowerVectorLoad(Op, DAG); assert(Op.getValueType() == MVT::i1 && "Custom lowering only for i1 loads"); // First, load 8 bits into 32 bits, then truncate to 1 bit. SDLoc dl(Op); LoadSDNode *LD = cast(Op); SDValue Chain = LD->getChain(); SDValue BasePtr = LD->getBasePtr(); MachineMemOperand *MMO = LD->getMemOperand(); SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain, BasePtr, MVT::i8, MMO); SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD); SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) }; return DAG.getMergeValues(Ops, dl); } SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { if (Op.getOperand(1).getValueType().isVector()) return LowerVectorStore(Op, DAG); assert(Op.getOperand(1).getValueType() == MVT::i1 && "Custom lowering only for i1 stores"); // First, zero extend to 32 bits, then use a truncating store to 8 bits. SDLoc dl(Op); StoreSDNode *ST = cast(Op); SDValue Chain = ST->getChain(); SDValue BasePtr = ST->getBasePtr(); SDValue Value = ST->getValue(); MachineMemOperand *MMO = ST->getMemOperand(); Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()), Value); return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO); } // FIXME: Remove this once the ANDI glue bug is fixed: SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { assert(Op.getValueType() == MVT::i1 && "Custom lowering only for i1 results"); SDLoc DL(Op); return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1, Op.getOperand(0)); } /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when /// possible. SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { // Not FP? Not a fsel. if (!Op.getOperand(0).getValueType().isFloatingPoint() || !Op.getOperand(2).getValueType().isFloatingPoint()) return Op; // We might be able to do better than this under some circumstances, but in // general, fsel-based lowering of select is a finite-math-only optimization. // For more information, see section F.3 of the 2.06 ISA specification. if (!DAG.getTarget().Options.NoInfsFPMath || !DAG.getTarget().Options.NoNaNsFPMath) return Op; // TODO: Propagate flags from the select rather than global settings. SDNodeFlags Flags; Flags.setNoInfs(true); Flags.setNoNaNs(true); ISD::CondCode CC = cast(Op.getOperand(4))->get(); EVT ResVT = Op.getValueType(); EVT CmpVT = Op.getOperand(0).getValueType(); SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); SDValue TV = Op.getOperand(2), FV = Op.getOperand(3); SDLoc dl(Op); // If the RHS of the comparison is a 0.0, we don't need to do the // subtraction at all. SDValue Sel1; if (isFloatingPointZero(RHS)) switch (CC) { default: break; // SETUO etc aren't handled by fsel. case ISD::SETNE: std::swap(TV, FV); case ISD::SETEQ: if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV); if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); return DAG.getNode(PPCISD::FSEL, dl, ResVT, DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV); case ISD::SETULT: case ISD::SETLT: std::swap(TV, FV); // fsel is natively setge, swap operands for setlt case ISD::SETOGE: case ISD::SETGE: if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV); case ISD::SETUGT: case ISD::SETGT: std::swap(TV, FV); // fsel is natively setge, swap operands for setlt case ISD::SETOLE: case ISD::SETLE: if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); return DAG.getNode(PPCISD::FSEL, dl, ResVT, DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV); } SDValue Cmp; switch (CC) { default: break; // SETUO etc aren't handled by fsel. case ISD::SETNE: std::swap(TV, FV); case ISD::SETEQ: Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); return DAG.getNode(PPCISD::FSEL, dl, ResVT, DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV); case ISD::SETULT: case ISD::SETLT: Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); case ISD::SETOGE: case ISD::SETGE: Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); case ISD::SETUGT: case ISD::SETGT: Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); case ISD::SETOLE: case ISD::SETLE: Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); } return Op; } void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, SelectionDAG &DAG, const SDLoc &dl) const { assert(Op.getOperand(0).getValueType().isFloatingPoint()); SDValue Src = Op.getOperand(0); if (Src.getValueType() == MVT::f32) Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); SDValue Tmp; switch (Op.getSimpleValueType().SimpleTy) { default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); case MVT::i32: Tmp = DAG.getNode( Op.getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIWZ : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ), dl, MVT::f64, Src); break; case MVT::i64: assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) && "i64 FP_TO_UINT is supported only with FPCVT"); Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ, dl, MVT::f64, Src); break; } // Convert the FP value to an int value through memory. bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() && (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()); SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64); int FI = cast(FIPtr)->getIndex(); MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); // Emit a store to the stack slot. SDValue Chain; if (i32Stack) { MachineFunction &MF = DAG.getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4); SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr }; Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO); } else Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, MPI); // Result is a load from the stack slot. If loading 4 bytes, make sure to // add in a bias on big endian. if (Op.getValueType() == MVT::i32 && !i32Stack) { FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, DAG.getConstant(4, dl, FIPtr.getValueType())); MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4); } RLI.Chain = Chain; RLI.Ptr = FIPtr; RLI.MPI = MPI; } /// \brief Custom lowers floating point to integer conversions to use /// the direct move instructions available in ISA 2.07 to avoid the /// need for load/store combinations. SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op, SelectionDAG &DAG, const SDLoc &dl) const { assert(Op.getOperand(0).getValueType().isFloatingPoint()); SDValue Src = Op.getOperand(0); if (Src.getValueType() == MVT::f32) Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); SDValue Tmp; switch (Op.getSimpleValueType().SimpleTy) { default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); case MVT::i32: Tmp = DAG.getNode( Op.getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIWZ : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ), dl, MVT::f64, Src); Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp); break; case MVT::i64: assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) && "i64 FP_TO_UINT is supported only with FPCVT"); Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ, dl, MVT::f64, Src); Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp); break; } return Tmp; } SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, const SDLoc &dl) const { if (Subtarget.hasDirectMove() && Subtarget.isPPC64()) return LowerFP_TO_INTDirectMove(Op, DAG, dl); ReuseLoadInfo RLI; LowerFP_TO_INTForReuse(Op, RLI, DAG, dl); return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI, RLI.Alignment, RLI.IsInvariant ? MachineMemOperand::MOInvariant : MachineMemOperand::MONone, RLI.AAInfo, RLI.Ranges); } // We're trying to insert a regular store, S, and then a load, L. If the // incoming value, O, is a load, we might just be able to have our load use the // address used by O. However, we don't know if anything else will store to // that address before we can load from it. To prevent this situation, we need // to insert our load, L, into the chain as a peer of O. To do this, we give L // the same chain operand as O, we create a token factor from the chain results // of O and L, and we replace all uses of O's chain result with that token // factor (see spliceIntoChain below for this last part). bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI, SelectionDAG &DAG, ISD::LoadExtType ET) const { SDLoc dl(Op); if (ET == ISD::NON_EXTLOAD && (Op.getOpcode() == ISD::FP_TO_UINT || Op.getOpcode() == ISD::FP_TO_SINT) && isOperationLegalOrCustom(Op.getOpcode(), Op.getOperand(0).getValueType())) { LowerFP_TO_INTForReuse(Op, RLI, DAG, dl); return true; } LoadSDNode *LD = dyn_cast(Op); if (!LD || LD->getExtensionType() != ET || LD->isVolatile() || LD->isNonTemporal()) return false; if (LD->getMemoryVT() != MemVT) return false; RLI.Ptr = LD->getBasePtr(); if (LD->isIndexed() && !LD->getOffset().isUndef()) { assert(LD->getAddressingMode() == ISD::PRE_INC && "Non-pre-inc AM on PPC?"); RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr, LD->getOffset()); } RLI.Chain = LD->getChain(); RLI.MPI = LD->getPointerInfo(); RLI.IsInvariant = LD->isInvariant(); RLI.Alignment = LD->getAlignment(); RLI.AAInfo = LD->getAAInfo(); RLI.Ranges = LD->getRanges(); RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1); return true; } // Given the head of the old chain, ResChain, insert a token factor containing // it and NewResChain, and make users of ResChain now be users of that token // factor. void PPCTargetLowering::spliceIntoChain(SDValue ResChain, SDValue NewResChain, SelectionDAG &DAG) const { if (!ResChain) return; SDLoc dl(NewResChain); SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, NewResChain, DAG.getUNDEF(MVT::Other)); assert(TF.getNode() != NewResChain.getNode() && "A new TF really is required here"); DAG.ReplaceAllUsesOfValueWith(ResChain, TF); DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain); } /// \brief Analyze profitability of direct move /// prefer float load to int load plus direct move /// when there is no integer use of int load static bool directMoveIsProfitable(const SDValue &Op) { SDNode *Origin = Op.getOperand(0).getNode(); if (Origin->getOpcode() != ISD::LOAD) return true; for (SDNode::use_iterator UI = Origin->use_begin(), UE = Origin->use_end(); UI != UE; ++UI) { // Only look at the users of the loaded value. if (UI.getUse().get().getResNo() != 0) continue; if (UI->getOpcode() != ISD::SINT_TO_FP && UI->getOpcode() != ISD::UINT_TO_FP) return true; } return false; } /// \brief Custom lowers integer to floating point conversions to use /// the direct move instructions available in ISA 2.07 to avoid the /// need for load/store combinations. SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op, SelectionDAG &DAG, const SDLoc &dl) const { assert((Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::f64) && "Invalid floating point type as target of conversion"); assert(Subtarget.hasFPCVT() && "Int to FP conversions with direct moves require FPCVT"); SDValue FP; SDValue Src = Op.getOperand(0); bool SinglePrec = Op.getValueType() == MVT::f32; bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32; bool Signed = Op.getOpcode() == ISD::SINT_TO_FP; unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) : (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU); if (WordInt) { FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ, dl, MVT::f64, Src); FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP); } else { FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src); FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP); } return FP; } SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) { if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64) return SDValue(); SDValue Value = Op.getOperand(0); // The values are now known to be -1 (false) or 1 (true). To convert this // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64); Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); if (Op.getValueType() != MVT::v4f64) Value = DAG.getNode(ISD::FP_ROUND, dl, Op.getValueType(), Value, DAG.getIntPtrConstant(1, dl)); return Value; } // Don't handle ppc_fp128 here; let it be lowered to a libcall. if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) return SDValue(); if (Op.getOperand(0).getValueType() == MVT::i1) return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0), DAG.getConstantFP(1.0, dl, Op.getValueType()), DAG.getConstantFP(0.0, dl, Op.getValueType())); // If we have direct moves, we can do all the conversion, skip the store/load // however, without FPCVT we can't do most conversions. if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) && Subtarget.isPPC64() && Subtarget.hasFPCVT()) return LowerINT_TO_FPDirectMove(Op, DAG, dl); assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) && "UINT_TO_FP is supported only with FPCVT"); // If we have FCFIDS, then use it when converting to single-precision. // Otherwise, convert to double-precision and then round. unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS : PPCISD::FCFIDS) : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU : PPCISD::FCFID); MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? MVT::f32 : MVT::f64; if (Op.getOperand(0).getValueType() == MVT::i64) { SDValue SINT = Op.getOperand(0); // When converting to single-precision, we actually need to convert // to double-precision first and then round to single-precision. // To avoid double-rounding effects during that operation, we have // to prepare the input operand. Bits that might be truncated when // converting to double-precision are replaced by a bit that won't // be lost at this stage, but is below the single-precision rounding // position. // // However, if -enable-unsafe-fp-math is in effect, accept double // rounding to avoid the extra overhead. if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT() && !DAG.getTarget().Options.UnsafeFPMath) { // Twiddle input to make sure the low 11 bits are zero. (If this // is the case, we are guaranteed the value will fit into the 53 bit // mantissa of an IEEE double-precision value without rounding.) // If any of those low 11 bits were not zero originally, make sure // bit 12 (value 2048) is set instead, so that the final rounding // to single-precision gets the correct result. SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64, SINT, DAG.getConstant(2047, dl, MVT::i64)); Round = DAG.getNode(ISD::ADD, dl, MVT::i64, Round, DAG.getConstant(2047, dl, MVT::i64)); Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT); Round = DAG.getNode(ISD::AND, dl, MVT::i64, Round, DAG.getConstant(-2048, dl, MVT::i64)); // However, we cannot use that value unconditionally: if the magnitude // of the input value is small, the bit-twiddling we did above might // end up visibly changing the output. Fortunately, in that case, we // don't need to twiddle bits since the original input will convert // exactly to double-precision floating-point already. Therefore, // construct a conditional to use the original value if the top 11 // bits are all sign-bit copies, and use the rounded value computed // above otherwise. SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64, SINT, DAG.getConstant(53, dl, MVT::i32)); Cond = DAG.getNode(ISD::ADD, dl, MVT::i64, Cond, DAG.getConstant(1, dl, MVT::i64)); Cond = DAG.getSetCC(dl, MVT::i32, Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT); SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT); } ReuseLoadInfo RLI; SDValue Bits; MachineFunction &MF = DAG.getMachineFunction(); if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) { Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI, RLI.Alignment, RLI.IsInvariant ? MachineMemOperand::MOInvariant : MachineMemOperand::MONone, RLI.AAInfo, RLI.Ranges); spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); } else if (Subtarget.hasLFIWAX() && canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) { MachineMemOperand *MMO = MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, RLI.Alignment, RLI.AAInfo, RLI.Ranges); SDValue Ops[] = { RLI.Chain, RLI.Ptr }; Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl, DAG.getVTList(MVT::f64, MVT::Other), Ops, MVT::i32, MMO); spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); } else if (Subtarget.hasFPCVT() && canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) { MachineMemOperand *MMO = MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, RLI.Alignment, RLI.AAInfo, RLI.Ranges); SDValue Ops[] = { RLI.Chain, RLI.Ptr }; Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl, DAG.getVTList(MVT::f64, MVT::Other), Ops, MVT::i32, MMO); spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); } else if (((Subtarget.hasLFIWAX() && SINT.getOpcode() == ISD::SIGN_EXTEND) || (Subtarget.hasFPCVT() && SINT.getOpcode() == ISD::ZERO_EXTEND)) && SINT.getOperand(0).getValueType() == MVT::i32) { MachineFrameInfo *FrameInfo = MF.getFrameInfo(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); int FrameIdx = FrameInfo->CreateStackObject(4, 4, false); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx, MachinePointerInfo::getFixedStack( DAG.getMachineFunction(), FrameIdx)); assert(cast(Store)->getMemoryVT() == MVT::i32 && "Expected an i32 store"); RLI.Ptr = FIdx; RLI.Chain = Store; RLI.MPI = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); RLI.Alignment = 4; MachineMemOperand *MMO = MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, RLI.Alignment, RLI.AAInfo, RLI.Ranges); SDValue Ops[] = { RLI.Chain, RLI.Ptr }; Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ? PPCISD::LFIWZX : PPCISD::LFIWAX, dl, DAG.getVTList(MVT::f64, MVT::Other), Ops, MVT::i32, MMO); } else Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT); SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits); if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0, dl)); return FP; } assert(Op.getOperand(0).getValueType() == MVT::i32 && "Unhandled INT_TO_FP type in custom expander!"); // Since we only generate this in 64-bit mode, we can take advantage of // 64-bit registers. In particular, sign extend the input value into the // 64-bit register with extsw, store the WHOLE 64-bit value into the stack // then lfd it and fcfid it. MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *FrameInfo = MF.getFrameInfo(); EVT PtrVT = getPointerTy(MF.getDataLayout()); SDValue Ld; if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) { ReuseLoadInfo RLI; bool ReusingLoad; if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI, DAG))) { int FrameIdx = FrameInfo->CreateStackObject(4, 4, false); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, MachinePointerInfo::getFixedStack( DAG.getMachineFunction(), FrameIdx)); assert(cast(Store)->getMemoryVT() == MVT::i32 && "Expected an i32 store"); RLI.Ptr = FIdx; RLI.Chain = Store; RLI.MPI = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); RLI.Alignment = 4; } MachineMemOperand *MMO = MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, RLI.Alignment, RLI.AAInfo, RLI.Ranges); SDValue Ops[] = { RLI.Chain, RLI.Ptr }; Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::LFIWZX : PPCISD::LFIWAX, dl, DAG.getVTList(MVT::f64, MVT::Other), Ops, MVT::i32, MMO); if (ReusingLoad) spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG); } else { assert(Subtarget.isPPC64() && "i32->FP without LFIWAX supported only on PPC64"); int FrameIdx = FrameInfo->CreateStackObject(8, 8, false); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Op.getOperand(0)); // STD the extended value into the stack slot. SDValue Store = DAG.getStore( DAG.getEntryNode(), dl, Ext64, FIdx, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx)); // Load the value as a double. Ld = DAG.getLoad( MVT::f64, dl, Store, FIdx, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx)); } // FCFID it and return it. SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld); if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0, dl)); return FP; } SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); /* The rounding mode is in bits 30:31 of FPSR, and has the following settings: 00 Round to nearest 01 Round to 0 10 Round to +inf 11 Round to -inf FLT_ROUNDS, on the other hand, expects the following: -1 Undefined 0 Round to 0 1 Round to nearest 2 Round to +inf 3 Round to -inf To perform the conversion, we do: ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1)) */ MachineFunction &MF = DAG.getMachineFunction(); EVT VT = Op.getValueType(); EVT PtrVT = getPointerTy(MF.getDataLayout()); // Save FP Control Word to register EVT NodeTys[] = { MVT::f64, // return register MVT::Glue // unused in this context }; SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None); // Save FP register to stack slot int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, StackSlot, MachinePointerInfo()); // Load FP Control Word from low 32 bits of stack slot. SDValue Four = DAG.getConstant(4, dl, PtrVT); SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four); SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo()); // Transform as necessary SDValue CWD1 = DAG.getNode(ISD::AND, dl, MVT::i32, CWD, DAG.getConstant(3, dl, MVT::i32)); SDValue CWD2 = DAG.getNode(ISD::SRL, dl, MVT::i32, DAG.getNode(ISD::AND, dl, MVT::i32, DAG.getNode(ISD::XOR, dl, MVT::i32, CWD, DAG.getConstant(3, dl, MVT::i32)), DAG.getConstant(3, dl, MVT::i32)), DAG.getConstant(1, dl, MVT::i32)); SDValue RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2); return DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal); } SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); unsigned BitWidth = VT.getSizeInBits(); SDLoc dl(Op); assert(Op.getNumOperands() == 3 && VT == Op.getOperand(1).getValueType() && "Unexpected SHL!"); // Expand into a bunch of logical ops. Note that these ops // depend on the PPC behavior for oversized shift amounts. SDValue Lo = Op.getOperand(0); SDValue Hi = Op.getOperand(1); SDValue Amt = Op.getOperand(2); EVT AmtVT = Amt.getValueType(); SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Amt); SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt); SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1); SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3); SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, DAG.getConstant(-BitWidth, dl, AmtVT)); SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5); SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt); SDValue OutOps[] = { OutLo, OutHi }; return DAG.getMergeValues(OutOps, dl); } SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDLoc dl(Op); unsigned BitWidth = VT.getSizeInBits(); assert(Op.getNumOperands() == 3 && VT == Op.getOperand(1).getValueType() && "Unexpected SRL!"); // Expand into a bunch of logical ops. Note that these ops // depend on the PPC behavior for oversized shift amounts. SDValue Lo = Op.getOperand(0); SDValue Hi = Op.getOperand(1); SDValue Amt = Op.getOperand(2); EVT AmtVT = Amt.getValueType(); SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Amt); SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, DAG.getConstant(-BitWidth, dl, AmtVT)); SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5); SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt); SDValue OutOps[] = { OutLo, OutHi }; return DAG.getMergeValues(OutOps, dl); } SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); EVT VT = Op.getValueType(); unsigned BitWidth = VT.getSizeInBits(); assert(Op.getNumOperands() == 3 && VT == Op.getOperand(1).getValueType() && "Unexpected SRA!"); // Expand into a bunch of logical ops, followed by a select_cc. SDValue Lo = Op.getOperand(0); SDValue Hi = Op.getOperand(1); SDValue Amt = Op.getOperand(2); EVT AmtVT = Amt.getValueType(); SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Amt); SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, DAG.getConstant(-BitWidth, dl, AmtVT)); SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5); SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt); SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT), Tmp4, Tmp6, ISD::SETLE); SDValue OutOps[] = { OutLo, OutHi }; return DAG.getMergeValues(OutOps, dl); } //===----------------------------------------------------------------------===// // Vector related lowering. // /// BuildSplatI - Build a canonical splati of Val with an element size of /// SplatSize. Cast the result to VT. static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl) { assert(Val >= -16 && Val <= 15 && "vsplti is out of range!"); static const MVT VTys[] = { // canonical VT to use for each size. MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32 }; EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1]; // Force vspltis[hw] -1 to vspltisb -1 to canonicalize. if (Val == -1) SplatSize = 1; EVT CanonicalVT = VTys[SplatSize-1]; // Build a canonical splat for this value. return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT)); } /// BuildIntrinsicOp - Return a unary operator intrinsic node with the /// specified intrinsic ID. static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT = MVT::Other) { if (DestVT == MVT::Other) DestVT = Op.getValueType(); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, DAG.getConstant(IID, dl, MVT::i32), Op); } /// BuildIntrinsicOp - Return a binary operator intrinsic node with the /// specified intrinsic ID. static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT = MVT::Other) { if (DestVT == MVT::Other) DestVT = LHS.getValueType(); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, DAG.getConstant(IID, dl, MVT::i32), LHS, RHS); } /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the /// specified intrinsic ID. static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1, SDValue Op2, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT = MVT::Other) { if (DestVT == MVT::Other) DestVT = Op0.getValueType(); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2); } /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified /// amount. The result has the specified value type. static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl) { // Force LHS/RHS to be the right type. LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS); RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS); int Ops[16]; for (unsigned i = 0; i != 16; ++i) Ops[i] = i + Amt; SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops); return DAG.getNode(ISD::BITCAST, dl, VT, T); } // If this is a case we can't handle, return null and let the default // expansion code take care of it. If we CAN select this case, and if it // selects to a single instruction, return Op. Otherwise, if we can codegen // this case more efficiently than a constant pool load, lower it to the // sequence of ops that should be used. SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); BuildVectorSDNode *BVN = dyn_cast(Op.getNode()); assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"); if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) { // We first build an i32 vector, load it into a QPX register, // then convert it to a floating-point vector and compare it // to a zero vector to get the boolean result. MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); assert(BVN->getNumOperands() == 4 && "BUILD_VECTOR for v4i1 does not have 4 operands"); bool IsConst = true; for (unsigned i = 0; i < 4; ++i) { if (BVN->getOperand(i).isUndef()) continue; if (!isa(BVN->getOperand(i))) { IsConst = false; break; } } if (IsConst) { Constant *One = ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0); Constant *NegOne = ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0); Constant *CV[4]; for (unsigned i = 0; i < 4; ++i) { if (BVN->getOperand(i).isUndef()) CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext())); else if (isNullConstant(BVN->getOperand(i))) CV[i] = NegOne; else CV[i] = One; } Constant *CP = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()), 16 /* alignment */); SDValue Ops[] = {DAG.getEntryNode(), CPIdx}; SDVTList VTs = DAG.getVTList({MVT::v4i1, /*chain*/ MVT::Other}); return DAG.getMemIntrinsicNode( PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32, MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); } SmallVector Stores; for (unsigned i = 0; i < 4; ++i) { if (BVN->getOperand(i).isUndef()) continue; unsigned Offset = 4*i; SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType()); Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize(); if (StoreSize > 4) { Stores.push_back( DAG.getTruncStore(DAG.getEntryNode(), dl, BVN->getOperand(i), Idx, PtrInfo.getWithOffset(Offset), MVT::i32)); } else { SDValue StoreValue = BVN->getOperand(i); if (StoreSize < 4) StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue); Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, StoreValue, Idx, PtrInfo.getWithOffset(Offset))); } } SDValue StoreChain; if (!Stores.empty()) StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); else StoreChain = DAG.getEntryNode(); // Now load from v4i32 into the QPX register; this will extend it to // v4i64 but not yet convert it to a floating point. Nevertheless, this // is typed as v4f64 because the QPX register integer states are not // explicitly represented. SDValue Ops[] = {StoreChain, DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32), FIdx}; SDVTList VTs = DAG.getVTList({MVT::v4f64, /*chain*/ MVT::Other}); SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, VTs, Ops, MVT::v4i32, PtrInfo); LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl, MVT::i32), LoadedVect); SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::v4f64); return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ); } // All other QPX vectors are handled by generic code. if (Subtarget.hasQPX()) return SDValue(); // Check if this is a splat of a constant value. APInt APSplatBits, APSplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0, !Subtarget.isLittleEndian()) || SplatBitSize > 32) return SDValue(); unsigned SplatBits = APSplatBits.getZExtValue(); unsigned SplatUndef = APSplatUndef.getZExtValue(); unsigned SplatSize = SplatBitSize / 8; // First, handle single instruction cases. // All zeros? if (SplatBits == 0) { // Canonicalize all zero vectors to be v4i32. if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) { SDValue Z = DAG.getConstant(0, dl, MVT::v4i32); Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z); } return Op; } // If the sign extended value is in the range [-16,15], use VSPLTI[bhw]. int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >> (32-SplatBitSize)); if (SextVal >= -16 && SextVal <= 15) return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl); // Two instruction sequences. // If this value is in the range [-32,30] and is even, use: // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2) // If this value is in the range [17,31] and is odd, use: // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16) // If this value is in the range [-31,-17] and is odd, use: // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16) // Note the last two are three-instruction sequences. if (SextVal >= -32 && SextVal <= 31) { // To avoid having these optimizations undone by constant folding, // we convert to a pseudo that will be expanded later into one of // the above forms. SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32); EVT VT = (SplatSize == 1 ? MVT::v16i8 : (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32)); SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32); SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize); if (VT == Op.getValueType()) return RetVal; else return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal); } // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important // for fneg/fabs. if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) { // Make -1 and vspltisw -1: SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl); // Make the VSLW intrinsic, computing 0x8000_0000. SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV, OnesV, DAG, dl); // xor by OnesV to invert it. Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV); return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); } // Check to see if this is a wide variety of vsplti*, binop self cases. static const signed char SplatCsts[] = { -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16 }; for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) { // Indirect through the SplatCsts array so that we favor 'vsplti -1' for // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1' int i = SplatCsts[idx]; // Figure out what shift amount will be used by altivec if shifted by i in // this splat size. unsigned TypeShiftAmt = i & (SplatBitSize-1); // vsplti + shl self. if (SextVal == (int)((unsigned)i << TypeShiftAmt)) { SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); static const unsigned IIDs[] = { // Intrinsic to use for each size. Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0, Intrinsic::ppc_altivec_vslw }; Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); } // vsplti + srl self. if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); static const unsigned IIDs[] = { // Intrinsic to use for each size. Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0, Intrinsic::ppc_altivec_vsrw }; Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); } // vsplti + sra self. if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); static const unsigned IIDs[] = { // Intrinsic to use for each size. Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0, Intrinsic::ppc_altivec_vsraw }; Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); } // vsplti + rol self. if (SextVal == (int)(((unsigned)i << TypeShiftAmt) | ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) { SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); static const unsigned IIDs[] = { // Intrinsic to use for each size. Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0, Intrinsic::ppc_altivec_vrlw }; Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); } // t = vsplti c, result = vsldoi t, t, 1 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) { SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1; return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl); } // t = vsplti c, result = vsldoi t, t, 2 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) { SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2; return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl); } // t = vsplti c, result = vsldoi t, t, 3 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) { SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3; return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl); } } return SDValue(); } /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit /// the specified operations to build the shuffle. static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl) { unsigned OpNum = (PFEntry >> 26) & 0x0F; unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); enum { OP_COPY = 0, // Copy, used for things like to say it is <0,1,2,3> OP_VMRGHW, OP_VMRGLW, OP_VSPLTISW0, OP_VSPLTISW1, OP_VSPLTISW2, OP_VSPLTISW3, OP_VSLDOI4, OP_VSLDOI8, OP_VSLDOI12 }; if (OpNum == OP_COPY) { if (LHSID == (1*9+2)*9+3) return LHS; assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); return RHS; } SDValue OpLHS, OpRHS; OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); int ShufIdxs[16]; switch (OpNum) { default: llvm_unreachable("Unknown i32 permute!"); case OP_VMRGHW: ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3; ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19; ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7; ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23; break; case OP_VMRGLW: ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11; ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27; ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15; ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31; break; case OP_VSPLTISW0: for (unsigned i = 0; i != 16; ++i) ShufIdxs[i] = (i&3)+0; break; case OP_VSPLTISW1: for (unsigned i = 0; i != 16; ++i) ShufIdxs[i] = (i&3)+4; break; case OP_VSPLTISW2: for (unsigned i = 0; i != 16; ++i) ShufIdxs[i] = (i&3)+8; break; case OP_VSPLTISW3: for (unsigned i = 0; i != 16; ++i) ShufIdxs[i] = (i&3)+12; break; case OP_VSLDOI4: return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl); case OP_VSLDOI8: return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl); case OP_VSLDOI12: return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl); } EVT VT = OpLHS.getValueType(); OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS); OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS); SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs); return DAG.getNode(ISD::BITCAST, dl, VT, T); } /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this /// is a shuffle we can handle in a single instruction, return it. Otherwise, /// return the code it can be lowered into. Worst case, it can always be /// lowered into a vperm. SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); ShuffleVectorSDNode *SVOp = cast(Op); EVT VT = Op.getValueType(); bool isLittleEndian = Subtarget.isLittleEndian(); unsigned ShiftElts, InsertAtByte; bool Swap; if (Subtarget.hasP9Vector() && PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap, isLittleEndian)) { if (Swap) std::swap(V1, V2); SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2); if (ShiftElts) { SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2, DAG.getConstant(ShiftElts, dl, MVT::i32)); SDValue Ins = DAG.getNode(PPCISD::XXINSERT, dl, MVT::v4i32, Conv1, Shl, DAG.getConstant(InsertAtByte, dl, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins); } SDValue Ins = DAG.getNode(PPCISD::XXINSERT, dl, MVT::v4i32, Conv1, Conv2, DAG.getConstant(InsertAtByte, dl, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins); } if (Subtarget.hasVSX()) { if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) { int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG); SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv, DAG.getConstant(SplatIdx, dl, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat); } // Left shifts of 8 bytes are actually swaps. Convert accordingly. if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) { SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1); SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv); return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap); } } if (Subtarget.hasQPX()) { if (VT.getVectorNumElements() != 4) return SDValue(); if (V2.isUndef()) V2 = V1; int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp); if (AlignIdx != -1) { return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2, DAG.getConstant(AlignIdx, dl, MVT::i32)); } else if (SVOp->isSplat()) { int SplatIdx = SVOp->getSplatIndex(); if (SplatIdx >= 4) { std::swap(V1, V2); SplatIdx -= 4; } return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1, DAG.getConstant(SplatIdx, dl, MVT::i32)); } // Lower this into a qvgpci/qvfperm pair. // Compute the qvgpci literal unsigned idx = 0; for (unsigned i = 0; i < 4; ++i) { int m = SVOp->getMaskElt(i); unsigned mm = m >= 0 ? (unsigned) m : i; idx |= mm << (3-i)*3; } SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64, DAG.getConstant(idx, dl, MVT::i32)); return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3); } // Cases that are handled by instructions that take permute immediates // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be // selected by the instruction selector. if (V2.isUndef()) { if (PPC::isSplatShuffleMask(SVOp, 1) || PPC::isSplatShuffleMask(SVOp, 2) || PPC::isSplatShuffleMask(SVOp, 4) || PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) || PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) || PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 || PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) || PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) || PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) || PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) || PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) || PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) || (Subtarget.hasP8Altivec() && ( PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) || PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) || PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) { return Op; } } // Altivec has a variety of "shuffle immediates" that take two vector inputs // and produce a fixed permutation. If any of these match, do not lower to // VPERM. unsigned int ShuffleKind = isLittleEndian ? 2 : 0; if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) || PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) || PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 || PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) || PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) || PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) || PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) || PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) || PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) || (Subtarget.hasP8Altivec() && ( PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) || PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) || PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG)))) return Op; // Check to see if this is a shuffle of 4-byte values. If so, we can use our // perfect shuffle table to emit an optimal matching sequence. ArrayRef PermMask = SVOp->getMask(); unsigned PFIndexes[4]; bool isFourElementShuffle = true; for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number unsigned EltNo = 8; // Start out undef. for (unsigned j = 0; j != 4; ++j) { // Intra-element byte. if (PermMask[i*4+j] < 0) continue; // Undef, ignore it. unsigned ByteSource = PermMask[i*4+j]; if ((ByteSource & 3) != j) { isFourElementShuffle = false; break; } if (EltNo == 8) { EltNo = ByteSource/4; } else if (EltNo != ByteSource/4) { isFourElementShuffle = false; break; } } PFIndexes[i] = EltNo; } // If this shuffle can be expressed as a shuffle of 4-byte elements, use the // perfect shuffle vector to determine if it is cost effective to do this as // discrete instructions, or whether we should use a vperm. // For now, we skip this for little endian until such time as we have a // little-endian perfect shuffle table. if (isFourElementShuffle && !isLittleEndian) { // Compute the index in the perfect shuffle table. unsigned PFTableIndex = PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; unsigned Cost = (PFEntry >> 30); // Determining when to avoid vperm is tricky. Many things affect the cost // of vperm, particularly how many times the perm mask needs to be computed. // For example, if the perm mask can be hoisted out of a loop or is already // used (perhaps because there are multiple permutes with the same shuffle // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of // the loop requires an extra register. // // As a compromise, we only emit discrete instructions if the shuffle can be // generated in 3 or fewer operations. When we have loop information // available, if this block is within a loop, we should avoid using vperm // for 3-operation perms and use a constant pool load instead. if (Cost < 3) return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); } // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant // vector that will get spilled to the constant pool. if (V2.isUndef()) V2 = V1; // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except // that it is in input element units, not in bytes. Convert now. // For little endian, the order of the input vectors is reversed, and // the permutation mask is complemented with respect to 31. This is // necessary to produce proper semantics with the big-endian-biased vperm // instruction. EVT EltVT = V1.getValueType().getVectorElementType(); unsigned BytesPerElement = EltVT.getSizeInBits()/8; SmallVector ResultMask; for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i]; for (unsigned j = 0; j != BytesPerElement; ++j) if (isLittleEndian) ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j), dl, MVT::i32)); else ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl, MVT::i32)); } SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask); if (isLittleEndian) return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V2, V1, VPermMask); else return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V1, V2, VPermMask); } /// getVectorCompareInfo - Given an intrinsic, return false if it is not a /// vector comparison. If it is, return true and fill in Opc/isDot with /// information about the intrinsic. static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget) { unsigned IntrinsicID = cast(Intrin.getOperand(0))->getZExtValue(); CompareOpc = -1; isDot = false; switch (IntrinsicID) { default: return false; // Comparison predicates. case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpequd_p: if (Subtarget.hasP8Altivec()) { CompareOpc = 199; isDot = 1; } else return false; break; case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtsd_p: if (Subtarget.hasP8Altivec()) { CompareOpc = 967; isDot = 1; } else return false; break; case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtud_p: if (Subtarget.hasP8Altivec()) { CompareOpc = 711; isDot = 1; } else return false; break; // VSX predicate comparisons use the same infrastructure case Intrinsic::ppc_vsx_xvcmpeqdp_p: case Intrinsic::ppc_vsx_xvcmpgedp_p: case Intrinsic::ppc_vsx_xvcmpgtdp_p: case Intrinsic::ppc_vsx_xvcmpeqsp_p: case Intrinsic::ppc_vsx_xvcmpgesp_p: case Intrinsic::ppc_vsx_xvcmpgtsp_p: if (Subtarget.hasVSX()) { switch (IntrinsicID) { case Intrinsic::ppc_vsx_xvcmpeqdp_p: CompareOpc = 99; break; case Intrinsic::ppc_vsx_xvcmpgedp_p: CompareOpc = 115; break; case Intrinsic::ppc_vsx_xvcmpgtdp_p: CompareOpc = 107; break; case Intrinsic::ppc_vsx_xvcmpeqsp_p: CompareOpc = 67; break; case Intrinsic::ppc_vsx_xvcmpgesp_p: CompareOpc = 83; break; case Intrinsic::ppc_vsx_xvcmpgtsp_p: CompareOpc = 75; break; } isDot = 1; } else return false; break; // Normal Comparisons. case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpequd: if (Subtarget.hasP8Altivec()) { CompareOpc = 199; isDot = 0; } else return false; break; case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpgtsd: if (Subtarget.hasP8Altivec()) { CompareOpc = 967; isDot = 0; } else return false; break; case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpgtud: if (Subtarget.hasP8Altivec()) { CompareOpc = 711; isDot = 0; } else return false; break; } return true; } /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom /// lower, do it, otherwise return null. SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { unsigned IntrinsicID = cast(Op.getOperand(0))->getZExtValue(); if (IntrinsicID == Intrinsic::thread_pointer) { // Reads the thread pointer register, used for __builtin_thread_pointer. bool is64bit = Subtarget.isPPC64(); return DAG.getRegister(is64bit ? PPC::X13 : PPC::R2, is64bit ? MVT::i64 : MVT::i32); } // If this is a lowered altivec predicate compare, CompareOpc is set to the // opcode number of the comparison. SDLoc dl(Op); int CompareOpc; bool isDot; if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget)) return SDValue(); // Don't custom lower most intrinsics. // If this is a non-dot comparison, make the VCMP node and we are done. if (!isDot) { SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(), Op.getOperand(1), Op.getOperand(2), DAG.getConstant(CompareOpc, dl, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp); } // Create the PPCISD altivec 'dot' comparison node. SDValue Ops[] = { Op.getOperand(2), // LHS Op.getOperand(3), // RHS DAG.getConstant(CompareOpc, dl, MVT::i32) }; EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue }; SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops); // Now that we have the comparison, emit a copy from the CR to a GPR. // This is flagged to the above dot comparison. SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32, DAG.getRegister(PPC::CR6, MVT::i32), CompNode.getValue(1)); // Unpack the result based on how the target uses it. unsigned BitNo; // Bit # of CR6. bool InvertBit; // Invert result? switch (cast(Op.getOperand(1))->getZExtValue()) { default: // Can't happen, don't crash on invalid number though. case 0: // Return the value of the EQ bit of CR6. BitNo = 0; InvertBit = false; break; case 1: // Return the inverted value of the EQ bit of CR6. BitNo = 0; InvertBit = true; break; case 2: // Return the value of the LT bit of CR6. BitNo = 2; InvertBit = false; break; case 3: // Return the inverted value of the LT bit of CR6. BitNo = 2; InvertBit = true; break; } // Shift the bit into the low position. Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags, DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32)); // Isolate the bit. Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags, DAG.getConstant(1, dl, MVT::i32)); // If we are supposed to, toggle the bit. if (InvertBit) Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags, DAG.getConstant(1, dl, MVT::i32)); return Flags; } SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); // For v2i64 (VSX), we can pattern patch the v2i32 case (using fp <-> int // instructions), but for smaller types, we need to first extend up to v2i32 // before doing going farther. if (Op.getValueType() == MVT::v2i64) { EVT ExtVT = cast(Op.getOperand(1))->getVT(); if (ExtVT != MVT::v2i32) { Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)); Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op, DAG.getValueType(EVT::getVectorVT(*DAG.getContext(), ExtVT.getVectorElementType(), 4))); Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op); Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op, DAG.getValueType(MVT::v2i32)); } return Op; } return SDValue(); } SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); // Create a stack slot that is 16-byte aligned. MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); // Store the input value into Value#0 of the stack slot. SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, MachinePointerInfo()); // Load it out. return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo()); } SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); SDNode *N = Op.getNode(); assert(N->getOperand(0).getValueType() == MVT::v4i1 && "Unknown extract_vector_elt type"); SDValue Value = N->getOperand(0); // The first part of this is like the store lowering except that we don't // need to track the chain. // The values are now known to be -1 (false) or 1 (true). To convert this // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to // understand how to form the extending load. SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64); Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); // Now convert to an integer and store. Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32), Value); MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); SDValue StoreChain = DAG.getEntryNode(); SDValue Ops[] = {StoreChain, DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32), Value, FIdx}; SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other); StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, VTs, Ops, MVT::v4i32, PtrInfo); // Extract the value requested. unsigned Offset = 4*cast(N->getOperand(1))->getZExtValue(); SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType()); Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); SDValue IntVal = DAG.getLoad(MVT::i32, dl, StoreChain, Idx, PtrInfo.getWithOffset(Offset)); if (!Subtarget.useCRBits()) return IntVal; return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal); } /// Lowering for QPX v4i1 loads SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); LoadSDNode *LN = cast(Op.getNode()); SDValue LoadChain = LN->getChain(); SDValue BasePtr = LN->getBasePtr(); if (Op.getValueType() == MVT::v4f64 || Op.getValueType() == MVT::v4f32) { EVT MemVT = LN->getMemoryVT(); unsigned Alignment = LN->getAlignment(); // If this load is properly aligned, then it is legal. if (Alignment >= MemVT.getStoreSize()) return Op; EVT ScalarVT = Op.getValueType().getScalarType(), ScalarMemVT = MemVT.getScalarType(); unsigned Stride = ScalarMemVT.getStoreSize(); SDValue Vals[4], LoadChains[4]; for (unsigned Idx = 0; Idx < 4; ++Idx) { SDValue Load; if (ScalarVT != ScalarMemVT) Load = DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain, BasePtr, LN->getPointerInfo().getWithOffset(Idx * Stride), ScalarMemVT, MinAlign(Alignment, Idx * Stride), LN->getMemOperand()->getFlags(), LN->getAAInfo()); else Load = DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr, LN->getPointerInfo().getWithOffset(Idx * Stride), MinAlign(Alignment, Idx * Stride), LN->getMemOperand()->getFlags(), LN->getAAInfo()); if (Idx == 0 && LN->isIndexed()) { assert(LN->getAddressingMode() == ISD::PRE_INC && "Unknown addressing mode on vector load"); Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(), LN->getAddressingMode()); } Vals[Idx] = Load; LoadChains[Idx] = Load.getValue(1); BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, DAG.getConstant(Stride, dl, BasePtr.getValueType())); } SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); SDValue Value = DAG.getBuildVector(Op.getValueType(), dl, Vals); if (LN->isIndexed()) { SDValue RetOps[] = { Value, Vals[0].getValue(1), TF }; return DAG.getMergeValues(RetOps, dl); } SDValue RetOps[] = { Value, TF }; return DAG.getMergeValues(RetOps, dl); } assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower"); assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported"); // To lower v4i1 from a byte array, we load the byte elements of the // vector and then reuse the BUILD_VECTOR logic. SDValue VectElmts[4], VectElmtChains[4]; for (unsigned i = 0; i < 4; ++i) { SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType()); Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx); VectElmts[i] = DAG.getExtLoad( ISD::EXTLOAD, dl, MVT::i32, LoadChain, Idx, LN->getPointerInfo().getWithOffset(i), MVT::i8, /* Alignment = */ 1, LN->getMemOperand()->getFlags(), LN->getAAInfo()); VectElmtChains[i] = VectElmts[i].getValue(1); } LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains); SDValue Value = DAG.getBuildVector(MVT::v4i1, dl, VectElmts); SDValue RVals[] = { Value, LoadChain }; return DAG.getMergeValues(RVals, dl); } /// Lowering for QPX v4i1 stores SDValue PPCTargetLowering::LowerVectorStore(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); StoreSDNode *SN = cast(Op.getNode()); SDValue StoreChain = SN->getChain(); SDValue BasePtr = SN->getBasePtr(); SDValue Value = SN->getValue(); if (Value.getValueType() == MVT::v4f64 || Value.getValueType() == MVT::v4f32) { EVT MemVT = SN->getMemoryVT(); unsigned Alignment = SN->getAlignment(); // If this store is properly aligned, then it is legal. if (Alignment >= MemVT.getStoreSize()) return Op; EVT ScalarVT = Value.getValueType().getScalarType(), ScalarMemVT = MemVT.getScalarType(); unsigned Stride = ScalarMemVT.getStoreSize(); SDValue Stores[4]; for (unsigned Idx = 0; Idx < 4; ++Idx) { SDValue Ex = DAG.getNode( ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value, DAG.getConstant(Idx, dl, getVectorIdxTy(DAG.getDataLayout()))); SDValue Store; if (ScalarVT != ScalarMemVT) Store = DAG.getTruncStore(StoreChain, dl, Ex, BasePtr, SN->getPointerInfo().getWithOffset(Idx * Stride), ScalarMemVT, MinAlign(Alignment, Idx * Stride), SN->getMemOperand()->getFlags(), SN->getAAInfo()); else Store = DAG.getStore(StoreChain, dl, Ex, BasePtr, SN->getPointerInfo().getWithOffset(Idx * Stride), MinAlign(Alignment, Idx * Stride), SN->getMemOperand()->getFlags(), SN->getAAInfo()); if (Idx == 0 && SN->isIndexed()) { assert(SN->getAddressingMode() == ISD::PRE_INC && "Unknown addressing mode on vector store"); Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(), SN->getAddressingMode()); } BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, DAG.getConstant(Stride, dl, BasePtr.getValueType())); Stores[Idx] = Store; } SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); if (SN->isIndexed()) { SDValue RetOps[] = { TF, Stores[0].getValue(1) }; return DAG.getMergeValues(RetOps, dl); } return TF; } assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported"); assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower"); // The values are now known to be -1 (false) or 1 (true). To convert this // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to // understand how to form the extending load. SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64); Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); // Now convert to an integer and store. Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32), Value); MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); SDValue Ops[] = {StoreChain, DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32), Value, FIdx}; SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other); StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, VTs, Ops, MVT::v4i32, PtrInfo); // Move data into the byte array. SDValue Loads[4], LoadChains[4]; for (unsigned i = 0; i < 4; ++i) { unsigned Offset = 4*i; SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType()); Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); Loads[i] = DAG.getLoad(MVT::i32, dl, StoreChain, Idx, PtrInfo.getWithOffset(Offset)); LoadChains[i] = Loads[i].getValue(1); } StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); SDValue Stores[4]; for (unsigned i = 0; i < 4; ++i) { SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType()); Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx); Stores[i] = DAG.getTruncStore( StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i), MVT::i8, /* Alignment = */ 1, SN->getMemOperand()->getFlags(), SN->getAAInfo()); } StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); return StoreChain; } SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); if (Op.getValueType() == MVT::v4i32) { SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl); SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt. SDValue RHSSwap = // = vrlw RHS, 16 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl); // Shrinkify inputs to v8i16. LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS); RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS); RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap); // Low parts multiplied together, generating 32-bit results (we ignore the // top parts). SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh, LHS, RHS, DAG, dl, MVT::v4i32); SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm, LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32); // Shift the high parts up 16 bits. HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, Neg16, DAG, dl); return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd); } else if (Op.getValueType() == MVT::v8i16) { SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl); return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm, LHS, RHS, Zero, DAG, dl); } else if (Op.getValueType() == MVT::v16i8) { SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); bool isLittleEndian = Subtarget.isLittleEndian(); // Multiply the even 8-bit parts, producing 16-bit sums. SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, LHS, RHS, DAG, dl, MVT::v8i16); EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts); // Multiply the odd 8-bit parts, producing 16-bit sums. SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub, LHS, RHS, DAG, dl, MVT::v8i16); OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts); // Merge the results together. Because vmuleub and vmuloub are // instructions with a big-endian bias, we must reverse the // element numbering and reverse the meaning of "odd" and "even" // when generating little endian code. int Ops[16]; for (unsigned i = 0; i != 8; ++i) { if (isLittleEndian) { Ops[i*2 ] = 2*i; Ops[i*2+1] = 2*i+16; } else { Ops[i*2 ] = 2*i+1; Ops[i*2+1] = 2*i+1+16; } } if (isLittleEndian) return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops); else return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops); } else { llvm_unreachable("Unknown mul to lower!"); } } /// LowerOperation - Provide custom lowering hooks for some operations. /// SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Wasn't expecting to be able to lower this!"); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::VAARG: return LowerVAARG(Op, DAG); case ISD::VACOPY: return LowerVACOPY(Op, DAG); case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::GET_DYNAMIC_AREA_OFFSET: return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG); + + case ISD::EH_DWARF_CFA: + return LowerEH_DWARF_CFA(Op, DAG); case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG); case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG); case ISD::LOAD: return LowerLOAD(Op, DAG); case ISD::STORE: return LowerSTORE(Op, DAG); case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::FP_TO_UINT: case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op)); case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG); case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); // Lower 64-bit shifts. case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG); case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG); case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG); // Vector-related lowering. case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::MUL: return LowerMUL(Op, DAG); // For counter-based loop handling. case ISD::INTRINSIC_W_CHAIN: return SDValue(); // Frame & Return address. case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); } } void PPCTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl&Results, SelectionDAG &DAG) const { SDLoc dl(N); switch (N->getOpcode()) { default: llvm_unreachable("Do not know how to custom type legalize this operation!"); case ISD::READCYCLECOUNTER: { SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0)); Results.push_back(RTB); Results.push_back(RTB.getValue(1)); Results.push_back(RTB.getValue(2)); break; } case ISD::INTRINSIC_W_CHAIN: { if (cast(N->getOperand(1))->getZExtValue() != Intrinsic::ppc_is_decremented_ctr_nonzero) break; assert(N->getValueType(0) == MVT::i1 && "Unexpected result type for CTR decrement intrinsic"); EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), N->getValueType(0)); SDVTList VTs = DAG.getVTList(SVT, MVT::Other); SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0), N->getOperand(1)); Results.push_back(NewInt); Results.push_back(NewInt.getValue(1)); break; } case ISD::VAARG: { if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64()) return; EVT VT = N->getValueType(0); if (VT == MVT::i64) { SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG); Results.push_back(NewNode); Results.push_back(NewNode.getValue(1)); } return; } case ISD::FP_ROUND_INREG: { assert(N->getValueType(0) == MVT::ppcf128); assert(N->getOperand(0).getValueType() == MVT::ppcf128); SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, N->getOperand(0), DAG.getIntPtrConstant(0, dl)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, N->getOperand(0), DAG.getIntPtrConstant(1, dl)); // Add the two halves of the long double in round-to-zero mode. SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi); // We know the low half is about to be thrown away, so just use something // convenient. Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128, FPreg, FPreg)); return; } case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: // LowerFP_TO_INT() can only handle f32 and f64. if (N->getOperand(0).getValueType() == MVT::ppcf128) return; Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl)); return; } } //===----------------------------------------------------------------------===// // Other Lowering Code //===----------------------------------------------------------------------===// static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); Function *Func = Intrinsic::getDeclaration(M, Id); return Builder.CreateCall(Func, {}); } // The mappings for emitLeading/TrailingFence is taken from // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html Instruction* PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord, bool IsStore, bool IsLoad) const { if (Ord == AtomicOrdering::SequentiallyConsistent) return callIntrinsic(Builder, Intrinsic::ppc_sync); if (isReleaseOrStronger(Ord)) return callIntrinsic(Builder, Intrinsic::ppc_lwsync); return nullptr; } Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord, bool IsStore, bool IsLoad) const { if (IsLoad && isAcquireOrStronger(Ord)) return callIntrinsic(Builder, Intrinsic::ppc_lwsync); // FIXME: this is too conservative, a dependent branch + isync is enough. // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification. return nullptr; } MachineBasicBlock * PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB, unsigned AtomicSize, unsigned BinOpcode) const { // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. const TargetInstrInfo *TII = Subtarget.getInstrInfo(); auto LoadMnemonic = PPC::LDARX; auto StoreMnemonic = PPC::STDCX; switch (AtomicSize) { default: llvm_unreachable("Unexpected size of atomic entity"); case 1: LoadMnemonic = PPC::LBARX; StoreMnemonic = PPC::STBCX; assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4"); break; case 2: LoadMnemonic = PPC::LHARX; StoreMnemonic = PPC::STHCX; assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4"); break; case 4: LoadMnemonic = PPC::LWARX; StoreMnemonic = PPC::STWCX; break; case 8: LoadMnemonic = PPC::LDARX; StoreMnemonic = PPC::STDCX; break; } const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction *F = BB->getParent(); MachineFunction::iterator It = ++BB->getIterator(); unsigned dest = MI.getOperand(0).getReg(); unsigned ptrA = MI.getOperand(1).getReg(); unsigned ptrB = MI.getOperand(2).getReg(); unsigned incr = MI.getOperand(3).getReg(); DebugLoc dl = MI.getDebugLoc(); MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, loopMBB); F->insert(It, exitMBB); exitMBB->splice(exitMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); MachineRegisterInfo &RegInfo = F->getRegInfo(); unsigned TmpReg = (!BinOpcode) ? incr : RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass); // thisMBB: // ... // fallthrough --> loopMBB BB->addSuccessor(loopMBB); // loopMBB: // l[wd]arx dest, ptr // add r0, dest, incr // st[wd]cx. r0, ptr // bne- loopMBB // fallthrough --> exitMBB BB = loopMBB; BuildMI(BB, dl, TII->get(LoadMnemonic), dest) .addReg(ptrA).addReg(ptrB); if (BinOpcode) BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest); BuildMI(BB, dl, TII->get(StoreMnemonic)) .addReg(TmpReg).addReg(ptrA).addReg(ptrB); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); BB->addSuccessor(loopMBB); BB->addSuccessor(exitMBB); // exitMBB: // ... BB = exitMBB; return BB; } MachineBasicBlock * PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB, bool is8bit, // operation unsigned BinOpcode) const { // If we support part-word atomic mnemonics, just use them if (Subtarget.hasPartwordAtomics()) return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode); // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. const TargetInstrInfo *TII = Subtarget.getInstrInfo(); // In 64 bit mode we have to use 64 bits for addresses, even though the // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address // registers without caring whether they're 32 or 64, but here we're // doing actual arithmetic on the addresses. bool is64bit = Subtarget.isPPC64(); unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction *F = BB->getParent(); MachineFunction::iterator It = ++BB->getIterator(); unsigned dest = MI.getOperand(0).getReg(); unsigned ptrA = MI.getOperand(1).getReg(); unsigned ptrB = MI.getOperand(2).getReg(); unsigned incr = MI.getOperand(3).getReg(); DebugLoc dl = MI.getDebugLoc(); MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, loopMBB); F->insert(It, exitMBB); exitMBB->splice(exitMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); MachineRegisterInfo &RegInfo = F->getRegInfo(); const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; unsigned PtrReg = RegInfo.createVirtualRegister(RC); unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); unsigned ShiftReg = RegInfo.createVirtualRegister(RC); unsigned Incr2Reg = RegInfo.createVirtualRegister(RC); unsigned MaskReg = RegInfo.createVirtualRegister(RC); unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC); unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); unsigned Ptr1Reg; unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC); // thisMBB: // ... // fallthrough --> loopMBB BB->addSuccessor(loopMBB); // The 4-byte load must be aligned, while a char or short may be // anywhere in the word. Hence all this nasty bookkeeping code. // add ptr1, ptrA, ptrB [copy if ptrA==0] // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] // xori shift, shift1, 24 [16] // rlwinm ptr, ptr1, 0, 0, 29 // slw incr2, incr, shift // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] // slw mask, mask2, shift // loopMBB: // lwarx tmpDest, ptr // add tmp, tmpDest, incr2 // andc tmp2, tmpDest, mask // and tmp3, tmp, mask // or tmp4, tmp3, tmp2 // stwcx. tmp4, ptr // bne- loopMBB // fallthrough --> exitMBB // srw dest, tmpDest, shift if (ptrA != ZeroReg) { Ptr1Reg = RegInfo.createVirtualRegister(RC); BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) .addReg(ptrA).addReg(ptrB); } else { Ptr1Reg = ptrB; } BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); if (is64bit) BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) .addReg(Ptr1Reg).addImm(0).addImm(61); else BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg) .addReg(incr).addReg(ShiftReg); if (is8bit) BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); else { BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535); } BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) .addReg(Mask2Reg).addReg(ShiftReg); BB = loopMBB; BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) .addReg(ZeroReg).addReg(PtrReg); if (BinOpcode) BuildMI(BB, dl, TII->get(BinOpcode), TmpReg) .addReg(Incr2Reg).addReg(TmpDestReg); BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg) .addReg(TmpDestReg).addReg(MaskReg); BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg) .addReg(TmpReg).addReg(MaskReg); BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg) .addReg(Tmp3Reg).addReg(Tmp2Reg); BuildMI(BB, dl, TII->get(PPC::STWCX)) .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); BB->addSuccessor(loopMBB); BB->addSuccessor(exitMBB); // exitMBB: // ... BB = exitMBB; BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg) .addReg(ShiftReg); return BB; } llvm::MachineBasicBlock * PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const { DebugLoc DL = MI.getDebugLoc(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); const BasicBlock *BB = MBB->getBasicBlock(); MachineFunction::iterator I = ++MBB->getIterator(); // Memory Reference MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin(); MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end(); unsigned DstReg = MI.getOperand(0).getReg(); const TargetRegisterClass *RC = MRI.getRegClass(DstReg); assert(RC->hasType(MVT::i32) && "Invalid destination!"); unsigned mainDstReg = MRI.createVirtualRegister(RC); unsigned restoreDstReg = MRI.createVirtualRegister(RC); MVT PVT = getPointerTy(MF->getDataLayout()); assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!"); // For v = setjmp(buf), we generate // // thisMBB: // SjLjSetup mainMBB // bl mainMBB // v_restore = 1 // b sinkMBB // // mainMBB: // buf[LabelOffset] = LR // v_main = 0 // // sinkMBB: // v = phi(main, restore) // MachineBasicBlock *thisMBB = MBB; MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); MF->insert(I, mainMBB); MF->insert(I, sinkMBB); MachineInstrBuilder MIB; // Transfer the remainder of BB and its successor edges to sinkMBB. sinkMBB->splice(sinkMBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)), MBB->end()); sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); // Note that the structure of the jmp_buf used here is not compatible // with that used by libc, and is not designed to be. Specifically, it // stores only those 'reserved' registers that LLVM does not otherwise // understand how to spill. Also, by convention, by the time this // intrinsic is called, Clang has already stored the frame address in the // first slot of the buffer and stack address in the third. Following the // X86 target code, we'll store the jump address in the second slot. We also // need to save the TOC pointer (R2) to handle jumps between shared // libraries, and that will be stored in the fourth slot. The thread // identifier (R13) is not affected. // thisMBB: const int64_t LabelOffset = 1 * PVT.getStoreSize(); const int64_t TOCOffset = 3 * PVT.getStoreSize(); const int64_t BPOffset = 4 * PVT.getStoreSize(); // Prepare IP either in reg. const TargetRegisterClass *PtrRC = getRegClassFor(PVT); unsigned LabelReg = MRI.createVirtualRegister(PtrRC); unsigned BufReg = MI.getOperand(1).getReg(); if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) { setUsesTOCBasePtr(*MBB->getParent()); MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD)) .addReg(PPC::X2) .addImm(TOCOffset) .addReg(BufReg); MIB.setMemRefs(MMOBegin, MMOEnd); } // Naked functions never have a base pointer, and so we use r1. For all // other functions, this decision must be delayed until during PEI. unsigned BaseReg; if (MF->getFunction()->hasFnAttribute(Attribute::Naked)) BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1; else BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP; MIB = BuildMI(*thisMBB, MI, DL, TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW)) .addReg(BaseReg) .addImm(BPOffset) .addReg(BufReg); MIB.setMemRefs(MMOBegin, MMOEnd); // Setup MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB); const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo(); MIB.addRegMask(TRI->getNoPreservedMask()); BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1); MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup)) .addMBB(mainMBB); MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB); thisMBB->addSuccessor(mainMBB, BranchProbability::getZero()); thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne()); // mainMBB: // mainDstReg = 0 MIB = BuildMI(mainMBB, DL, TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg); // Store IP if (Subtarget.isPPC64()) { MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD)) .addReg(LabelReg) .addImm(LabelOffset) .addReg(BufReg); } else { MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW)) .addReg(LabelReg) .addImm(LabelOffset) .addReg(BufReg); } MIB.setMemRefs(MMOBegin, MMOEnd); BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0); mainMBB->addSuccessor(sinkMBB); // sinkMBB: BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(PPC::PHI), DstReg) .addReg(mainDstReg).addMBB(mainMBB) .addReg(restoreDstReg).addMBB(thisMBB); MI.eraseFromParent(); return sinkMBB; } MachineBasicBlock * PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const { DebugLoc DL = MI.getDebugLoc(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); // Memory Reference MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin(); MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end(); MVT PVT = getPointerTy(MF->getDataLayout()); assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!"); const TargetRegisterClass *RC = (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; unsigned Tmp = MRI.createVirtualRegister(RC); // Since FP is only updated here but NOT referenced, it's treated as GPR. unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31; unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1; unsigned BP = (PVT == MVT::i64) ? PPC::X30 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29 : PPC::R30); MachineInstrBuilder MIB; const int64_t LabelOffset = 1 * PVT.getStoreSize(); const int64_t SPOffset = 2 * PVT.getStoreSize(); const int64_t TOCOffset = 3 * PVT.getStoreSize(); const int64_t BPOffset = 4 * PVT.getStoreSize(); unsigned BufReg = MI.getOperand(0).getReg(); // Reload FP (the jumped-to function may not have had a // frame pointer, and if so, then its r31 will be restored // as necessary). if (PVT == MVT::i64) { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP) .addImm(0) .addReg(BufReg); } else { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP) .addImm(0) .addReg(BufReg); } MIB.setMemRefs(MMOBegin, MMOEnd); // Reload IP if (PVT == MVT::i64) { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp) .addImm(LabelOffset) .addReg(BufReg); } else { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp) .addImm(LabelOffset) .addReg(BufReg); } MIB.setMemRefs(MMOBegin, MMOEnd); // Reload SP if (PVT == MVT::i64) { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP) .addImm(SPOffset) .addReg(BufReg); } else { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP) .addImm(SPOffset) .addReg(BufReg); } MIB.setMemRefs(MMOBegin, MMOEnd); // Reload BP if (PVT == MVT::i64) { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP) .addImm(BPOffset) .addReg(BufReg); } else { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP) .addImm(BPOffset) .addReg(BufReg); } MIB.setMemRefs(MMOBegin, MMOEnd); // Reload TOC if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) { setUsesTOCBasePtr(*MBB->getParent()); MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2) .addImm(TOCOffset) .addReg(BufReg); MIB.setMemRefs(MMOBegin, MMOEnd); } // Jump BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp); BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR)); MI.eraseFromParent(); return MBB; } MachineBasicBlock * PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const { if (MI.getOpcode() == TargetOpcode::STACKMAP || MI.getOpcode() == TargetOpcode::PATCHPOINT) { if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() && MI.getOpcode() == TargetOpcode::PATCHPOINT) { // Call lowering should have added an r2 operand to indicate a dependence // on the TOC base pointer value. It can't however, because there is no // way to mark the dependence as implicit there, and so the stackmap code // will confuse it with a regular operand. Instead, add the dependence // here. setUsesTOCBasePtr(*BB->getParent()); MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true)); } return emitPatchPoint(MI, BB); } if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 || MI.getOpcode() == PPC::EH_SjLj_SetJmp64) { return emitEHSjLjSetJmp(MI, BB); } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 || MI.getOpcode() == PPC::EH_SjLj_LongJmp64) { return emitEHSjLjLongJmp(MI, BB); } const TargetInstrInfo *TII = Subtarget.getInstrInfo(); // To "insert" these instructions we actually have to insert their // control-flow patterns. const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction::iterator It = ++BB->getIterator(); MachineFunction *F = BB->getParent(); if (Subtarget.hasISEL() && (MI.getOpcode() == PPC::SELECT_CC_I4 || MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) { SmallVector Cond; if (MI.getOpcode() == PPC::SELECT_CC_I4 || MI.getOpcode() == PPC::SELECT_CC_I8) Cond.push_back(MI.getOperand(4)); else Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET)); Cond.push_back(MI.getOperand(1)); DebugLoc dl = MI.getDebugLoc(); TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond, MI.getOperand(2).getReg(), MI.getOperand(3).getReg()); } else if (MI.getOpcode() == PPC::SELECT_CC_I4 || MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_CC_F4 || MI.getOpcode() == PPC::SELECT_CC_F8 || MI.getOpcode() == PPC::SELECT_CC_QFRC || MI.getOpcode() == PPC::SELECT_CC_QSRC || MI.getOpcode() == PPC::SELECT_CC_QBRC || MI.getOpcode() == PPC::SELECT_CC_VRRC || MI.getOpcode() == PPC::SELECT_CC_VSFRC || MI.getOpcode() == PPC::SELECT_CC_VSSRC || MI.getOpcode() == PPC::SELECT_CC_VSRC || MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 || MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 || MI.getOpcode() == PPC::SELECT_QFRC || MI.getOpcode() == PPC::SELECT_QSRC || MI.getOpcode() == PPC::SELECT_QBRC || MI.getOpcode() == PPC::SELECT_VRRC || MI.getOpcode() == PPC::SELECT_VSFRC || MI.getOpcode() == PPC::SELECT_VSSRC || MI.getOpcode() == PPC::SELECT_VSRC) { // The incoming instruction knows the destination vreg to set, the // condition code register to branch on, the true/false values to // select between, and a branch opcode to use. // thisMBB: // ... // TrueVal = ... // cmpTY ccX, r1, r2 // bCC copy1MBB // fallthrough --> copy0MBB MachineBasicBlock *thisMBB = BB; MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); DebugLoc dl = MI.getDebugLoc(); F->insert(It, copy0MBB); F->insert(It, sinkMBB); // Transfer the remainder of BB and its successor edges to sinkMBB. sinkMBB->splice(sinkMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); sinkMBB->transferSuccessorsAndUpdatePHIs(BB); // Next, add the true and fallthrough blocks as its successors. BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 || MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 || MI.getOpcode() == PPC::SELECT_QFRC || MI.getOpcode() == PPC::SELECT_QSRC || MI.getOpcode() == PPC::SELECT_QBRC || MI.getOpcode() == PPC::SELECT_VRRC || MI.getOpcode() == PPC::SELECT_VSFRC || MI.getOpcode() == PPC::SELECT_VSSRC || MI.getOpcode() == PPC::SELECT_VSRC) { BuildMI(BB, dl, TII->get(PPC::BC)) .addReg(MI.getOperand(1).getReg()) .addMBB(sinkMBB); } else { unsigned SelectPred = MI.getOperand(4).getImm(); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(SelectPred) .addReg(MI.getOperand(1).getReg()) .addMBB(sinkMBB); } // copy0MBB: // %FalseValue = ... // # fallthrough to sinkMBB BB = copy0MBB; // Update machine-CFG edges BB->addSuccessor(sinkMBB); // sinkMBB: // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... BB = sinkMBB; BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg()) .addReg(MI.getOperand(3).getReg()) .addMBB(copy0MBB) .addReg(MI.getOperand(2).getReg()) .addMBB(thisMBB); } else if (MI.getOpcode() == PPC::ReadTB) { // To read the 64-bit time-base register on a 32-bit target, we read the // two halves. Should the counter have wrapped while it was being read, we // need to try again. // ... // readLoop: // mfspr Rx,TBU # load from TBU // mfspr Ry,TB # load from TB // mfspr Rz,TBU # load from TBU // cmpw crX,Rx,Rz # check if 'old'='new' // bne readLoop # branch if they're not equal // ... MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); DebugLoc dl = MI.getDebugLoc(); F->insert(It, readMBB); F->insert(It, sinkMBB); // Transfer the remainder of BB and its successor edges to sinkMBB. sinkMBB->splice(sinkMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); sinkMBB->transferSuccessorsAndUpdatePHIs(BB); BB->addSuccessor(readMBB); BB = readMBB; MachineRegisterInfo &RegInfo = F->getRegInfo(); unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); unsigned LoReg = MI.getOperand(0).getReg(); unsigned HiReg = MI.getOperand(1).getReg(); BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269); BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268); BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269); unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg) .addReg(HiReg).addReg(ReadAgainReg); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(CmpReg).addMBB(readMBB); BB->addSuccessor(readMBB); BB->addSuccessor(sinkMBB); } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32) BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64) BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32) BB = EmitAtomicBinary(MI, BB, 4, PPC::AND); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64) BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32) BB = EmitAtomicBinary(MI, BB, 4, PPC::OR); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64) BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32) BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64) BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32) BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64) BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32) BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64) BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8); else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, 0); else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, 0); else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32) BB = EmitAtomicBinary(MI, BB, 4, 0); else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64) BB = EmitAtomicBinary(MI, BB, 8, 0); else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 || MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 || (Subtarget.hasPartwordAtomics() && MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) || (Subtarget.hasPartwordAtomics() && MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) { bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64; auto LoadMnemonic = PPC::LDARX; auto StoreMnemonic = PPC::STDCX; switch (MI.getOpcode()) { default: llvm_unreachable("Compare and swap of unknown size"); case PPC::ATOMIC_CMP_SWAP_I8: LoadMnemonic = PPC::LBARX; StoreMnemonic = PPC::STBCX; assert(Subtarget.hasPartwordAtomics() && "No support partword atomics."); break; case PPC::ATOMIC_CMP_SWAP_I16: LoadMnemonic = PPC::LHARX; StoreMnemonic = PPC::STHCX; assert(Subtarget.hasPartwordAtomics() && "No support partword atomics."); break; case PPC::ATOMIC_CMP_SWAP_I32: LoadMnemonic = PPC::LWARX; StoreMnemonic = PPC::STWCX; break; case PPC::ATOMIC_CMP_SWAP_I64: LoadMnemonic = PPC::LDARX; StoreMnemonic = PPC::STDCX; break; } unsigned dest = MI.getOperand(0).getReg(); unsigned ptrA = MI.getOperand(1).getReg(); unsigned ptrB = MI.getOperand(2).getReg(); unsigned oldval = MI.getOperand(3).getReg(); unsigned newval = MI.getOperand(4).getReg(); DebugLoc dl = MI.getDebugLoc(); MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, loop1MBB); F->insert(It, loop2MBB); F->insert(It, midMBB); F->insert(It, exitMBB); exitMBB->splice(exitMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); // thisMBB: // ... // fallthrough --> loopMBB BB->addSuccessor(loop1MBB); // loop1MBB: // l[bhwd]arx dest, ptr // cmp[wd] dest, oldval // bne- midMBB // loop2MBB: // st[bhwd]cx. newval, ptr // bne- loopMBB // b exitBB // midMBB: // st[bhwd]cx. dest, ptr // exitBB: BB = loop1MBB; BuildMI(BB, dl, TII->get(LoadMnemonic), dest) .addReg(ptrA).addReg(ptrB); BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0) .addReg(oldval).addReg(dest); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); BB->addSuccessor(loop2MBB); BB->addSuccessor(midMBB); BB = loop2MBB; BuildMI(BB, dl, TII->get(StoreMnemonic)) .addReg(newval).addReg(ptrA).addReg(ptrB); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); BB->addSuccessor(loop1MBB); BB->addSuccessor(exitMBB); BB = midMBB; BuildMI(BB, dl, TII->get(StoreMnemonic)) .addReg(dest).addReg(ptrA).addReg(ptrB); BB->addSuccessor(exitMBB); // exitMBB: // ... BB = exitMBB; } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 || MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) { // We must use 64-bit registers for addresses when targeting 64-bit, // since we're actually doing arithmetic on them. Other registers // can be 32-bit. bool is64bit = Subtarget.isPPC64(); bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8; unsigned dest = MI.getOperand(0).getReg(); unsigned ptrA = MI.getOperand(1).getReg(); unsigned ptrB = MI.getOperand(2).getReg(); unsigned oldval = MI.getOperand(3).getReg(); unsigned newval = MI.getOperand(4).getReg(); DebugLoc dl = MI.getDebugLoc(); MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, loop1MBB); F->insert(It, loop2MBB); F->insert(It, midMBB); F->insert(It, exitMBB); exitMBB->splice(exitMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); MachineRegisterInfo &RegInfo = F->getRegInfo(); const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; unsigned PtrReg = RegInfo.createVirtualRegister(RC); unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); unsigned ShiftReg = RegInfo.createVirtualRegister(RC); unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC); unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC); unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC); unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC); unsigned MaskReg = RegInfo.createVirtualRegister(RC); unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); unsigned Ptr1Reg; unsigned TmpReg = RegInfo.createVirtualRegister(RC); unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; // thisMBB: // ... // fallthrough --> loopMBB BB->addSuccessor(loop1MBB); // The 4-byte load must be aligned, while a char or short may be // anywhere in the word. Hence all this nasty bookkeeping code. // add ptr1, ptrA, ptrB [copy if ptrA==0] // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] // xori shift, shift1, 24 [16] // rlwinm ptr, ptr1, 0, 0, 29 // slw newval2, newval, shift // slw oldval2, oldval,shift // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] // slw mask, mask2, shift // and newval3, newval2, mask // and oldval3, oldval2, mask // loop1MBB: // lwarx tmpDest, ptr // and tmp, tmpDest, mask // cmpw tmp, oldval3 // bne- midMBB // loop2MBB: // andc tmp2, tmpDest, mask // or tmp4, tmp2, newval3 // stwcx. tmp4, ptr // bne- loop1MBB // b exitBB // midMBB: // stwcx. tmpDest, ptr // exitBB: // srw dest, tmpDest, shift if (ptrA != ZeroReg) { Ptr1Reg = RegInfo.createVirtualRegister(RC); BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) .addReg(ptrA).addReg(ptrB); } else { Ptr1Reg = ptrB; } BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); if (is64bit) BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) .addReg(Ptr1Reg).addImm(0).addImm(61); else BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg) .addReg(newval).addReg(ShiftReg); BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg) .addReg(oldval).addReg(ShiftReg); if (is8bit) BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); else { BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg) .addReg(Mask3Reg).addImm(65535); } BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) .addReg(Mask2Reg).addReg(ShiftReg); BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg) .addReg(NewVal2Reg).addReg(MaskReg); BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg) .addReg(OldVal2Reg).addReg(MaskReg); BB = loop1MBB; BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) .addReg(ZeroReg).addReg(PtrReg); BuildMI(BB, dl, TII->get(PPC::AND),TmpReg) .addReg(TmpDestReg).addReg(MaskReg); BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0) .addReg(TmpReg).addReg(OldVal3Reg); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); BB->addSuccessor(loop2MBB); BB->addSuccessor(midMBB); BB = loop2MBB; BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg) .addReg(TmpDestReg).addReg(MaskReg); BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg) .addReg(Tmp2Reg).addReg(NewVal3Reg); BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg) .addReg(ZeroReg).addReg(PtrReg); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); BB->addSuccessor(loop1MBB); BB->addSuccessor(exitMBB); BB = midMBB; BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg) .addReg(ZeroReg).addReg(PtrReg); BB->addSuccessor(exitMBB); // exitMBB: // ... BB = exitMBB; BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg) .addReg(ShiftReg); } else if (MI.getOpcode() == PPC::FADDrtz) { // This pseudo performs an FADD with rounding mode temporarily forced // to round-to-zero. We emit this via custom inserter since the FPSCR // is not modeled at the SelectionDAG level. unsigned Dest = MI.getOperand(0).getReg(); unsigned Src1 = MI.getOperand(1).getReg(); unsigned Src2 = MI.getOperand(2).getReg(); DebugLoc dl = MI.getDebugLoc(); MachineRegisterInfo &RegInfo = F->getRegInfo(); unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); // Save FPSCR value. BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg); // Set rounding mode to round-to-zero. BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31); BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30); // Perform addition. BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2); // Restore FPSCR value. BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg); } else if (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT || MI.getOpcode() == PPC::ANDIo_1_GT_BIT || MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 || MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) { unsigned Opcode = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 || MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) ? PPC::ANDIo8 : PPC::ANDIo; bool isEQ = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT || MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8); MachineRegisterInfo &RegInfo = F->getRegInfo(); unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ? &PPC::GPRCRegClass : &PPC::G8RCRegClass); DebugLoc dl = MI.getDebugLoc(); BuildMI(*BB, MI, dl, TII->get(Opcode), Dest) .addReg(MI.getOperand(1).getReg()) .addImm(1); BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), MI.getOperand(0).getReg()) .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT); } else if (MI.getOpcode() == PPC::TCHECK_RET) { DebugLoc Dl = MI.getDebugLoc(); MachineRegisterInfo &RegInfo = F->getRegInfo(); unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg); return BB; } else { llvm_unreachable("Unexpected instr type to insert"); } MI.eraseFromParent(); // The pseudo instruction is gone now. return BB; } //===----------------------------------------------------------------------===// // Target Optimization Hooks //===----------------------------------------------------------------------===// static std::string getRecipOp(const char *Base, EVT VT) { std::string RecipOp(Base); if (VT.getScalarType() == MVT::f64) RecipOp += "d"; else RecipOp += "f"; if (VT.isVector()) RecipOp = "vec-" + RecipOp; return RecipOp; } SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI, unsigned &RefinementSteps, bool &UseOneConstNR) const { EVT VT = Operand.getValueType(); if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) || (VT == MVT::f64 && Subtarget.hasFRSQRTE()) || (VT == MVT::v4f32 && Subtarget.hasAltivec()) || (VT == MVT::v2f64 && Subtarget.hasVSX()) || (VT == MVT::v4f32 && Subtarget.hasQPX()) || (VT == MVT::v4f64 && Subtarget.hasQPX())) { TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals; std::string RecipOp = getRecipOp("sqrt", VT); if (!Recips.isEnabled(RecipOp)) return SDValue(); RefinementSteps = Recips.getRefinementSteps(RecipOp); UseOneConstNR = true; return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand); } return SDValue(); } SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI, unsigned &RefinementSteps) const { EVT VT = Operand.getValueType(); if ((VT == MVT::f32 && Subtarget.hasFRES()) || (VT == MVT::f64 && Subtarget.hasFRE()) || (VT == MVT::v4f32 && Subtarget.hasAltivec()) || (VT == MVT::v2f64 && Subtarget.hasVSX()) || (VT == MVT::v4f32 && Subtarget.hasQPX()) || (VT == MVT::v4f64 && Subtarget.hasQPX())) { TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals; std::string RecipOp = getRecipOp("div", VT); if (!Recips.isEnabled(RecipOp)) return SDValue(); RefinementSteps = Recips.getRefinementSteps(RecipOp); return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand); } return SDValue(); } unsigned PPCTargetLowering::combineRepeatedFPDivisors() const { // Note: This functionality is used only when unsafe-fp-math is enabled, and // on cores with reciprocal estimates (which are used when unsafe-fp-math is // enabled for division), this functionality is redundant with the default // combiner logic (once the division -> reciprocal/multiply transformation // has taken place). As a result, this matters more for older cores than for // newer ones. // Combine multiple FDIVs with the same divisor into multiple FMULs by the // reciprocal if there are two or more FDIVs (for embedded cores with only // one FP pipeline) for three or more FDIVs (for generic OOO cores). switch (Subtarget.getDarwinDirective()) { default: return 3; case PPC::DIR_440: case PPC::DIR_A2: case PPC::DIR_E500mc: case PPC::DIR_E5500: return 2; } } // isConsecutiveLSLoc needs to work even if all adds have not yet been // collapsed, and so we need to look through chains of them. static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base, int64_t& Offset, SelectionDAG &DAG) { if (DAG.isBaseWithConstantOffset(Loc)) { Base = Loc.getOperand(0); Offset += cast(Loc.getOperand(1))->getSExtValue(); // The base might itself be a base plus an offset, and if so, accumulate // that as well. getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG); } } static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG) { if (VT.getSizeInBits() / 8 != Bytes) return false; SDValue BaseLoc = Base->getBasePtr(); if (Loc.getOpcode() == ISD::FrameIndex) { if (BaseLoc.getOpcode() != ISD::FrameIndex) return false; const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); int FI = cast(Loc)->getIndex(); int BFI = cast(BaseLoc)->getIndex(); int FS = MFI->getObjectSize(FI); int BFS = MFI->getObjectSize(BFI); if (FS != BFS || FS != (int)Bytes) return false; return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes); } SDValue Base1 = Loc, Base2 = BaseLoc; int64_t Offset1 = 0, Offset2 = 0; getBaseWithConstantOffset(Loc, Base1, Offset1, DAG); getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG); if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes)) return true; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const GlobalValue *GV1 = nullptr; const GlobalValue *GV2 = nullptr; Offset1 = 0; Offset2 = 0; bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1); bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); if (isGA1 && isGA2 && GV1 == GV2) return Offset1 == (Offset2 + Dist*Bytes); return false; } // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does // not enforce equality of the chain operands. static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG) { if (LSBaseSDNode *LS = dyn_cast(N)) { EVT VT = LS->getMemoryVT(); SDValue Loc = LS->getBasePtr(); return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG); } if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) { EVT VT; switch (cast(N->getOperand(1))->getZExtValue()) { default: return false; case Intrinsic::ppc_qpx_qvlfd: case Intrinsic::ppc_qpx_qvlfda: VT = MVT::v4f64; break; case Intrinsic::ppc_qpx_qvlfs: case Intrinsic::ppc_qpx_qvlfsa: VT = MVT::v4f32; break; case Intrinsic::ppc_qpx_qvlfcd: case Intrinsic::ppc_qpx_qvlfcda: VT = MVT::v2f64; break; case Intrinsic::ppc_qpx_qvlfcs: case Intrinsic::ppc_qpx_qvlfcsa: VT = MVT::v2f32; break; case Intrinsic::ppc_qpx_qvlfiwa: case Intrinsic::ppc_qpx_qvlfiwz: case Intrinsic::ppc_altivec_lvx: case Intrinsic::ppc_altivec_lvxl: case Intrinsic::ppc_vsx_lxvw4x: VT = MVT::v4i32; break; case Intrinsic::ppc_vsx_lxvd2x: VT = MVT::v2f64; break; case Intrinsic::ppc_altivec_lvebx: VT = MVT::i8; break; case Intrinsic::ppc_altivec_lvehx: VT = MVT::i16; break; case Intrinsic::ppc_altivec_lvewx: VT = MVT::i32; break; } return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG); } if (N->getOpcode() == ISD::INTRINSIC_VOID) { EVT VT; switch (cast(N->getOperand(1))->getZExtValue()) { default: return false; case Intrinsic::ppc_qpx_qvstfd: case Intrinsic::ppc_qpx_qvstfda: VT = MVT::v4f64; break; case Intrinsic::ppc_qpx_qvstfs: case Intrinsic::ppc_qpx_qvstfsa: VT = MVT::v4f32; break; case Intrinsic::ppc_qpx_qvstfcd: case Intrinsic::ppc_qpx_qvstfcda: VT = MVT::v2f64; break; case Intrinsic::ppc_qpx_qvstfcs: case Intrinsic::ppc_qpx_qvstfcsa: VT = MVT::v2f32; break; case Intrinsic::ppc_qpx_qvstfiw: case Intrinsic::ppc_qpx_qvstfiwa: case Intrinsic::ppc_altivec_stvx: case Intrinsic::ppc_altivec_stvxl: case Intrinsic::ppc_vsx_stxvw4x: VT = MVT::v4i32; break; case Intrinsic::ppc_vsx_stxvd2x: VT = MVT::v2f64; break; case Intrinsic::ppc_altivec_stvebx: VT = MVT::i8; break; case Intrinsic::ppc_altivec_stvehx: VT = MVT::i16; break; case Intrinsic::ppc_altivec_stvewx: VT = MVT::i32; break; } return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG); } return false; } // Return true is there is a nearyby consecutive load to the one provided // (regardless of alignment). We search up and down the chain, looking though // token factors and other loads (but nothing else). As a result, a true result // indicates that it is safe to create a new consecutive load adjacent to the // load provided. static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) { SDValue Chain = LD->getChain(); EVT VT = LD->getMemoryVT(); SmallSet LoadRoots; SmallVector Queue(1, Chain.getNode()); SmallSet Visited; // First, search up the chain, branching to follow all token-factor operands. // If we find a consecutive load, then we're done, otherwise, record all // nodes just above the top-level loads and token factors. while (!Queue.empty()) { SDNode *ChainNext = Queue.pop_back_val(); if (!Visited.insert(ChainNext).second) continue; if (MemSDNode *ChainLD = dyn_cast(ChainNext)) { if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) return true; if (!Visited.count(ChainLD->getChain().getNode())) Queue.push_back(ChainLD->getChain().getNode()); } else if (ChainNext->getOpcode() == ISD::TokenFactor) { for (const SDUse &O : ChainNext->ops()) if (!Visited.count(O.getNode())) Queue.push_back(O.getNode()); } else LoadRoots.insert(ChainNext); } // Second, search down the chain, starting from the top-level nodes recorded // in the first phase. These top-level nodes are the nodes just above all // loads and token factors. Starting with their uses, recursively look though // all loads (just the chain uses) and token factors to find a consecutive // load. Visited.clear(); Queue.clear(); for (SmallSet::iterator I = LoadRoots.begin(), IE = LoadRoots.end(); I != IE; ++I) { Queue.push_back(*I); while (!Queue.empty()) { SDNode *LoadRoot = Queue.pop_back_val(); if (!Visited.insert(LoadRoot).second) continue; if (MemSDNode *ChainLD = dyn_cast(LoadRoot)) if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) return true; for (SDNode::use_iterator UI = LoadRoot->use_begin(), UE = LoadRoot->use_end(); UI != UE; ++UI) if (((isa(*UI) && cast(*UI)->getChain().getNode() == LoadRoot) || UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI)) Queue.push_back(*UI); } } return false; } SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits"); // If we're tracking CR bits, we need to be careful that we don't have: // trunc(binary-ops(zext(x), zext(y))) // or // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...) // such that we're unnecessarily moving things into GPRs when it would be // better to keep them in CR bits. // Note that trunc here can be an actual i1 trunc, or can be the effective // truncation that comes from a setcc or select_cc. if (N->getOpcode() == ISD::TRUNCATE && N->getValueType(0) != MVT::i1) return SDValue(); if (N->getOperand(0).getValueType() != MVT::i32 && N->getOperand(0).getValueType() != MVT::i64) return SDValue(); if (N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) { // If we're looking at a comparison, then we need to make sure that the // high bits (all except for the first) don't matter the result. ISD::CondCode CC = cast(N->getOperand( N->getOpcode() == ISD::SETCC ? 2 : 4))->get(); unsigned OpBits = N->getOperand(0).getValueSizeInBits(); if (ISD::isSignedIntSetCC(CC)) { if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits || DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits) return SDValue(); } else if (ISD::isUnsignedIntSetCC(CC)) { if (!DAG.MaskedValueIsZero(N->getOperand(0), APInt::getHighBitsSet(OpBits, OpBits-1)) || !DAG.MaskedValueIsZero(N->getOperand(1), APInt::getHighBitsSet(OpBits, OpBits-1))) return SDValue(); } else { // This is neither a signed nor an unsigned comparison, just make sure // that the high bits are equal. APInt Op1Zero, Op1One; APInt Op2Zero, Op2One; DAG.computeKnownBits(N->getOperand(0), Op1Zero, Op1One); DAG.computeKnownBits(N->getOperand(1), Op2Zero, Op2One); // We don't really care about what is known about the first bit (if // anything), so clear it in all masks prior to comparing them. Op1Zero.clearBit(0); Op1One.clearBit(0); Op2Zero.clearBit(0); Op2One.clearBit(0); if (Op1Zero != Op2Zero || Op1One != Op2One) return SDValue(); } } // We now know that the higher-order bits are irrelevant, we just need to // make sure that all of the intermediate operations are bit operations, and // all inputs are extensions. if (N->getOperand(0).getOpcode() != ISD::AND && N->getOperand(0).getOpcode() != ISD::OR && N->getOperand(0).getOpcode() != ISD::XOR && N->getOperand(0).getOpcode() != ISD::SELECT && N->getOperand(0).getOpcode() != ISD::SELECT_CC && N->getOperand(0).getOpcode() != ISD::TRUNCATE && N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND && N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND && N->getOperand(0).getOpcode() != ISD::ANY_EXTEND) return SDValue(); if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) && N->getOperand(1).getOpcode() != ISD::AND && N->getOperand(1).getOpcode() != ISD::OR && N->getOperand(1).getOpcode() != ISD::XOR && N->getOperand(1).getOpcode() != ISD::SELECT && N->getOperand(1).getOpcode() != ISD::SELECT_CC && N->getOperand(1).getOpcode() != ISD::TRUNCATE && N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND && N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND && N->getOperand(1).getOpcode() != ISD::ANY_EXTEND) return SDValue(); SmallVector Inputs; SmallVector BinOps, PromOps; SmallPtrSet Visited; for (unsigned i = 0; i < 2; ++i) { if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND || N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND || N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) && N->getOperand(i).getOperand(0).getValueType() == MVT::i1) || isa(N->getOperand(i))) Inputs.push_back(N->getOperand(i)); else BinOps.push_back(N->getOperand(i)); if (N->getOpcode() == ISD::TRUNCATE) break; } // Visit all inputs, collect all binary operations (and, or, xor and // select) that are all fed by extensions. while (!BinOps.empty()) { SDValue BinOp = BinOps.back(); BinOps.pop_back(); if (!Visited.insert(BinOp.getNode()).second) continue; PromOps.push_back(BinOp); for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { // The condition of the select is not promoted. if (BinOp.getOpcode() == ISD::SELECT && i == 0) continue; if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) continue; if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) && BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) || isa(BinOp.getOperand(i))) { Inputs.push_back(BinOp.getOperand(i)); } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || BinOp.getOperand(i).getOpcode() == ISD::OR || BinOp.getOperand(i).getOpcode() == ISD::XOR || BinOp.getOperand(i).getOpcode() == ISD::SELECT || BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC || BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) { BinOps.push_back(BinOp.getOperand(i)); } else { // We have an input that is not an extension or another binary // operation; we'll abort this transformation. return SDValue(); } } } // Make sure that this is a self-contained cluster of operations (which // is not quite the same thing as saying that everything has only one // use). for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { if (isa(Inputs[i])) continue; for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), UE = Inputs[i].getNode()->use_end(); UI != UE; ++UI) { SDNode *User = *UI; if (User != N && !Visited.count(User)) return SDValue(); // Make sure that we're not going to promote the non-output-value // operand(s) or SELECT or SELECT_CC. // FIXME: Although we could sometimes handle this, and it does occur in // practice that one of the condition inputs to the select is also one of // the outputs, we currently can't deal with this. if (User->getOpcode() == ISD::SELECT) { if (User->getOperand(0) == Inputs[i]) return SDValue(); } else if (User->getOpcode() == ISD::SELECT_CC) { if (User->getOperand(0) == Inputs[i] || User->getOperand(1) == Inputs[i]) return SDValue(); } } } for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), UE = PromOps[i].getNode()->use_end(); UI != UE; ++UI) { SDNode *User = *UI; if (User != N && !Visited.count(User)) return SDValue(); // Make sure that we're not going to promote the non-output-value // operand(s) or SELECT or SELECT_CC. // FIXME: Although we could sometimes handle this, and it does occur in // practice that one of the condition inputs to the select is also one of // the outputs, we currently can't deal with this. if (User->getOpcode() == ISD::SELECT) { if (User->getOperand(0) == PromOps[i]) return SDValue(); } else if (User->getOpcode() == ISD::SELECT_CC) { if (User->getOperand(0) == PromOps[i] || User->getOperand(1) == PromOps[i]) return SDValue(); } } } // Replace all inputs with the extension operand. for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { // Constants may have users outside the cluster of to-be-promoted nodes, // and so we need to replace those as we do the promotions. if (isa(Inputs[i])) continue; else DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0)); } std::list PromOpHandles; for (auto &PromOp : PromOps) PromOpHandles.emplace_back(PromOp); // Replace all operations (these are all the same, but have a different // (i1) return type). DAG.getNode will validate that the types of // a binary operator match, so go through the list in reverse so that // we've likely promoted both operands first. Any intermediate truncations or // extensions disappear. while (!PromOpHandles.empty()) { SDValue PromOp = PromOpHandles.back().getValue(); PromOpHandles.pop_back(); if (PromOp.getOpcode() == ISD::TRUNCATE || PromOp.getOpcode() == ISD::SIGN_EXTEND || PromOp.getOpcode() == ISD::ZERO_EXTEND || PromOp.getOpcode() == ISD::ANY_EXTEND) { if (!isa(PromOp.getOperand(0)) && PromOp.getOperand(0).getValueType() != MVT::i1) { // The operand is not yet ready (see comment below). PromOpHandles.emplace_front(PromOp); continue; } SDValue RepValue = PromOp.getOperand(0); if (isa(RepValue)) RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue); DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue); continue; } unsigned C; switch (PromOp.getOpcode()) { default: C = 0; break; case ISD::SELECT: C = 1; break; case ISD::SELECT_CC: C = 2; break; } if ((!isa(PromOp.getOperand(C)) && PromOp.getOperand(C).getValueType() != MVT::i1) || (!isa(PromOp.getOperand(C+1)) && PromOp.getOperand(C+1).getValueType() != MVT::i1)) { // The to-be-promoted operands of this node have not yet been // promoted (this should be rare because we're going through the // list backward, but if one of the operands has several users in // this cluster of to-be-promoted nodes, it is possible). PromOpHandles.emplace_front(PromOp); continue; } SmallVector Ops(PromOp.getNode()->op_begin(), PromOp.getNode()->op_end()); // If there are any constant inputs, make sure they're replaced now. for (unsigned i = 0; i < 2; ++i) if (isa(Ops[C+i])) Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]); DAG.ReplaceAllUsesOfValueWith(PromOp, DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops)); } // Now we're left with the initial truncation itself. if (N->getOpcode() == ISD::TRUNCATE) return N->getOperand(0); // Otherwise, this is a comparison. The operands to be compared have just // changed type (to i1), but everything else is the same. return SDValue(N, 0); } SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); // If we're tracking CR bits, we need to be careful that we don't have: // zext(binary-ops(trunc(x), trunc(y))) // or // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...) // such that we're unnecessarily moving things into CR bits that can more // efficiently stay in GPRs. Note that if we're not certain that the high // bits are set as required by the final extension, we still may need to do // some masking to get the proper behavior. // This same functionality is important on PPC64 when dealing with // 32-to-64-bit extensions; these occur often when 32-bit values are used as // the return values of functions. Because it is so similar, it is handled // here as well. if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) return SDValue(); if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) || (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64()))) return SDValue(); if (N->getOperand(0).getOpcode() != ISD::AND && N->getOperand(0).getOpcode() != ISD::OR && N->getOperand(0).getOpcode() != ISD::XOR && N->getOperand(0).getOpcode() != ISD::SELECT && N->getOperand(0).getOpcode() != ISD::SELECT_CC) return SDValue(); SmallVector Inputs; SmallVector BinOps(1, N->getOperand(0)), PromOps; SmallPtrSet Visited; // Visit all inputs, collect all binary operations (and, or, xor and // select) that are all fed by truncations. while (!BinOps.empty()) { SDValue BinOp = BinOps.back(); BinOps.pop_back(); if (!Visited.insert(BinOp.getNode()).second) continue; PromOps.push_back(BinOp); for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { // The condition of the select is not promoted. if (BinOp.getOpcode() == ISD::SELECT && i == 0) continue; if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) continue; if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || isa(BinOp.getOperand(i))) { Inputs.push_back(BinOp.getOperand(i)); } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || BinOp.getOperand(i).getOpcode() == ISD::OR || BinOp.getOperand(i).getOpcode() == ISD::XOR || BinOp.getOperand(i).getOpcode() == ISD::SELECT || BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) { BinOps.push_back(BinOp.getOperand(i)); } else { // We have an input that is not a truncation or another binary // operation; we'll abort this transformation. return SDValue(); } } } // The operands of a select that must be truncated when the select is // promoted because the operand is actually part of the to-be-promoted set. DenseMap SelectTruncOp[2]; // Make sure that this is a self-contained cluster of operations (which // is not quite the same thing as saying that everything has only one // use). for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { if (isa(Inputs[i])) continue; for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), UE = Inputs[i].getNode()->use_end(); UI != UE; ++UI) { SDNode *User = *UI; if (User != N && !Visited.count(User)) return SDValue(); // If we're going to promote the non-output-value operand(s) or SELECT or // SELECT_CC, record them for truncation. if (User->getOpcode() == ISD::SELECT) { if (User->getOperand(0) == Inputs[i]) SelectTruncOp[0].insert(std::make_pair(User, User->getOperand(0).getValueType())); } else if (User->getOpcode() == ISD::SELECT_CC) { if (User->getOperand(0) == Inputs[i]) SelectTruncOp[0].insert(std::make_pair(User, User->getOperand(0).getValueType())); if (User->getOperand(1) == Inputs[i]) SelectTruncOp[1].insert(std::make_pair(User, User->getOperand(1).getValueType())); } } } for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), UE = PromOps[i].getNode()->use_end(); UI != UE; ++UI) { SDNode *User = *UI; if (User != N && !Visited.count(User)) return SDValue(); // If we're going to promote the non-output-value operand(s) or SELECT or // SELECT_CC, record them for truncation. if (User->getOpcode() == ISD::SELECT) { if (User->getOperand(0) == PromOps[i]) SelectTruncOp[0].insert(std::make_pair(User, User->getOperand(0).getValueType())); } else if (User->getOpcode() == ISD::SELECT_CC) { if (User->getOperand(0) == PromOps[i]) SelectTruncOp[0].insert(std::make_pair(User, User->getOperand(0).getValueType())); if (User->getOperand(1) == PromOps[i]) SelectTruncOp[1].insert(std::make_pair(User, User->getOperand(1).getValueType())); } } } unsigned PromBits = N->getOperand(0).getValueSizeInBits(); bool ReallyNeedsExt = false; if (N->getOpcode() != ISD::ANY_EXTEND) { // If all of the inputs are not already sign/zero extended, then // we'll still need to do that at the end. for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { if (isa(Inputs[i])) continue; unsigned OpBits = Inputs[i].getOperand(0).getValueSizeInBits(); assert(PromBits < OpBits && "Truncation not to a smaller bit count?"); if ((N->getOpcode() == ISD::ZERO_EXTEND && !DAG.MaskedValueIsZero(Inputs[i].getOperand(0), APInt::getHighBitsSet(OpBits, OpBits-PromBits))) || (N->getOpcode() == ISD::SIGN_EXTEND && DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) < (OpBits-(PromBits-1)))) { ReallyNeedsExt = true; break; } } } // Replace all inputs, either with the truncation operand, or a // truncation or extension to the final output type. for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { // Constant inputs need to be replaced with the to-be-promoted nodes that // use them because they might have users outside of the cluster of // promoted nodes. if (isa(Inputs[i])) continue; SDValue InSrc = Inputs[i].getOperand(0); if (Inputs[i].getValueType() == N->getValueType(0)) DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc); else if (N->getOpcode() == ISD::SIGN_EXTEND) DAG.ReplaceAllUsesOfValueWith(Inputs[i], DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0))); else if (N->getOpcode() == ISD::ZERO_EXTEND) DAG.ReplaceAllUsesOfValueWith(Inputs[i], DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0))); else DAG.ReplaceAllUsesOfValueWith(Inputs[i], DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0))); } std::list PromOpHandles; for (auto &PromOp : PromOps) PromOpHandles.emplace_back(PromOp); // Replace all operations (these are all the same, but have a different // (promoted) return type). DAG.getNode will validate that the types of // a binary operator match, so go through the list in reverse so that // we've likely promoted both operands first. while (!PromOpHandles.empty()) { SDValue PromOp = PromOpHandles.back().getValue(); PromOpHandles.pop_back(); unsigned C; switch (PromOp.getOpcode()) { default: C = 0; break; case ISD::SELECT: C = 1; break; case ISD::SELECT_CC: C = 2; break; } if ((!isa(PromOp.getOperand(C)) && PromOp.getOperand(C).getValueType() != N->getValueType(0)) || (!isa(PromOp.getOperand(C+1)) && PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) { // The to-be-promoted operands of this node have not yet been // promoted (this should be rare because we're going through the // list backward, but if one of the operands has several users in // this cluster of to-be-promoted nodes, it is possible). PromOpHandles.emplace_front(PromOp); continue; } // For SELECT and SELECT_CC nodes, we do a similar check for any // to-be-promoted comparison inputs. if (PromOp.getOpcode() == ISD::SELECT || PromOp.getOpcode() == ISD::SELECT_CC) { if ((SelectTruncOp[0].count(PromOp.getNode()) && PromOp.getOperand(0).getValueType() != N->getValueType(0)) || (SelectTruncOp[1].count(PromOp.getNode()) && PromOp.getOperand(1).getValueType() != N->getValueType(0))) { PromOpHandles.emplace_front(PromOp); continue; } } SmallVector Ops(PromOp.getNode()->op_begin(), PromOp.getNode()->op_end()); // If this node has constant inputs, then they'll need to be promoted here. for (unsigned i = 0; i < 2; ++i) { if (!isa(Ops[C+i])) continue; if (Ops[C+i].getValueType() == N->getValueType(0)) continue; if (N->getOpcode() == ISD::SIGN_EXTEND) Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); else if (N->getOpcode() == ISD::ZERO_EXTEND) Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); else Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); } // If we've promoted the comparison inputs of a SELECT or SELECT_CC, // truncate them again to the original value type. if (PromOp.getOpcode() == ISD::SELECT || PromOp.getOpcode() == ISD::SELECT_CC) { auto SI0 = SelectTruncOp[0].find(PromOp.getNode()); if (SI0 != SelectTruncOp[0].end()) Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]); auto SI1 = SelectTruncOp[1].find(PromOp.getNode()); if (SI1 != SelectTruncOp[1].end()) Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]); } DAG.ReplaceAllUsesOfValueWith(PromOp, DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops)); } // Now we're left with the initial extension itself. if (!ReallyNeedsExt) return N->getOperand(0); // To zero extend, just mask off everything except for the first bit (in the // i1 case). if (N->getOpcode() == ISD::ZERO_EXTEND) return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0), DAG.getConstant(APInt::getLowBitsSet( N->getValueSizeInBits(0), PromBits), dl, N->getValueType(0))); assert(N->getOpcode() == ISD::SIGN_EXTEND && "Invalid extension type"); EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout()); SDValue ShiftCst = DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy); return DAG.getNode( ISD::SRA, dl, N->getValueType(0), DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst), ShiftCst); } SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N, DAGCombinerInfo &DCI) const { assert(N->getOpcode() == ISD::BUILD_VECTOR && "Should be called with a BUILD_VECTOR node"); SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); if (N->getValueType(0) != MVT::v2f64 || !Subtarget.hasVSX()) return SDValue(); // Looking for: // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1)) if (N->getOperand(0).getOpcode() != ISD::SINT_TO_FP && N->getOperand(0).getOpcode() != ISD::UINT_TO_FP) return SDValue(); if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP && N->getOperand(1).getOpcode() != ISD::UINT_TO_FP) return SDValue(); if (N->getOperand(0).getOpcode() != N->getOperand(1).getOpcode()) return SDValue(); SDValue Ext1 = N->getOperand(0).getOperand(0); SDValue Ext2 = N->getOperand(1).getOperand(0); if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT || Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT) return SDValue(); ConstantSDNode *Ext1Op = dyn_cast(Ext1.getOperand(1)); ConstantSDNode *Ext2Op = dyn_cast(Ext2.getOperand(1)); if (!Ext1Op || !Ext2Op) return SDValue(); if (Ext1.getValueType() != MVT::i32 || Ext2.getValueType() != MVT::i32) if (Ext1.getOperand(0) != Ext2.getOperand(0)) return SDValue(); int FirstElem = Ext1Op->getZExtValue(); int SecondElem = Ext2Op->getZExtValue(); int SubvecIdx; if (FirstElem == 0 && SecondElem == 1) SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0; else if (FirstElem == 2 && SecondElem == 3) SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1; else return SDValue(); SDValue SrcVec = Ext1.getOperand(0); auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ? PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP; return DAG.getNode(NodeType, dl, MVT::v2f64, SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl)); } SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const { assert((N->getOpcode() == ISD::SINT_TO_FP || N->getOpcode() == ISD::UINT_TO_FP) && "Need an int -> FP conversion node here"); if (!Subtarget.has64BitSupport()) return SDValue(); SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); SDValue Op(N, 0); // Don't handle ppc_fp128 here or i1 conversions. if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) return SDValue(); if (Op.getOperand(0).getValueType() == MVT::i1) return SDValue(); // For i32 intermediate values, unfortunately, the conversion functions // leave the upper 32 bits of the value are undefined. Within the set of // scalar instructions, we have no method for zero- or sign-extending the // value. Thus, we cannot handle i32 intermediate values here. if (Op.getOperand(0).getValueType() == MVT::i32) return SDValue(); assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) && "UINT_TO_FP is supported only with FPCVT"); // If we have FCFIDS, then use it when converting to single-precision. // Otherwise, convert to double-precision and then round. unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS : PPCISD::FCFIDS) : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU : PPCISD::FCFID); MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? MVT::f32 : MVT::f64; // If we're converting from a float, to an int, and back to a float again, // then we don't need the store/load pair at all. if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT && Subtarget.hasFPCVT()) || (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) { SDValue Src = Op.getOperand(0).getOperand(0); if (Src.getValueType() == MVT::f32) { Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); DCI.AddToWorklist(Src.getNode()); } else if (Src.getValueType() != MVT::f64) { // Make sure that we don't pick up a ppc_fp128 source value. return SDValue(); } unsigned FCTOp = Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ; SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src); SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp); if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) { FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0, dl)); DCI.AddToWorklist(FP.getNode()); } return FP; } return SDValue(); } // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for // builtins) into loads with swaps. SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); SDValue Chain; SDValue Base; MachineMemOperand *MMO; switch (N->getOpcode()) { default: llvm_unreachable("Unexpected opcode for little endian VSX load"); case ISD::LOAD: { LoadSDNode *LD = cast(N); Chain = LD->getChain(); Base = LD->getBasePtr(); MMO = LD->getMemOperand(); // If the MMO suggests this isn't a load of a full vector, leave // things alone. For a built-in, we have to make the change for // correctness, so if there is a size problem that will be a bug. if (MMO->getSize() < 16) return SDValue(); break; } case ISD::INTRINSIC_W_CHAIN: { MemIntrinsicSDNode *Intrin = cast(N); Chain = Intrin->getChain(); // Similarly to the store case below, Intrin->getBasePtr() doesn't get // us what we want. Get operand 2 instead. Base = Intrin->getOperand(2); MMO = Intrin->getMemOperand(); break; } } MVT VecTy = N->getValueType(0).getSimpleVT(); SDValue LoadOps[] = { Chain, Base }; SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl, DAG.getVTList(MVT::v2f64, MVT::Other), LoadOps, MVT::v2f64, MMO); DCI.AddToWorklist(Load.getNode()); Chain = Load.getValue(1); SDValue Swap = DAG.getNode( PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load); DCI.AddToWorklist(Swap.getNode()); // Add a bitcast if the resulting load type doesn't match v2f64. if (VecTy != MVT::v2f64) { SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap); DCI.AddToWorklist(N.getNode()); // Package {bitcast value, swap's chain} to match Load's shape. return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other), N, Swap.getValue(1)); } return Swap; } // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for // builtins) into stores with swaps. SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); SDValue Chain; SDValue Base; unsigned SrcOpnd; MachineMemOperand *MMO; switch (N->getOpcode()) { default: llvm_unreachable("Unexpected opcode for little endian VSX store"); case ISD::STORE: { StoreSDNode *ST = cast(N); Chain = ST->getChain(); Base = ST->getBasePtr(); MMO = ST->getMemOperand(); SrcOpnd = 1; // If the MMO suggests this isn't a store of a full vector, leave // things alone. For a built-in, we have to make the change for // correctness, so if there is a size problem that will be a bug. if (MMO->getSize() < 16) return SDValue(); break; } case ISD::INTRINSIC_VOID: { MemIntrinsicSDNode *Intrin = cast(N); Chain = Intrin->getChain(); // Intrin->getBasePtr() oddly does not get what we want. Base = Intrin->getOperand(3); MMO = Intrin->getMemOperand(); SrcOpnd = 2; break; } } SDValue Src = N->getOperand(SrcOpnd); MVT VecTy = Src.getValueType().getSimpleVT(); // All stores are done as v2f64 and possible bit cast. if (VecTy != MVT::v2f64) { Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src); DCI.AddToWorklist(Src.getNode()); } SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src); DCI.AddToWorklist(Swap.getNode()); Chain = Swap.getValue(1); SDValue StoreOps[] = { Chain, Swap, Base }; SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl, DAG.getVTList(MVT::Other), StoreOps, VecTy, MMO); DCI.AddToWorklist(Store.getNode()); return Store; } SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); switch (N->getOpcode()) { default: break; case PPCISD::SHL: if (isNullConstant(N->getOperand(0))) // 0 << V -> 0. return N->getOperand(0); break; case PPCISD::SRL: if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0. return N->getOperand(0); break; case PPCISD::SRA: if (ConstantSDNode *C = dyn_cast(N->getOperand(0))) { if (C->isNullValue() || // 0 >>s V -> 0. C->isAllOnesValue()) // -1 >>s V -> -1. return N->getOperand(0); } break; case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: return DAGCombineExtBoolTrunc(N, DCI); case ISD::TRUNCATE: case ISD::SETCC: case ISD::SELECT_CC: return DAGCombineTruncBoolExt(N, DCI); case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: return combineFPToIntToFP(N, DCI); case ISD::STORE: { // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)). if (Subtarget.hasSTFIWX() && !cast(N)->isTruncatingStore() && N->getOperand(1).getOpcode() == ISD::FP_TO_SINT && N->getOperand(1).getValueType() == MVT::i32 && N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) { SDValue Val = N->getOperand(1).getOperand(0); if (Val.getValueType() == MVT::f32) { Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val); DCI.AddToWorklist(Val.getNode()); } Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val); DCI.AddToWorklist(Val.getNode()); SDValue Ops[] = { N->getOperand(0), Val, N->getOperand(2), DAG.getValueType(N->getOperand(1).getValueType()) }; Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, DAG.getVTList(MVT::Other), Ops, cast(N)->getMemoryVT(), cast(N)->getMemOperand()); DCI.AddToWorklist(Val.getNode()); return Val; } // Turn STORE (BSWAP) -> sthbrx/stwbrx. if (cast(N)->isUnindexed() && N->getOperand(1).getOpcode() == ISD::BSWAP && N->getOperand(1).getNode()->hasOneUse() && (N->getOperand(1).getValueType() == MVT::i32 || N->getOperand(1).getValueType() == MVT::i16 || (Subtarget.hasLDBRX() && Subtarget.isPPC64() && N->getOperand(1).getValueType() == MVT::i64))) { SDValue BSwapOp = N->getOperand(1).getOperand(0); // Do an any-extend to 32-bits if this is a half-word input. if (BSwapOp.getValueType() == MVT::i16) BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp); SDValue Ops[] = { N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(N->getOperand(1).getValueType()) }; return DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other), Ops, cast(N)->getMemoryVT(), cast(N)->getMemOperand()); } // For little endian, VSX stores require generating xxswapd/lxvd2x. EVT VT = N->getOperand(1).getValueType(); if (VT.isSimple()) { MVT StoreVT = VT.getSimpleVT(); if (Subtarget.hasVSX() && Subtarget.isLittleEndian() && (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 || StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32)) return expandVSXStoreForLE(N, DCI); } break; } case ISD::LOAD: { LoadSDNode *LD = cast(N); EVT VT = LD->getValueType(0); // For little endian, VSX loads require generating lxvd2x/xxswapd. if (VT.isSimple()) { MVT LoadVT = VT.getSimpleVT(); if (Subtarget.hasVSX() && Subtarget.isLittleEndian() && (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 || LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32)) return expandVSXLoadForLE(N, DCI); } // We sometimes end up with a 64-bit integer load, from which we extract // two single-precision floating-point numbers. This happens with // std::complex, and other similar structures, because of the way we // canonicalize structure copies. However, if we lack direct moves, // then the final bitcasts from the extracted integer values to the // floating-point numbers turn into store/load pairs. Even with direct moves, // just loading the two floating-point numbers is likely better. auto ReplaceTwoFloatLoad = [&]() { if (VT != MVT::i64) return false; if (LD->getExtensionType() != ISD::NON_EXTLOAD || LD->isVolatile()) return false; // We're looking for a sequence like this: // t13: i64,ch = load t0, t6, undef:i64 // t16: i64 = srl t13, Constant:i32<32> // t17: i32 = truncate t16 // t18: f32 = bitcast t17 // t19: i32 = truncate t13 // t20: f32 = bitcast t19 if (!LD->hasNUsesOfValue(2, 0)) return false; auto UI = LD->use_begin(); while (UI.getUse().getResNo() != 0) ++UI; SDNode *Trunc = *UI++; while (UI.getUse().getResNo() != 0) ++UI; SDNode *RightShift = *UI; if (Trunc->getOpcode() != ISD::TRUNCATE) std::swap(Trunc, RightShift); if (Trunc->getOpcode() != ISD::TRUNCATE || Trunc->getValueType(0) != MVT::i32 || !Trunc->hasOneUse()) return false; if (RightShift->getOpcode() != ISD::SRL || !isa(RightShift->getOperand(1)) || RightShift->getConstantOperandVal(1) != 32 || !RightShift->hasOneUse()) return false; SDNode *Trunc2 = *RightShift->use_begin(); if (Trunc2->getOpcode() != ISD::TRUNCATE || Trunc2->getValueType(0) != MVT::i32 || !Trunc2->hasOneUse()) return false; SDNode *Bitcast = *Trunc->use_begin(); SDNode *Bitcast2 = *Trunc2->use_begin(); if (Bitcast->getOpcode() != ISD::BITCAST || Bitcast->getValueType(0) != MVT::f32) return false; if (Bitcast2->getOpcode() != ISD::BITCAST || Bitcast2->getValueType(0) != MVT::f32) return false; if (Subtarget.isLittleEndian()) std::swap(Bitcast, Bitcast2); // Bitcast has the second float (in memory-layout order) and Bitcast2 // has the first one. SDValue BasePtr = LD->getBasePtr(); if (LD->isIndexed()) { assert(LD->getAddressingMode() == ISD::PRE_INC && "Non-pre-inc AM on PPC?"); BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, LD->getOffset()); } auto MMOFlags = LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile; SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr, LD->getPointerInfo(), LD->getAlignment(), MMOFlags, LD->getAAInfo()); SDValue AddPtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, DAG.getIntPtrConstant(4, dl)); SDValue FloatLoad2 = DAG.getLoad( MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr, LD->getPointerInfo().getWithOffset(4), MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo()); if (LD->isIndexed()) { // Note that DAGCombine should re-form any pre-increment load(s) from // what is produced here if that makes sense. DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr); } DCI.CombineTo(Bitcast2, FloatLoad); DCI.CombineTo(Bitcast, FloatLoad2); DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1), SDValue(FloatLoad2.getNode(), 1)); return true; }; if (ReplaceTwoFloatLoad()) return SDValue(N, 0); EVT MemVT = LD->getMemoryVT(); Type *Ty = MemVT.getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty); Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext()); unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy); if (LD->isUnindexed() && VT.isVector() && ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) && // P8 and later hardware should just use LOAD. !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v4f32)) || (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) && LD->getAlignment() >= ScalarABIAlignment)) && LD->getAlignment() < ABIAlignment) { // This is a type-legal unaligned Altivec or QPX load. SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); bool isLittleEndian = Subtarget.isLittleEndian(); // This implements the loading of unaligned vectors as described in // the venerable Apple Velocity Engine overview. Specifically: // https://developer.apple.com/hardwaredrivers/ve/alignment.html // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html // // The general idea is to expand a sequence of one or more unaligned // loads into an alignment-based permutation-control instruction (lvsl // or lvsr), a series of regular vector loads (which always truncate // their input address to an aligned address), and a series of // permutations. The results of these permutations are the requested // loaded values. The trick is that the last "extra" load is not taken // from the address you might suspect (sizeof(vector) bytes after the // last requested load), but rather sizeof(vector) - 1 bytes after the // last requested vector. The point of this is to avoid a page fault if // the base address happened to be aligned. This works because if the // base address is aligned, then adding less than a full vector length // will cause the last vector in the sequence to be (re)loaded. // Otherwise, the next vector will be fetched as you might suspect was // necessary. // We might be able to reuse the permutation generation from // a different base address offset from this one by an aligned amount. // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this // optimization later. Intrinsic::ID Intr, IntrLD, IntrPerm; MVT PermCntlTy, PermTy, LDTy; if (Subtarget.hasAltivec()) { Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr : Intrinsic::ppc_altivec_lvsl; IntrLD = Intrinsic::ppc_altivec_lvx; IntrPerm = Intrinsic::ppc_altivec_vperm; PermCntlTy = MVT::v16i8; PermTy = MVT::v4i32; LDTy = MVT::v4i32; } else { Intr = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld : Intrinsic::ppc_qpx_qvlpcls; IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd : Intrinsic::ppc_qpx_qvlfs; IntrPerm = Intrinsic::ppc_qpx_qvfperm; PermCntlTy = MVT::v4f64; PermTy = MVT::v4f64; LDTy = MemVT.getSimpleVT(); } SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy); // Create the new MMO for the new base load. It is like the original MMO, // but represents an area in memory almost twice the vector size centered // on the original address. If the address is unaligned, we might start // reading up to (sizeof(vector)-1) bytes below the address of the // original unaligned load. MachineFunction &MF = DAG.getMachineFunction(); MachineMemOperand *BaseMMO = MF.getMachineMemOperand(LD->getMemOperand(), -(long)MemVT.getStoreSize()+1, 2*MemVT.getStoreSize()-1); // Create the new base load. SDValue LDXIntID = DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout())); SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr }; SDValue BaseLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, DAG.getVTList(PermTy, MVT::Other), BaseLoadOps, LDTy, BaseMMO); // Note that the value of IncOffset (which is provided to the next // load's pointer info offset value, and thus used to calculate the // alignment), and the value of IncValue (which is actually used to // increment the pointer value) are different! This is because we // require the next load to appear to be aligned, even though it // is actually offset from the base pointer by a lesser amount. int IncOffset = VT.getSizeInBits() / 8; int IncValue = IncOffset; // Walk (both up and down) the chain looking for another load at the real // (aligned) offset (the alignment of the other load does not matter in // this case). If found, then do not use the offset reduction trick, as // that will prevent the loads from being later combined (as they would // otherwise be duplicates). if (!findConsecutiveLoad(LD, DAG)) --IncValue; SDValue Increment = DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout())); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); MachineMemOperand *ExtraMMO = MF.getMachineMemOperand(LD->getMemOperand(), 1, 2*MemVT.getStoreSize()-1); SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr }; SDValue ExtraLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, DAG.getVTList(PermTy, MVT::Other), ExtraLoadOps, LDTy, ExtraMMO); SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, BaseLoad.getValue(1), ExtraLoad.getValue(1)); // Because vperm has a big-endian bias, we must reverse the order // of the input vectors and complement the permute control vector // when generating little endian code. We have already handled the // latter by using lvsr instead of lvsl, so just reverse BaseLoad // and ExtraLoad here. SDValue Perm; if (isLittleEndian) Perm = BuildIntrinsicOp(IntrPerm, ExtraLoad, BaseLoad, PermCntl, DAG, dl); else Perm = BuildIntrinsicOp(IntrPerm, BaseLoad, ExtraLoad, PermCntl, DAG, dl); if (VT != PermTy) Perm = Subtarget.hasAltivec() ? DAG.getNode(ISD::BITCAST, dl, VT, Perm) : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX DAG.getTargetConstant(1, dl, MVT::i64)); // second argument is 1 because this rounding // is always exact. // The output of the permutation is our loaded result, the TokenFactor is // our new chain. DCI.CombineTo(N, Perm, TF); return SDValue(N, 0); } } break; case ISD::INTRINSIC_WO_CHAIN: { bool isLittleEndian = Subtarget.isLittleEndian(); unsigned IID = cast(N->getOperand(0))->getZExtValue(); Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr : Intrinsic::ppc_altivec_lvsl); if ((IID == Intr || IID == Intrinsic::ppc_qpx_qvlpcld || IID == Intrinsic::ppc_qpx_qvlpcls) && N->getOperand(1)->getOpcode() == ISD::ADD) { SDValue Add = N->getOperand(1); int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ? 5 /* 32 byte alignment */ : 4 /* 16 byte alignment */; if (DAG.MaskedValueIsZero( Add->getOperand(1), APInt::getAllOnesValue(Bits /* alignment */) .zext( Add.getValueType().getScalarType().getSizeInBits()))) { SDNode *BasePtr = Add->getOperand(0).getNode(); for (SDNode::use_iterator UI = BasePtr->use_begin(), UE = BasePtr->use_end(); UI != UE; ++UI) { if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN && cast(UI->getOperand(0))->getZExtValue() == IID) { // We've found another LVSL/LVSR, and this address is an aligned // multiple of that one. The results will be the same, so use the // one we've just found instead. return SDValue(*UI, 0); } } } if (isa(Add->getOperand(1))) { SDNode *BasePtr = Add->getOperand(0).getNode(); for (SDNode::use_iterator UI = BasePtr->use_begin(), UE = BasePtr->use_end(); UI != UE; ++UI) { if (UI->getOpcode() == ISD::ADD && isa(UI->getOperand(1)) && (cast(Add->getOperand(1))->getZExtValue() - cast(UI->getOperand(1))->getZExtValue()) % (1ULL << Bits) == 0) { SDNode *OtherAdd = *UI; for (SDNode::use_iterator VI = OtherAdd->use_begin(), VE = OtherAdd->use_end(); VI != VE; ++VI) { if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN && cast(VI->getOperand(0))->getZExtValue() == IID) { return SDValue(*VI, 0); } } } } } } } break; case ISD::INTRINSIC_W_CHAIN: { // For little endian, VSX loads require generating lxvd2x/xxswapd. if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) { switch (cast(N->getOperand(1))->getZExtValue()) { default: break; case Intrinsic::ppc_vsx_lxvw4x: case Intrinsic::ppc_vsx_lxvd2x: return expandVSXLoadForLE(N, DCI); } } break; } case ISD::INTRINSIC_VOID: { // For little endian, VSX stores require generating xxswapd/stxvd2x. if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) { switch (cast(N->getOperand(1))->getZExtValue()) { default: break; case Intrinsic::ppc_vsx_stxvw4x: case Intrinsic::ppc_vsx_stxvd2x: return expandVSXStoreForLE(N, DCI); } } break; } case ISD::BSWAP: // Turn BSWAP (LOAD) -> lhbrx/lwbrx. if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && N->getOperand(0).hasOneUse() && (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 || (Subtarget.hasLDBRX() && Subtarget.isPPC64() && N->getValueType(0) == MVT::i64))) { SDValue Load = N->getOperand(0); LoadSDNode *LD = cast(Load); // Create the byte-swapping load. SDValue Ops[] = { LD->getChain(), // Chain LD->getBasePtr(), // Ptr DAG.getValueType(N->getValueType(0)) // VT }; SDValue BSLoad = DAG.getMemIntrinsicNode(PPCISD::LBRX, dl, DAG.getVTList(N->getValueType(0) == MVT::i64 ? MVT::i64 : MVT::i32, MVT::Other), Ops, LD->getMemoryVT(), LD->getMemOperand()); // If this is an i16 load, insert the truncate. SDValue ResVal = BSLoad; if (N->getValueType(0) == MVT::i16) ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad); // First, combine the bswap away. This makes the value produced by the // load dead. DCI.CombineTo(N, ResVal); // Next, combine the load away, we give it a bogus result value but a real // chain result. The result value is dead because the bswap is dead. DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1)); // Return N so it doesn't get rechecked! return SDValue(N, 0); } break; case PPCISD::VCMP: { // If a VCMPo node already exists with exactly the same operands as this // node, use its result instead of this node (VCMPo computes both a CR6 and // a normal output). // if (!N->getOperand(0).hasOneUse() && !N->getOperand(1).hasOneUse() && !N->getOperand(2).hasOneUse()) { // Scan all of the users of the LHS, looking for VCMPo's that match. SDNode *VCMPoNode = nullptr; SDNode *LHSN = N->getOperand(0).getNode(); for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end(); UI != E; ++UI) if (UI->getOpcode() == PPCISD::VCMPo && UI->getOperand(1) == N->getOperand(1) && UI->getOperand(2) == N->getOperand(2) && UI->getOperand(0) == N->getOperand(0)) { VCMPoNode = *UI; break; } // If there is no VCMPo node, or if the flag value has a single use, don't // transform this. if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1)) break; // Look at the (necessarily single) use of the flag value. If it has a // chain, this transformation is more complex. Note that multiple things // could use the value result, which we should ignore. SDNode *FlagUser = nullptr; for (SDNode::use_iterator UI = VCMPoNode->use_begin(); FlagUser == nullptr; ++UI) { assert(UI != VCMPoNode->use_end() && "Didn't find user!"); SDNode *User = *UI; for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { if (User->getOperand(i) == SDValue(VCMPoNode, 1)) { FlagUser = User; break; } } } // If the user is a MFOCRF instruction, we know this is safe. // Otherwise we give up for right now. if (FlagUser->getOpcode() == PPCISD::MFOCRF) return SDValue(VCMPoNode, 0); } break; } case ISD::BRCOND: { SDValue Cond = N->getOperand(1); SDValue Target = N->getOperand(2); if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN && cast(Cond.getOperand(1))->getZExtValue() == Intrinsic::ppc_is_decremented_ctr_nonzero) { // We now need to make the intrinsic dead (it cannot be instruction // selected). DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0)); assert(Cond.getNode()->hasOneUse() && "Counter decrement has more than one use"); return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other, N->getOperand(0), Target); } } break; case ISD::BR_CC: { // If this is a branch on an altivec predicate comparison, lower this so // that we don't have to do a MFOCRF: instead, branch directly on CR6. This // lowering is done pre-legalize, because the legalizer lowers the predicate // compare down to code that is difficult to reassemble. ISD::CondCode CC = cast(N->getOperand(1))->get(); SDValue LHS = N->getOperand(2), RHS = N->getOperand(3); // Sometimes the promoted value of the intrinsic is ANDed by some non-zero // value. If so, pass-through the AND to get to the intrinsic. if (LHS.getOpcode() == ISD::AND && LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN && cast(LHS.getOperand(0).getOperand(1))->getZExtValue() == Intrinsic::ppc_is_decremented_ctr_nonzero && isa(LHS.getOperand(1)) && !isNullConstant(LHS.getOperand(1))) LHS = LHS.getOperand(0); if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN && cast(LHS.getOperand(1))->getZExtValue() == Intrinsic::ppc_is_decremented_ctr_nonzero && isa(RHS)) { assert((CC == ISD::SETEQ || CC == ISD::SETNE) && "Counter decrement comparison is not EQ or NE"); unsigned Val = cast(RHS)->getZExtValue(); bool isBDNZ = (CC == ISD::SETEQ && Val) || (CC == ISD::SETNE && !Val); // We now need to make the intrinsic dead (it cannot be instruction // selected). DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0)); assert(LHS.getNode()->hasOneUse() && "Counter decrement has more than one use"); return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other, N->getOperand(0), N->getOperand(4)); } int CompareOpc; bool isDot; if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN && isa(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) && getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) { assert(isDot && "Can't compare against a vector result!"); // If this is a comparison against something other than 0/1, then we know // that the condition is never/always true. unsigned Val = cast(RHS)->getZExtValue(); if (Val != 0 && Val != 1) { if (CC == ISD::SETEQ) // Cond never true, remove branch. return N->getOperand(0); // Always !=, turn it into an unconditional branch. return DAG.getNode(ISD::BR, dl, MVT::Other, N->getOperand(0), N->getOperand(4)); } bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0); // Create the PPCISD altivec 'dot' comparison node. SDValue Ops[] = { LHS.getOperand(2), // LHS of compare LHS.getOperand(3), // RHS of compare DAG.getConstant(CompareOpc, dl, MVT::i32) }; EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue }; SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops); // Unpack the result based on how the target uses it. PPC::Predicate CompOpc; switch (cast(LHS.getOperand(1))->getZExtValue()) { default: // Can't happen, don't crash on invalid number though. case 0: // Branch on the value of the EQ bit of CR6. CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE; break; case 1: // Branch on the inverted value of the EQ bit of CR6. CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ; break; case 2: // Branch on the value of the LT bit of CR6. CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE; break; case 3: // Branch on the inverted value of the LT bit of CR6. CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT; break; } return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0), DAG.getConstant(CompOpc, dl, MVT::i32), DAG.getRegister(PPC::CR6, MVT::i32), N->getOperand(4), CompNode.getValue(1)); } break; } case ISD::BUILD_VECTOR: return DAGCombineBuildVector(N, DCI); } return SDValue(); } SDValue PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, std::vector *Created) const { // fold (sdiv X, pow2) EVT VT = N->getValueType(0); if (VT == MVT::i64 && !Subtarget.isPPC64()) return SDValue(); if ((VT != MVT::i32 && VT != MVT::i64) || !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2())) return SDValue(); SDLoc DL(N); SDValue N0 = N->getOperand(0); bool IsNegPow2 = (-Divisor).isPowerOf2(); unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros(); SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT); SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt); if (Created) Created->push_back(Op.getNode()); if (IsNegPow2) { Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op); if (Created) Created->push_back(Op.getNode()); } return Op; } //===----------------------------------------------------------------------===// // Inline Assembly Support //===----------------------------------------------------------------------===// void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth) const { KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0); switch (Op.getOpcode()) { default: break; case PPCISD::LBRX: { // lhbrx is known to have the top bits cleared out. if (cast(Op.getOperand(2))->getVT() == MVT::i16) KnownZero = 0xFFFF0000; break; } case ISD::INTRINSIC_WO_CHAIN: { switch (cast(Op.getOperand(0))->getZExtValue()) { default: break; case Intrinsic::ppc_altivec_vcmpbfp_p: case Intrinsic::ppc_altivec_vcmpeqfp_p: case Intrinsic::ppc_altivec_vcmpequb_p: case Intrinsic::ppc_altivec_vcmpequh_p: case Intrinsic::ppc_altivec_vcmpequw_p: case Intrinsic::ppc_altivec_vcmpequd_p: case Intrinsic::ppc_altivec_vcmpgefp_p: case Intrinsic::ppc_altivec_vcmpgtfp_p: case Intrinsic::ppc_altivec_vcmpgtsb_p: case Intrinsic::ppc_altivec_vcmpgtsh_p: case Intrinsic::ppc_altivec_vcmpgtsw_p: case Intrinsic::ppc_altivec_vcmpgtsd_p: case Intrinsic::ppc_altivec_vcmpgtub_p: case Intrinsic::ppc_altivec_vcmpgtuh_p: case Intrinsic::ppc_altivec_vcmpgtuw_p: case Intrinsic::ppc_altivec_vcmpgtud_p: KnownZero = ~1U; // All bits but the low one are known to be zero. break; } } } } unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { switch (Subtarget.getDarwinDirective()) { default: break; case PPC::DIR_970: case PPC::DIR_PWR4: case PPC::DIR_PWR5: case PPC::DIR_PWR5X: case PPC::DIR_PWR6: case PPC::DIR_PWR6X: case PPC::DIR_PWR7: case PPC::DIR_PWR8: case PPC::DIR_PWR9: { if (!ML) break; const PPCInstrInfo *TII = Subtarget.getInstrInfo(); // For small loops (between 5 and 8 instructions), align to a 32-byte // boundary so that the entire loop fits in one instruction-cache line. uint64_t LoopSize = 0; for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I) for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) { LoopSize += TII->GetInstSizeInBytes(*J); if (LoopSize > 32) break; } if (LoopSize > 16 && LoopSize <= 32) return 5; break; } } return TargetLowering::getPrefLoopAlignment(ML); } /// getConstraintType - Given a constraint, return the type of /// constraint it is for this target. PPCTargetLowering::ConstraintType PPCTargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { default: break; case 'b': case 'r': case 'f': case 'd': case 'v': case 'y': return C_RegisterClass; case 'Z': // FIXME: While Z does indicate a memory constraint, it specifically // indicates an r+r address (used in conjunction with the 'y' modifier // in the replacement string). Currently, we're forcing the base // register to be r0 in the asm printer (which is interpreted as zero) // and forming the complete address in the second register. This is // suboptimal. return C_Memory; } } else if (Constraint == "wc") { // individual CR bits. return C_RegisterClass; } else if (Constraint == "wa" || Constraint == "wd" || Constraint == "wf" || Constraint == "ws") { return C_RegisterClass; // VSX registers. } return TargetLowering::getConstraintType(Constraint); } /// Examine constraint type and operand type and determine a weight value. /// This object must already have been set up with the operand type /// and the current alternative constraint selected. TargetLowering::ConstraintWeight PPCTargetLowering::getSingleConstraintMatchWeight( AsmOperandInfo &info, const char *constraint) const { ConstraintWeight weight = CW_Invalid; Value *CallOperandVal = info.CallOperandVal; // If we don't have a value, we can't do a match, // but allow it at the lowest weight. if (!CallOperandVal) return CW_Default; Type *type = CallOperandVal->getType(); // Look at the constraint type. if (StringRef(constraint) == "wc" && type->isIntegerTy(1)) return CW_Register; // an individual CR bit. else if ((StringRef(constraint) == "wa" || StringRef(constraint) == "wd" || StringRef(constraint) == "wf") && type->isVectorTy()) return CW_Register; else if (StringRef(constraint) == "ws" && type->isDoubleTy()) return CW_Register; switch (*constraint) { default: weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); break; case 'b': if (type->isIntegerTy()) weight = CW_Register; break; case 'f': if (type->isFloatTy()) weight = CW_Register; break; case 'd': if (type->isDoubleTy()) weight = CW_Register; break; case 'v': if (type->isVectorTy()) weight = CW_Register; break; case 'y': weight = CW_Register; break; case 'Z': weight = CW_Memory; break; } return weight; } std::pair PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { if (Constraint.size() == 1) { // GCC RS6000 Constraint Letters switch (Constraint[0]) { case 'b': // R1-R31 if (VT == MVT::i64 && Subtarget.isPPC64()) return std::make_pair(0U, &PPC::G8RC_NOX0RegClass); return std::make_pair(0U, &PPC::GPRC_NOR0RegClass); case 'r': // R0-R31 if (VT == MVT::i64 && Subtarget.isPPC64()) return std::make_pair(0U, &PPC::G8RCRegClass); return std::make_pair(0U, &PPC::GPRCRegClass); // 'd' and 'f' constraints are both defined to be "the floating point // registers", where one is for 32-bit and the other for 64-bit. We don't // really care overly much here so just give them all the same reg classes. case 'd': case 'f': if (VT == MVT::f32 || VT == MVT::i32) return std::make_pair(0U, &PPC::F4RCRegClass); if (VT == MVT::f64 || VT == MVT::i64) return std::make_pair(0U, &PPC::F8RCRegClass); if (VT == MVT::v4f64 && Subtarget.hasQPX()) return std::make_pair(0U, &PPC::QFRCRegClass); if (VT == MVT::v4f32 && Subtarget.hasQPX()) return std::make_pair(0U, &PPC::QSRCRegClass); break; case 'v': if (VT == MVT::v4f64 && Subtarget.hasQPX()) return std::make_pair(0U, &PPC::QFRCRegClass); if (VT == MVT::v4f32 && Subtarget.hasQPX()) return std::make_pair(0U, &PPC::QSRCRegClass); if (Subtarget.hasAltivec()) return std::make_pair(0U, &PPC::VRRCRegClass); case 'y': // crrc return std::make_pair(0U, &PPC::CRRCRegClass); } } else if (Constraint == "wc" && Subtarget.useCRBits()) { // An individual CR bit. return std::make_pair(0U, &PPC::CRBITRCRegClass); } else if ((Constraint == "wa" || Constraint == "wd" || Constraint == "wf") && Subtarget.hasVSX()) { return std::make_pair(0U, &PPC::VSRCRegClass); } else if (Constraint == "ws" && Subtarget.hasVSX()) { if (VT == MVT::f32 && Subtarget.hasP8Vector()) return std::make_pair(0U, &PPC::VSSRCRegClass); else return std::make_pair(0U, &PPC::VSFRCRegClass); } std::pair R = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers // (which we call X[0-9]+). If a 64-bit value has been requested, and a // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent // register. // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use // the AsmName field from *RegisterInfo.td, then this would not be necessary. if (R.first && VT == MVT::i64 && Subtarget.isPPC64() && PPC::GPRCRegClass.contains(R.first)) return std::make_pair(TRI->getMatchingSuperReg(R.first, PPC::sub_32, &PPC::G8RCRegClass), &PPC::G8RCRegClass); // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same. if (!R.second && StringRef("{cc}").equals_lower(Constraint)) { R.first = PPC::CR0; R.second = &PPC::CRRCRegClass; } return R; } /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector&Ops, SelectionDAG &DAG) const { SDValue Result; // Only support length 1 constraints. if (Constraint.length() > 1) return; char Letter = Constraint[0]; switch (Letter) { default: break; case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': { ConstantSDNode *CST = dyn_cast(Op); if (!CST) return; // Must be an immediate to match. SDLoc dl(Op); int64_t Value = CST->getSExtValue(); EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative // numbers are printed as such. switch (Letter) { default: llvm_unreachable("Unknown constraint letter!"); case 'I': // "I" is a signed 16-bit constant. if (isInt<16>(Value)) Result = DAG.getTargetConstant(Value, dl, TCVT); break; case 'J': // "J" is a constant with only the high-order 16 bits nonzero. if (isShiftedUInt<16, 16>(Value)) Result = DAG.getTargetConstant(Value, dl, TCVT); break; case 'L': // "L" is a signed 16-bit constant shifted left 16 bits. if (isShiftedInt<16, 16>(Value)) Result = DAG.getTargetConstant(Value, dl, TCVT); break; case 'K': // "K" is a constant with only the low-order 16 bits nonzero. if (isUInt<16>(Value)) Result = DAG.getTargetConstant(Value, dl, TCVT); break; case 'M': // "M" is a constant that is greater than 31. if (Value > 31) Result = DAG.getTargetConstant(Value, dl, TCVT); break; case 'N': // "N" is a positive constant that is an exact power of two. if (Value > 0 && isPowerOf2_64(Value)) Result = DAG.getTargetConstant(Value, dl, TCVT); break; case 'O': // "O" is the constant zero. if (Value == 0) Result = DAG.getTargetConstant(Value, dl, TCVT); break; case 'P': // "P" is a constant whose negation is a signed 16-bit constant. if (isInt<16>(-Value)) Result = DAG.getTargetConstant(Value, dl, TCVT); break; } break; } } if (Result.getNode()) { Ops.push_back(Result); return; } // Handle standard constraint letters. TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); } // isLegalAddressingMode - Return true if the addressing mode represented // by AM is legal for this target, for a load/store of the specified type. bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const { // PPC does not allow r+i addressing modes for vectors! if (Ty->isVectorTy() && AM.BaseOffs != 0) return false; // PPC allows a sign-extended 16-bit immediate field. if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1) return false; // No global is ever allowed as a base. if (AM.BaseGV) return false; // PPC only support r+r, switch (AM.Scale) { case 0: // "r+i" or just "i", depending on HasBaseReg. break; case 1: if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed. return false; // Otherwise we have r+r or r+i. break; case 2: if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed. return false; // Allow 2*r as r+r. break; default: // No other scales are supported. return false; } return true; } SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MFI->setReturnAddressIsTaken(true); if (verifyReturnAddressArgumentIsConstant(Op, DAG)) return SDValue(); SDLoc dl(Op); unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); // Make sure the function does not optimize away the store of the RA to // the stack. PPCFunctionInfo *FuncInfo = MF.getInfo(); FuncInfo->setLRStoreRequired(); bool isPPC64 = Subtarget.isPPC64(); auto PtrVT = getPointerTy(MF.getDataLayout()); if (Depth > 0) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); SDValue Offset = DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl, isPPC64 ? MVT::i64 : MVT::i32); return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset), MachinePointerInfo()); } // Just load the return address off the stack. SDValue RetAddrFI = getReturnAddrFrameIndex(DAG); return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI, MachinePointerInfo()); } SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MFI->setFrameAddressIsTaken(true); EVT PtrVT = getPointerTy(MF.getDataLayout()); bool isPPC64 = PtrVT == MVT::i64; // Naked functions never have a frame pointer, and so we use r1. For all // other functions, this decision must be delayed until during PEI. unsigned FrameReg; if (MF.getFunction()->hasFnAttribute(Attribute::Naked)) FrameReg = isPPC64 ? PPC::X1 : PPC::R1; else FrameReg = isPPC64 ? PPC::FP8 : PPC::FP; SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, PtrVT); while (Depth--) FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(), FrameAddr, MachinePointerInfo()); return FrameAddr; } // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT, SelectionDAG &DAG) const { bool isPPC64 = Subtarget.isPPC64(); bool isDarwinABI = Subtarget.isDarwinABI(); if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) || (!isPPC64 && VT != MVT::i32)) report_fatal_error("Invalid register global variable type"); bool is64Bit = isPPC64 && VT == MVT::i64; unsigned Reg = StringSwitch(RegName) .Case("r1", is64Bit ? PPC::X1 : PPC::R1) .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2) .Case("r13", (!isPPC64 && isDarwinABI) ? 0 : (is64Bit ? PPC::X13 : PPC::R13)) .Default(0); if (Reg) return Reg; report_fatal_error("Invalid register name global variable"); } bool PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The PowerPC target isn't yet aware of offsets. return false; } bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const { switch (Intrinsic) { case Intrinsic::ppc_qpx_qvlfd: case Intrinsic::ppc_qpx_qvlfs: case Intrinsic::ppc_qpx_qvlfcd: case Intrinsic::ppc_qpx_qvlfcs: case Intrinsic::ppc_qpx_qvlfiwa: case Intrinsic::ppc_qpx_qvlfiwz: case Intrinsic::ppc_altivec_lvx: case Intrinsic::ppc_altivec_lvxl: case Intrinsic::ppc_altivec_lvebx: case Intrinsic::ppc_altivec_lvehx: case Intrinsic::ppc_altivec_lvewx: case Intrinsic::ppc_vsx_lxvd2x: case Intrinsic::ppc_vsx_lxvw4x: { EVT VT; switch (Intrinsic) { case Intrinsic::ppc_altivec_lvebx: VT = MVT::i8; break; case Intrinsic::ppc_altivec_lvehx: VT = MVT::i16; break; case Intrinsic::ppc_altivec_lvewx: VT = MVT::i32; break; case Intrinsic::ppc_vsx_lxvd2x: VT = MVT::v2f64; break; case Intrinsic::ppc_qpx_qvlfd: VT = MVT::v4f64; break; case Intrinsic::ppc_qpx_qvlfs: VT = MVT::v4f32; break; case Intrinsic::ppc_qpx_qvlfcd: VT = MVT::v2f64; break; case Intrinsic::ppc_qpx_qvlfcs: VT = MVT::v2f32; break; default: VT = MVT::v4i32; break; } Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = VT; Info.ptrVal = I.getArgOperand(0); Info.offset = -VT.getStoreSize()+1; Info.size = 2*VT.getStoreSize()-1; Info.align = 1; Info.vol = false; Info.readMem = true; Info.writeMem = false; return true; } case Intrinsic::ppc_qpx_qvlfda: case Intrinsic::ppc_qpx_qvlfsa: case Intrinsic::ppc_qpx_qvlfcda: case Intrinsic::ppc_qpx_qvlfcsa: case Intrinsic::ppc_qpx_qvlfiwaa: case Intrinsic::ppc_qpx_qvlfiwza: { EVT VT; switch (Intrinsic) { case Intrinsic::ppc_qpx_qvlfda: VT = MVT::v4f64; break; case Intrinsic::ppc_qpx_qvlfsa: VT = MVT::v4f32; break; case Intrinsic::ppc_qpx_qvlfcda: VT = MVT::v2f64; break; case Intrinsic::ppc_qpx_qvlfcsa: VT = MVT::v2f32; break; default: VT = MVT::v4i32; break; } Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = VT; Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.size = VT.getStoreSize(); Info.align = 1; Info.vol = false; Info.readMem = true; Info.writeMem = false; return true; } case Intrinsic::ppc_qpx_qvstfd: case Intrinsic::ppc_qpx_qvstfs: case Intrinsic::ppc_qpx_qvstfcd: case Intrinsic::ppc_qpx_qvstfcs: case Intrinsic::ppc_qpx_qvstfiw: case Intrinsic::ppc_altivec_stvx: case Intrinsic::ppc_altivec_stvxl: case Intrinsic::ppc_altivec_stvebx: case Intrinsic::ppc_altivec_stvehx: case Intrinsic::ppc_altivec_stvewx: case Intrinsic::ppc_vsx_stxvd2x: case Intrinsic::ppc_vsx_stxvw4x: { EVT VT; switch (Intrinsic) { case Intrinsic::ppc_altivec_stvebx: VT = MVT::i8; break; case Intrinsic::ppc_altivec_stvehx: VT = MVT::i16; break; case Intrinsic::ppc_altivec_stvewx: VT = MVT::i32; break; case Intrinsic::ppc_vsx_stxvd2x: VT = MVT::v2f64; break; case Intrinsic::ppc_qpx_qvstfd: VT = MVT::v4f64; break; case Intrinsic::ppc_qpx_qvstfs: VT = MVT::v4f32; break; case Intrinsic::ppc_qpx_qvstfcd: VT = MVT::v2f64; break; case Intrinsic::ppc_qpx_qvstfcs: VT = MVT::v2f32; break; default: VT = MVT::v4i32; break; } Info.opc = ISD::INTRINSIC_VOID; Info.memVT = VT; Info.ptrVal = I.getArgOperand(1); Info.offset = -VT.getStoreSize()+1; Info.size = 2*VT.getStoreSize()-1; Info.align = 1; Info.vol = false; Info.readMem = false; Info.writeMem = true; return true; } case Intrinsic::ppc_qpx_qvstfda: case Intrinsic::ppc_qpx_qvstfsa: case Intrinsic::ppc_qpx_qvstfcda: case Intrinsic::ppc_qpx_qvstfcsa: case Intrinsic::ppc_qpx_qvstfiwa: { EVT VT; switch (Intrinsic) { case Intrinsic::ppc_qpx_qvstfda: VT = MVT::v4f64; break; case Intrinsic::ppc_qpx_qvstfsa: VT = MVT::v4f32; break; case Intrinsic::ppc_qpx_qvstfcda: VT = MVT::v2f64; break; case Intrinsic::ppc_qpx_qvstfcsa: VT = MVT::v2f32; break; default: VT = MVT::v4i32; break; } Info.opc = ISD::INTRINSIC_VOID; Info.memVT = VT; Info.ptrVal = I.getArgOperand(1); Info.offset = 0; Info.size = VT.getStoreSize(); Info.align = 1; Info.vol = false; Info.readMem = false; Info.writeMem = true; return true; } default: break; } return false; } /// getOptimalMemOpType - Returns the target specific optimal type for load /// and store operations as a result of memset, memcpy, and memmove /// lowering. If DstAlign is zero that means it's safe to destination /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it /// means there isn't a need to check it against alignment requirement, /// probably because the source does not need to be loaded. If 'IsMemset' is /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy /// source is constant so it does not need to be loaded. /// It returns EVT::Other if the type should be determined using generic /// target-independent logic. EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const { if (getTargetMachine().getOptLevel() != CodeGenOpt::None) { const Function *F = MF.getFunction(); // When expanding a memset, require at least two QPX instructions to cover // the cost of loading the value to be stored from the constant pool. if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) && (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) && !F->hasFnAttribute(Attribute::NoImplicitFloat)) { return MVT::v4f64; } // We should use Altivec/VSX loads and stores when available. For unaligned // addresses, unaligned VSX loads are only fast starting with the P8. if (Subtarget.hasAltivec() && Size >= 16 && (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) || ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector()))) return MVT::v4i32; } if (Subtarget.isPPC64()) { return MVT::i64; } return MVT::i32; } /// \brief Returns true if it is beneficial to convert a load of a constant /// to just the constant itself. bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); return !(BitSize == 0 || BitSize > 64); } bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) return false; unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); return NumBits1 == 64 && NumBits2 == 32; } bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { if (!VT1.isInteger() || !VT2.isInteger()) return false; unsigned NumBits1 = VT1.getSizeInBits(); unsigned NumBits2 = VT2.getSizeInBits(); return NumBits1 == 64 && NumBits2 == 32; } bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { // Generally speaking, zexts are not free, but they are free when they can be // folded with other operations. if (LoadSDNode *LD = dyn_cast(Val)) { EVT MemVT = LD->getMemoryVT(); if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 || (Subtarget.isPPC64() && MemVT == MVT::i32)) && (LD->getExtensionType() == ISD::NON_EXTLOAD || LD->getExtensionType() == ISD::ZEXTLOAD)) return true; } // FIXME: Add other cases... // - 32-bit shifts with a zext to i64 // - zext after ctlz, bswap, etc. // - zext after and by a constant mask return TargetLowering::isZExtFree(Val, VT2); } bool PPCTargetLowering::isFPExtFree(EVT VT) const { assert(VT.isFloatingPoint()); return true; } bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const { return isInt<16>(Imm) || isUInt<16>(Imm); } bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const { return isInt<16>(Imm) || isUInt<16>(Imm); } bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, unsigned, bool *Fast) const { if (DisablePPCUnaligned) return false; // PowerPC supports unaligned memory access for simple non-vector types. // Although accessing unaligned addresses is not as efficient as accessing // aligned addresses, it is generally more efficient than manual expansion, // and generally only traps for software emulation when crossing page // boundaries. if (!VT.isSimple()) return false; if (VT.getSimpleVT().isVector()) { if (Subtarget.hasVSX()) { if (VT != MVT::v2f64 && VT != MVT::v2i64 && VT != MVT::v4f32 && VT != MVT::v4i32) return false; } else { return false; } } if (VT == MVT::ppcf128) return false; if (Fast) *Fast = true; return true; } bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { VT = VT.getScalarType(); if (!VT.isSimple()) return false; switch (VT.getSimpleVT().SimpleTy) { case MVT::f32: case MVT::f64: return true; default: break; } return false; } const MCPhysReg * PPCTargetLowering::getScratchRegisters(CallingConv::ID) const { // LR is a callee-save register, but we must treat it as clobbered by any call // site. Hence we include LR in the scratch registers, which are in turn added // as implicit-defs for stackmaps and patchpoints. The same reasoning applies // to CTR, which is used by any indirect call. static const MCPhysReg ScratchRegs[] = { PPC::X12, PPC::LR8, PPC::CTR8, 0 }; return ScratchRegs; } unsigned PPCTargetLowering::getExceptionPointerRegister( const Constant *PersonalityFn) const { return Subtarget.isPPC64() ? PPC::X3 : PPC::R3; } unsigned PPCTargetLowering::getExceptionSelectorRegister( const Constant *PersonalityFn) const { return Subtarget.isPPC64() ? PPC::X4 : PPC::R4; } bool PPCTargetLowering::shouldExpandBuildVectorWithShuffles( EVT VT , unsigned DefinedValues) const { if (VT == MVT::v2i64) return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves if (Subtarget.hasVSX() || Subtarget.hasQPX()) return true; return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues); } Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const { if (DisableILPPref || Subtarget.enableMachineScheduler()) return TargetLowering::getSchedulingPreference(N); return Sched::ILP; } // Create a fast isel object. FastISel * PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const { return PPC::createFastISel(FuncInfo, LibInfo); } void PPCTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const { if (Subtarget.isDarwinABI()) return; if (!Subtarget.isPPC64()) return; // Update IsSplitCSR in PPCFunctionInfo PPCFunctionInfo *PFI = Entry->getParent()->getInfo(); PFI->setIsSplitCSR(true); } void PPCTargetLowering::insertCopiesSplitCSR( MachineBasicBlock *Entry, const SmallVectorImpl &Exits) const { const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo(); const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent()); if (!IStart) return; const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo(); MachineBasicBlock::iterator MBBI = Entry->begin(); for (const MCPhysReg *I = IStart; *I; ++I) { const TargetRegisterClass *RC = nullptr; if (PPC::G8RCRegClass.contains(*I)) RC = &PPC::G8RCRegClass; else if (PPC::F8RCRegClass.contains(*I)) RC = &PPC::F8RCRegClass; else if (PPC::CRRCRegClass.contains(*I)) RC = &PPC::CRRCRegClass; else if (PPC::VRRCRegClass.contains(*I)) RC = &PPC::VRRCRegClass; else llvm_unreachable("Unexpected register class in CSRsViaCopy!"); unsigned NewVR = MRI->createVirtualRegister(RC); // Create copy from CSR to a virtual register. // FIXME: this currently does not emit CFI pseudo-instructions, it works // fine for CXX_FAST_TLS since the C++-style TLS access functions should be // nounwind. If we want to generalize this later, we may need to emit // CFI pseudo-instructions. assert(Entry->getParent()->getFunction()->hasFnAttribute( Attribute::NoUnwind) && "Function should be nounwind in insertCopiesSplitCSR!"); Entry->addLiveIn(*I); BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR) .addReg(*I); // Insert the copy-back instructions right before the terminator for (auto *Exit : Exits) BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(), TII->get(TargetOpcode::COPY), *I) .addReg(NewVR); } } // Override to enable LOAD_STACK_GUARD lowering on Linux. bool PPCTargetLowering::useLoadStackGuardNode() const { if (!Subtarget.isTargetLinux()) return TargetLowering::useLoadStackGuardNode(); return true; } // Override to disable global variable loading on Linux. void PPCTargetLowering::insertSSPDeclarations(Module &M) const { if (!Subtarget.isTargetLinux()) return TargetLowering::insertSSPDeclarations(M); } Index: projects/clang390-import/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- projects/clang390-import/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h (revision 305682) +++ projects/clang390-import/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h (revision 305683) @@ -1,963 +1,964 @@ //===-- PPCISelLowering.h - PPC32 DAG Lowering Interface --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file defines the interfaces that PPC uses to lower LLVM code into a // selection DAG. // //===----------------------------------------------------------------------===// #ifndef LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H #define LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H #include "PPC.h" #include "PPCInstrInfo.h" #include "PPCRegisterInfo.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetLowering.h" namespace llvm { namespace PPCISD { enum NodeType : unsigned { // Start the numbering where the builtin ops and target ops leave off. FIRST_NUMBER = ISD::BUILTIN_OP_END, /// FSEL - Traditional three-operand fsel node. /// FSEL, /// FCFID - The FCFID instruction, taking an f64 operand and producing /// and f64 value containing the FP representation of the integer that /// was temporarily in the f64 operand. FCFID, /// Newer FCFID[US] integer-to-floating-point conversion instructions for /// unsigned integers and single-precision outputs. FCFIDU, FCFIDS, FCFIDUS, /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 /// operand, producing an f64 value containing the integer representation /// of that FP value. FCTIDZ, FCTIWZ, /// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for /// unsigned integers. FCTIDUZ, FCTIWUZ, /// Reciprocal estimate instructions (unary FP ops). FRE, FRSQRTE, // VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking // three v4f32 operands and producing a v4f32 result. VMADDFP, VNMSUBFP, /// VPERM - The PPC VPERM Instruction. /// VPERM, /// XXSPLT - The PPC VSX splat instructions /// XXSPLT, /// XXINSERT - The PPC VSX insert instruction /// XXINSERT, /// VECSHL - The PPC VSX shift left instruction /// VECSHL, /// The CMPB instruction (takes two operands of i32 or i64). CMPB, /// Hi/Lo - These represent the high and low 16-bit parts of a global /// address respectively. These nodes have two operands, the first of /// which must be a TargetGlobalAddress, and the second of which must be a /// Constant. Selected naively, these turn into 'lis G+C' and 'li G+C', /// though these are usually folded into other nodes. Hi, Lo, /// The following two target-specific nodes are used for calls through /// function pointers in the 64-bit SVR4 ABI. /// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX) /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to /// compute an allocation on the stack. DYNALLOC, /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to /// compute an offset from native SP to the address of the most recent /// dynamic alloca. DYNAREAOFFSET, /// GlobalBaseReg - On Darwin, this node represents the result of the mflr /// at function entry, used for PIC code. GlobalBaseReg, /// These nodes represent the 32-bit PPC shifts that operate on 6-bit /// shift amounts. These nodes are generated by the multi-precision shift /// code. SRL, SRA, SHL, /// The combination of sra[wd]i and addze used to implemented signed /// integer division by a power of 2. The first operand is the dividend, /// and the second is the constant shift amount (representing the /// divisor). SRA_ADDZE, /// CALL - A direct function call. /// CALL_NOP is a call with the special NOP which follows 64-bit /// SVR4 calls. CALL, CALL_NOP, /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a /// MTCTR instruction. MTCTR, /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a /// BCTRL instruction. BCTRL, /// CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl /// instruction and the TOC reload required on SVR4 PPC64. BCTRL_LOAD_TOC, /// Return with a flag operand, matched by 'blr' RET_FLAG, /// R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction. /// This copies the bits corresponding to the specified CRREG into the /// resultant GPR. Bits corresponding to other CR regs are undefined. MFOCRF, /// Direct move from a VSX register to a GPR MFVSR, /// Direct move from a GPR to a VSX register (algebraic) MTVSRA, /// Direct move from a GPR to a VSX register (zero) MTVSRZ, /// Extract a subvector from signed integer vector and convert to FP. /// It is primarily used to convert a (widened) illegal integer vector /// type to a legal floating point vector type. /// For example v2i32 -> widened to v4i32 -> v2f64 SINT_VEC_TO_FP, /// Extract a subvector from unsigned integer vector and convert to FP. /// As with SINT_VEC_TO_FP, used for converting illegal types. UINT_VEC_TO_FP, // FIXME: Remove these once the ANDI glue bug is fixed: /// i1 = ANDIo_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the /// eq or gt bit of CR0 after executing andi. x, 1. This is used to /// implement truncation of i32 or i64 to i1. ANDIo_1_EQ_BIT, ANDIo_1_GT_BIT, // READ_TIME_BASE - A read of the 64-bit time-base register on a 32-bit // target (returns (Lo, Hi)). It takes a chain operand. READ_TIME_BASE, // EH_SJLJ_SETJMP - SjLj exception handling setjmp. EH_SJLJ_SETJMP, // EH_SJLJ_LONGJMP - SjLj exception handling longjmp. EH_SJLJ_LONGJMP, /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* /// instructions. For lack of better number, we use the opcode number /// encoding for the OPC field to identify the compare. For example, 838 /// is VCMPGTSH. VCMP, /// RESVEC, OUTFLAG = VCMPo(LHS, RHS, OPC) - Represents one of the /// altivec VCMP*o instructions. For lack of better number, we use the /// opcode number encoding for the OPC field to identify the compare. For /// example, 838 is VCMPGTSH. VCMPo, /// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This /// corresponds to the COND_BRANCH pseudo instruction. CRRC is the /// condition register to branch on, OPC is the branch opcode to use (e.g. /// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is /// an optional input flag argument. COND_BRANCH, /// CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based /// loops. BDNZ, BDZ, /// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding /// towards zero. Used only as part of the long double-to-int /// conversion sequence. FADDRTZ, /// F8RC = MFFS - This moves the FPSCR (not modeled) into the register. MFFS, /// TC_RETURN - A tail call return. /// operand #0 chain /// operand #1 callee (register or absolute) /// operand #2 stack adjustment /// operand #3 optional in flag TC_RETURN, /// ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls CR6SET, CR6UNSET, /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by initial-exec TLS /// on PPC32. PPC32_GOT, /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and /// local dynamic TLS on PPC32. PPC32_PICGOT, /// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec /// TLS model, produces an ADDIS8 instruction that adds the GOT /// base to sym\@got\@tprel\@ha. ADDIS_GOT_TPREL_HA, /// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec /// TLS model, produces a LD instruction with base register G8RReg /// and offset sym\@got\@tprel\@l. This completes the addition that /// finds the offset of "sym" relative to the thread pointer. LD_GOT_TPREL_L, /// G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS /// model, produces an ADD instruction that adds the contents of /// G8RReg to the thread pointer. Symbol contains a relocation /// sym\@tls which is to be replaced by the thread pointer and /// identifies to the linker that the instruction is part of a /// TLS sequence. ADD_TLS, /// G8RC = ADDIS_TLSGD_HA %X2, Symbol - For the general-dynamic TLS /// model, produces an ADDIS8 instruction that adds the GOT base /// register to sym\@got\@tlsgd\@ha. ADDIS_TLSGD_HA, /// %X3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS /// model, produces an ADDI8 instruction that adds G8RReg to /// sym\@got\@tlsgd\@l and stores the result in X3. Hidden by /// ADDIS_TLSGD_L_ADDR until after register assignment. ADDI_TLSGD_L, /// %X3 = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS /// model, produces a call to __tls_get_addr(sym\@tlsgd). Hidden by /// ADDIS_TLSGD_L_ADDR until after register assignment. GET_TLS_ADDR, /// G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that /// combines ADDI_TLSGD_L and GET_TLS_ADDR until expansion following /// register assignment. ADDI_TLSGD_L_ADDR, /// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS /// model, produces an ADDIS8 instruction that adds the GOT base /// register to sym\@got\@tlsld\@ha. ADDIS_TLSLD_HA, /// %X3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS /// model, produces an ADDI8 instruction that adds G8RReg to /// sym\@got\@tlsld\@l and stores the result in X3. Hidden by /// ADDIS_TLSLD_L_ADDR until after register assignment. ADDI_TLSLD_L, /// %X3 = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS /// model, produces a call to __tls_get_addr(sym\@tlsld). Hidden by /// ADDIS_TLSLD_L_ADDR until after register assignment. GET_TLSLD_ADDR, /// G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that /// combines ADDI_TLSLD_L and GET_TLSLD_ADDR until expansion /// following register assignment. ADDI_TLSLD_L_ADDR, /// G8RC = ADDIS_DTPREL_HA %X3, Symbol - For the local-dynamic TLS /// model, produces an ADDIS8 instruction that adds X3 to /// sym\@dtprel\@ha. ADDIS_DTPREL_HA, /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS /// model, produces an ADDI8 instruction that adds G8RReg to /// sym\@got\@dtprel\@l. ADDI_DTPREL_L, /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded /// during instruction selection to optimize a BUILD_VECTOR into /// operations on splats. This is necessary to avoid losing these /// optimizations due to constant folding. VADD_SPLAT, /// CHAIN = SC CHAIN, Imm128 - System call. The 7-bit unsigned /// operand identifies the operating system entry point. SC, /// CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer. CLRBHRB, /// GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch /// history rolling buffer entry. MFBHRBE, /// CHAIN = RFEBB CHAIN, State - Return from event-based branch. RFEBB, /// VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little /// endian. Maps to an xxswapd instruction that corrects an lxvd2x /// or stxvd2x instruction. The chain is necessary because the /// sequence replaces a load and needs to provide the same number /// of outputs. XXSWAPD, /// An SDNode for swaps that are not associated with any loads/stores /// and thereby have no chain. SWAP_NO_CHAIN, /// QVFPERM = This corresponds to the QPX qvfperm instruction. QVFPERM, /// QVGPCI = This corresponds to the QPX qvgpci instruction. QVGPCI, /// QVALIGNI = This corresponds to the QPX qvaligni instruction. QVALIGNI, /// QVESPLATI = This corresponds to the QPX qvesplati instruction. QVESPLATI, /// QBFLT = Access the underlying QPX floating-point boolean /// representation. QBFLT, /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a /// byte-swapping store instruction. It byte-swaps the low "Type" bits of /// the GPRC input, then stores it through Ptr. Type can be either i16 or /// i32. STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE, /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a /// byte-swapping load instruction. It loads "Type" bits, byte swaps it, /// then puts it in the bottom bits of the GPRC. TYPE can be either i16 /// or i32. LBRX, /// STFIWX - The STFIWX instruction. The first operand is an input token /// chain, then an f64 value to store, then an address to store it to. STFIWX, /// GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point /// load which sign-extends from a 32-bit integer value into the /// destination 64-bit register. LFIWAX, /// GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point /// load which zero-extends from a 32-bit integer value into the /// destination 64-bit register. LFIWZX, /// VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian. /// Maps directly to an lxvd2x instruction that will be followed by /// an xxswapd. LXVD2X, /// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian. /// Maps directly to an stxvd2x instruction that will be preceded by /// an xxswapd. STXVD2X, /// QBRC, CHAIN = QVLFSb CHAIN, Ptr /// The 4xf32 load used for v4i1 constants. QVLFSb, /// GPRC = TOC_ENTRY GA, TOC /// Loads the entry for GA from the TOC, where the TOC base is given by /// the last operand. TOC_ENTRY }; } /// Define some predicates that are used for node matching. namespace PPC { /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUHUM instruction. bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG); /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUWUM instruction. bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG); /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUDUM instruction. bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG); /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for /// a VRGL* instruction with the specified unit size (1,2 or 4 bytes). bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG); /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for /// a VRGH* instruction with the specified unit size (1,2 or 4 bytes). bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG); /// isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for /// a VMRGEW or VMRGOW instruction bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG); /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the /// shift amount, otherwise return -1. int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG); /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a splat of a single element that is suitable for input to /// VSPLTB/VSPLTH/VSPLTW. bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize); /// isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by /// the XXINSERTW instruction introduced in ISA 3.0. This is essentially any /// shuffle of v4f32/v4i32 vectors that just inserts one element from one /// vector into the other. This function will also set a couple of /// output parameters for how much the source vector needs to be shifted and /// what byte number needs to be specified for the instruction to put the /// element in the desired location of the target vector. bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE); /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG); /// get_VSPLTI_elt - If this is a build_vector of constants which can be /// formed by using a vspltis[bhw] instruction of the specified element /// size, return the constant being splatted. The ByteSize field indicates /// the number of bytes of each element [124] -> [bhw]. SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG); /// If this is a qvaligni shuffle mask, return the shift /// amount, otherwise return -1. int isQVALIGNIShuffleMask(SDNode *N); } class PPCTargetLowering : public TargetLowering { const PPCSubtarget &Subtarget; public: explicit PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI); /// getTargetNodeName() - This method returns the name of a target specific /// DAG node. const char *getTargetNodeName(unsigned Opcode) const override; /// getPreferredVectorAction - The code we generate when vector types are /// legalized by promoting the integer element type is often much worse /// than code we generate if we widen the type for applicable vector types. /// The issue with promoting is that the vector is scalaraized, individual /// elements promoted and then the vector is rebuilt. So say we load a pair /// of v4i8's and shuffle them. This will turn into a mess of 8 extending /// loads, moves back into VSR's (or memory ops if we don't have moves) and /// then the VPERM for the shuffle. All in all a very slow sequence. TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT) const override { if (VT.getVectorElementType().getSizeInBits() % 8 == 0) return TypeWidenVector; return TargetLoweringBase::getPreferredVectorAction(VT); } bool useSoftFloat() const override; MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { return MVT::i32; } bool isCheapToSpeculateCttz() const override { return true; } bool isCheapToSpeculateCtlz() const override { return true; } bool supportSplitCSR(MachineFunction *MF) const override { return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS && MF->getFunction()->hasFnAttribute(Attribute::NoUnwind); } void initializeSplitCSR(MachineBasicBlock *Entry) const override; void insertCopiesSplitCSR( MachineBasicBlock *Entry, const SmallVectorImpl &Exits) const override; /// getSetCCResultType - Return the ISD::SETCC ValueType EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override; /// Return true if target always beneficiates from combining into FMA for a /// given value type. This must typically return false on targets where FMA /// takes more cycles to execute than FADD. bool enableAggressiveFMAFusion(EVT VT) const override; /// getPreIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if the node's address /// can be legally represented as pre-indexed load / store address. bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override; /// SelectAddressRegReg - Given the specified addressed, check to see if it /// can be represented as an indexed [r+r] operation. Returns false if it /// can be more efficiently represented with [r+imm]. bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const; /// SelectAddressRegImm - Returns true if the address N can be represented /// by a base register plus a signed 16-bit displacement [r+imm], and if it /// is not better represented as reg+reg. If Aligned is true, only accept /// displacements suitable for STD and friends, i.e. multiples of 4. bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, bool Aligned) const; /// SelectAddressRegRegOnly - Given the specified addressed, force it to be /// represented as an indexed [r+r] operation. bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const; Sched::Preference getSchedulingPreference(SDNode *N) const override; /// LowerOperation - Provide custom lowering hooks for some operations. /// SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; /// ReplaceNodeResults - Replace the results of node with an illegal result /// type with new values built out of custom code. /// void ReplaceNodeResults(SDNode *N, SmallVectorImpl&Results, SelectionDAG &DAG) const override; SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const; SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const; SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, std::vector *Created) const override; unsigned getRegisterByName(const char* RegName, EVT VT, SelectionDAG &DAG) const override; void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth = 0) const override; unsigned getPrefLoopAlignment(MachineLoop *ML) const override; bool shouldInsertFencesForAtomic(const Instruction *I) const override { return true; } Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord, bool IsStore, bool IsLoad) const override; Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord, bool IsStore, bool IsLoad) const override; MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override; MachineBasicBlock *EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode) const; MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, unsigned Opcode) const; MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const; MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const; ConstraintType getConstraintType(StringRef Constraint) const override; /// Examine constraint string and operand type and determine a weight value. /// The operand object must already have been set up with the operand type. ConstraintWeight getSingleConstraintMatchWeight( AsmOperandInfo &info, const char *constraint) const override; std::pair getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override; /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate /// function arguments in the caller parameter area. This is the actual /// alignment, not its logarithm. unsigned getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override; /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector &Ops, SelectionDAG &DAG) const override; unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override { if (ConstraintCode == "es") return InlineAsm::Constraint_es; else if (ConstraintCode == "o") return InlineAsm::Constraint_o; else if (ConstraintCode == "Q") return InlineAsm::Constraint_Q; else if (ConstraintCode == "Z") return InlineAsm::Constraint_Z; else if (ConstraintCode == "Zy") return InlineAsm::Constraint_Zy; return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); } /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override; /// isLegalICmpImmediate - Return true if the specified immediate is legal /// icmp immediate, that is the target has icmp instructions which can /// compare a register against the immediate without having to materialize /// the immediate into a register. bool isLegalICmpImmediate(int64_t Imm) const override; /// isLegalAddImmediate - Return true if the specified immediate is legal /// add immediate, that is the target has add instructions which can /// add a register and the immediate without having to materialize /// the immediate into a register. bool isLegalAddImmediate(int64_t Imm) const override; /// isTruncateFree - Return true if it's free to truncate a value of /// type Ty1 to type Ty2. e.g. On PPC it's free to truncate a i64 value in /// register X1 to i32 by referencing its sub-register R1. bool isTruncateFree(Type *Ty1, Type *Ty2) const override; bool isTruncateFree(EVT VT1, EVT VT2) const override; bool isZExtFree(SDValue Val, EVT VT2) const override; bool isFPExtFree(EVT VT) const override; /// \brief Returns true if it is beneficial to convert a load of a constant /// to just the constant itself. bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override; bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const override; /// getOptimalMemOpType - Returns the target specific optimal type for load /// and store operations as a result of memset, memcpy, and memmove /// lowering. If DstAlign is zero that means it's safe to destination /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it /// means there isn't a need to check it against alignment requirement, /// probably because the source does not need to be loaded. If 'IsMemset' is /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy /// source is constant so it does not need to be loaded. /// It returns EVT::Other if the type should be determined using generic /// target-independent logic. EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const override; /// Is unaligned memory access allowed for the given type, and is it fast /// relative to software emulation. bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, unsigned Align = 1, bool *Fast = nullptr) const override; /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be /// expanded to FMAs when this method returns true, otherwise fmuladd is /// expanded to fmul + fadd. bool isFMAFasterThanFMulAndFAdd(EVT VT) const override; const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override; // Should we expand the build vector with shuffles? bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override; /// createFastISel - This method returns a target-specific FastISel object, /// or null if the target does not support "fast" instruction selection. FastISel *createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const override; /// \brief Returns true if an argument of type Ty needs to be passed in a /// contiguous block of registers in calling convention CallConv. bool functionArgumentNeedsConsecutiveRegisters( Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override { // We support any array type as "consecutive" block in the parameter // save area. The element type defines the alignment requirement and // whether the argument should go in GPRs, FPRs, or VRs if available. // // Note that clang uses this capability both to implement the ELFv2 // homogeneous float/vector aggregate ABI, and to avoid having to use // "byval" when passing aggregates that might fully fit in registers. return Ty->isArrayTy(); } /// If a physical register, this returns the register that receives the /// exception address on entry to an EH pad. unsigned getExceptionPointerRegister(const Constant *PersonalityFn) const override; /// If a physical register, this returns the register that receives the /// exception typeid on entry to a landing pad. unsigned getExceptionSelectorRegister(const Constant *PersonalityFn) const override; /// Override to support customized stack guard loading. bool useLoadStackGuardNode() const override; void insertSSPDeclarations(Module &M) const override; private: struct ReuseLoadInfo { SDValue Ptr; SDValue Chain; SDValue ResChain; MachinePointerInfo MPI; bool IsInvariant; unsigned Alignment; AAMDNodes AAInfo; const MDNode *Ranges; ReuseLoadInfo() : IsInvariant(false), Alignment(0), Ranges(nullptr) {} }; bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI, SelectionDAG &DAG, ISD::LoadExtType ET = ISD::NON_EXTLOAD) const; void spliceIntoChain(SDValue ResChain, SDValue NewResChain, SelectionDAG &DAG) const; void LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, SelectionDAG &DAG, const SDLoc &dl) const; SDValue LowerFP_TO_INTDirectMove(SDValue Op, SelectionDAG &DAG, const SDLoc &dl) const; SDValue LowerINT_TO_FPDirectMove(SDValue Op, SelectionDAG &DAG, const SDLoc &dl) const; SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const; SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const; bool IsEligibleForTailCallOptimization(SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, const SmallVectorImpl &Ins, SelectionDAG& DAG) const; bool IsEligibleForTailCallOptimization_64SVR4( SDValue Callee, CallingConv::ID CalleeCC, ImmutableCallSite *CS, bool isVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &Ins, SelectionDAG& DAG) const; SDValue EmitTailCallLoadFPAndRetAddr(SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut, SDValue &FPOpOut, const SDLoc &dl) const; SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, const SDLoc &dl) const; SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const; SDValue FinishCall(CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg, bool isPatchPoint, bool hasNest, SelectionDAG &DAG, SmallVector, 8> &RegsToPass, SDValue InFlag, SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff, unsigned NumBytes, const SmallVectorImpl &Ins, SmallVectorImpl &InVals, ImmutableCallSite *CS) const; SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const override; SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const override; bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl &Outs, LLVMContext &Context) const override; SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SDLoc &dl, SelectionDAG &DAG) const override; SDValue extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, SelectionDAG &DAG, SDValue ArgVal, const SDLoc &dl) const; SDValue LowerFormalArguments_Darwin( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const; SDValue LowerFormalArguments_64SVR4( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const; SDValue LowerFormalArguments_32SVR4( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const; SDValue createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl) const; SDValue LowerCall_Darwin(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool isTailCall, bool isPatchPoint, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals, ImmutableCallSite *CS) const; SDValue LowerCall_64SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool isTailCall, bool isPatchPoint, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals, ImmutableCallSite *CS) const; SDValue LowerCall_32SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool isTailCall, bool isPatchPoint, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals, ImmutableCallSite *CS) const; SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const; SDValue DAGCombineBuildVector(SDNode *N, DAGCombinerInfo &DCI) const; SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const; SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI, unsigned &RefinementSteps, bool &UseOneConstNR) const override; SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI, unsigned &RefinementSteps) const override; unsigned combineRepeatedFPDivisors() const override; CCAssignFn *useFastISelCCs(unsigned Flag) const; }; namespace PPC { FastISel *createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo); } bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State); bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State); bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State); } #endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H